mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2024-12-18 14:32:19 +00:00
1007 lines
45 KiB
Python
1007 lines
45 KiB
Python
import os
|
|
import re
|
|
import string
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
from django.conf import settings
|
|
from django.db import transaction
|
|
|
|
from troggle.core.models.caves import Cave, CaveAndEntrance, Entrance, GetCaveLookup
|
|
from troggle.core.models.survex import SurvexStation
|
|
from troggle.core.models.troggle import DataIssue
|
|
from troggle.settings import CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS, EXPOWEB, SURVEX_DATA
|
|
|
|
"""Reads all the cave description data and entrance description data
|
|
by parsing the xml files stored as e.g.
|
|
:EXPOWEB:/cave_data/1623-161.html
|
|
or
|
|
:EXPOWEB:/entrance_data/1623-161g.html
|
|
|
|
and creating the various Cave, Entrance and necessary Area objects.
|
|
|
|
This is the first import that happens after the database is reinitialised.
|
|
So is the first thing that creates tables.
|
|
|
|
"""
|
|
|
|
todo = """
|
|
- When reading cave data, to start off wit we do not know the cave id (slug) so we can't give a useful url in
|
|
the error message, but we do have the filename. Systematize this, and the same thing with reading entrance files.
|
|
|
|
- Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file.
|
|
So we will need a separate file-editing capability just for this configuration file ?!
|
|
|
|
- we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it
|
|
|
|
- rewrite archaic regex
|
|
re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
|
|
in modern form and pre-compile it.
|
|
|
|
- crashes on MariaDB in databasereset.py on server when deleting Caves and complains Area needs a
|
|
non null parent, But this is not true. The only solution we have found is to let it crash, then
|
|
stop and restart MariaDB (requires a logon able to sudo) and then restart the databasereset.py
|
|
again. (status as of July 2022). May not happen now that class Area is removed (Sept.2023).
|
|
"""
|
|
# Area codes accepted as the first four characters of an entrance_data filename
# (read_entrance() logs a DataIssue when the prefix is not in this set).
AREACODES = {"1623", "1624", "1626", "1627"} # NB set not dict
# NOTE(review): presumably the areas whose caves are treated as ARGE caves
# (see do_ARGE_cave); the code that consults this set is not in this part of the file - confirm.
ARGEAREAS = {"1626", "1627", "1624"} # NB set not dict

# Module-level caches, keyed by slug, so that repeated database lookups can be avoided
# during a bulk import. entrances_xslug holds Entrance objects.
# NOTE(review): caves_xslug and areas_xslug are declared global by the readers below
# but their use is not visible in this part of the file - confirm they are still needed.
entrances_xslug = {}
caves_xslug = {}
areas_xslug = {}

# Lower-case ASCII letters, used to recognise a trailing entrance letter on a slug.
LETTERS = list(string.ascii_lowercase)
|
|
|
|
|
|
def dummy_entrance(k, slug, msg="DUMMY"):
    """Create, save and return a placeholder Entrance.

    Used for either a PENDING cave or a DUMMY entrance when the user forgot to
    provide one when creating the cave.

    Args:
        k: used as the name and slug of the new Entrance and, with ".html"
            appended, as its filename.
        slug: cave slug; only used in the failure message and as the DataIssue url.
        msg: short label included in the failure message.

    Returns:
        The newly created Entrance object.

    Raises:
        Exception: whatever Entrance.objects.create() raised. It is re-raised
            after the failure has been printed and recorded as a DataIssue, so
            the calling routine can catch and handle it.
    """
    try:
        # objects.create() builds the object and saves it into the db in one step.
        # It either returns the instance or raises, so failure is detected by
        # exception rather than by testing the return value.
        ent = Entrance.objects.create(
            name=k, slug=k, filename=k + ".html",
            entrance_description="Dummy entrance: auto-created when registering a new cave "
            + "and you forgot to create an entrance for it. Click on 'Edit' to enter the correct data, then 'Submit'.",
            marking="?",
        )
    except Exception:
        message = f" ! {k:11s} {msg}-{slug} {k} entrance create failure"
        print(message)
        DataIssue.objects.create(parser="entrances", message=message, url=f"{slug}")
        raise  # caught and handled by calling routine.
    return ent
|
|
|
|
|
|
|
|
def set_dummy_entrance(id, slug, cave, msg="DUMMY"):
    """Record that a cave_data file names no usable entrance.

    Called only when reading the cave and entrance html files, when the
    Entrance field in a cave_data file is either missing or holds a null
    string instead of a filename.

    Historically a dummy Entrance was created at this point because a missing
    entrance crashed troggle in several places. Troggle is more robust now, and
    since April 2023 Cave and Entrance editing expects a real file to exist, so
    creating a dummy is actually harmful. All that is left is a DataIssue note;
    nothing is created and nothing is returned.

    Args:
        id: entrance identifier quoted in the note.
        slug: not used.
        cave: the Cave object; quoted in the note and its url becomes the DataIssue url.
        msg: not used.
    """
    note = f" - Note: Missing Entrance for entrance '{id}' on cave '{cave}' - Is this a problem?"
    DataIssue.objects.create(parser="entrances", message=note, url=f"{cave.url}")
|
|
|
|
def add_cave_to_pending_list(id, wallet, message):
    """Record a "wallets" DataIssue which links to the wallet's edit page.

    Typical call: (id, wallet, f"Wallet {wallet} - Could not find id <{id}>")

    Despite the name, nothing is appended to any list here: the only effect is
    an update_or_create of a DataIssue, so repeated calls with the same message
    and wallet do not pile up duplicates.

    Args:
        id: the cave id that could not be found; not used here.
        wallet: object whose walletname forms the url; any '#' becomes ':'.
        message: the text stored in the DataIssue.
    """
    edit_path = f"/walletedit/{wallet.walletname}"
    DataIssue.objects.update_or_create(
        parser="wallets",
        message=message,
        url=edit_path.replace('#', ':'),
    )
|
|
|
|
def create_new_cave(svxpath, svxid=None, msg=None):
    """Create (or find) a Cave for a survex path which is not a known cave.

    This is called when a new survex file is edited online which has a path on the
    :loser: repo which is not recognised as a known cave.
    ALSO called by survex parser when it finds a cave it doesn't recognise.

    Args:
        svxpath: survex path without the .svx suffix, from the *include tree,
            e.g. "caves-1623/666/beast". The area code is taken from the last four
            characters of the first component and the cave id is the second component.
        svxid: optional survex id, e.g. "caves-1623/666/beast". When given it is
            used in preference to svxpath to locate the survex file.
        msg: free text appended to the log messages.

    Returns:
        An already-existing Cave with the same (unofficial_number, areacode) or
        the same url if there is one, otherwise the newly created pending Cave.

    Raises:
        Exception: anything raised by do_pending_cave() is logged as a DataIssue
            and re-raised.
    """
    print(f"Create new cave at {svxpath} - {msg}")

    # Use svxid if we were given one, otherwise svxpath, but only accept the
    # file if it really exists on disc.
    chosen = svxid if svxid else svxpath
    candidate = chosen + ".svx"
    if Path(settings.SURVEX_DATA, candidate).is_file():
        survex_file = candidate
    else:
        survex_file = ""
        message = f"NOT found a survex file {svxpath=} {svxid=}"
        DataIssue.objects.create(parser="caves", message=message)
        print(message, file=sys.stderr)
        print(message)

    parts = svxpath.split("/")
    a = parts[0][-4:]
    caveid = parts[1]
    print(f"parts {parts}, {a}, {caveid}")
    # double check
    if a[0:3] == "162":
        areacode = a[0:4]
        url = f"{areacode}/{caveid}/{caveid}.html" # Note we are appending the .html as we are believe in backwards compatability. This is to fix Martin's new 2023 app.
    else:
        print(f"WARNING: parsers/caves/create_new_cave called with svxpath '{svxpath}'. Surely it should start 'caves-162*'? {msg}")
        areacode = "1623"
        url = f"1623/{caveid}/{caveid}.html"

    k = f"{areacode}-{caveid}"

    caves = Cave.objects.filter(unofficial_number=caveid, areacode=areacode)
    if caves:
        message = f" ! Already exists, caveid:{k} in areacode {areacode} {caves} - {msg}"
        DataIssue.objects.create(parser="caves", message=message)
        print(message)
        return caves[0]

    urltest = Cave.objects.filter(url=url)
    if urltest:
        # k is the slug the new cave would have been given.
        message = f" ! Cave {urltest[0]} already exists with this url {url}. Can't create new cave {k} from {svxpath} "
        DataIssue.objects.create(parser="caves", message=message, url=url)
        print(message)
        return urltest[0]

    try:
        cave = do_pending_cave(k, caveid, url, areacode, msg)
    except Exception:
        message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areacode} - {msg}"
        DataIssue.objects.create(parser="caves", message=message)
        print(message)
        raise

    # NOTE(review): do_pending_cave() returns None when the slug is already in
    # the cave lookup; that case is not handled here and will fail on the next line.

    # But this sets the survex file on the Cave from the first one we find, not necessarily the best survex file for this cave
    if survex_file:
        # Only override when we actually found a file, so that we do not wipe
        # out whatever do_pending_cave() managed to guess.
        cave.survex_file = survex_file # primary survex file for Cave
    cave.areacode = areacode
    cave.save()
    return cave
|
|
|
|
def do_ARGE_cave(slug, caveid, areacode, svxid):
    """Create a minimal Cave from an ARGE survex file.

    Only called by survex parser, NOT the cave parser.
    Creates a new Cave object, but with abbreviated data as the survex file (from ARGE) is all we have.
    We already know the survex file.
    We already know that the cave doesn't exist... though there are bugs..

    Assumes anything in the ARGE list of cave areas is Arge, which is not true for 1626...

    Args:
        slug: cave slug, only used in log messages.
        caveid: may be kataster number or it may be e.g. LA34.
        areacode: e.g. "1626".
        svxid: survex file path relative to SURVEX_DATA, without the .svx suffix.

    Returns:
        An existing Cave with the same url, or with the same areacode and
        upper-cased unofficial_number, if there is one; otherwise the newly
        saved Cave.
    """
    default_note = "This is (probably) an ARGE cave where we only have the survex file and no other information"
    url = f"{areacode}/{caveid}/{caveid}.html"

    urltest = Cave.objects.filter(url=url)
    if urltest:
        message = f" ! Cave {urltest[0]} already exists with this url {url}. Can't create new ARGE cave {slug} from {svxid}"
        DataIssue.objects.create(parser="caves", message=message, url=url)
        print(message)
        return urltest[0]

    numtest = Cave.objects.filter(unofficial_number=caveid.upper(), areacode=areacode)
    if numtest:
        message = f" ! Cave {numtest[0]} already exists with this areacode {areacode} and unofficial_number {caveid.upper()}. Can't create new ARGE cave {slug}"
        DataIssue.objects.create(parser="caves", message=message, url=url)
        print(message)
        return numtest[0]

    # Defaults, so that a missing survex file yields an (almost) empty
    # description instead of an unbound-variable crash further down.
    line1 = line2 = line3 = ""
    rest = []
    sv = Path(settings.SURVEX_DATA, svxid + ".svx")
    if sv.is_file():
        with open(sv, "r") as s:
            line1 = s.readline()
            line2 = s.readline()
            line3 = s.readline()
            rest = s.readlines()
    else:
        print(f"not correct svxid {svxid} {sv}", file=sys.stderr)

    print(f"{caveid} {rest}")
    # Keep only the *begin lines from the remainder of the file.
    passages = "\n" + "".join(line for line in rest if line.strip().startswith("*begin"))

    cave = Cave(
        underground_description="ARGE cave.<br>3 lines of the survexfile, then all the *begin lines:<br><pre>" + line1 + line2 + line3 + passages + "</pre>",
        unofficial_number="ARGE",
        survex_file=f"{svxid}.svx",
        url=url,
        notes=default_note,
        areacode=areacode,
    )
    try:
        cave.kataster_number = int(caveid) # should only set this if all digit
    except (TypeError, ValueError):
        # must be unofficial 'number' or name
        cave.unofficial_number = caveid

    cave.save()
    return cave
|
|
|
|
def do_pending_cave(slug, caveid, url, areacode, msg=None):
    """Create and save a placeholder Cave for a cave which has no description file yet.

    default for a PENDING cave, should be overwritten in the db later if a real cave of the same name exists
    in expoweb/cave_data/1623-"k".html

    Note that at this point (parsing caves) in importing the data we have not yet seen the survex files, so we can't
    look inside the relevant survex file to find the year and so we can't provide helpful links.
    This also gets called when parsing survex files, when we do have this info.

    Args:
        slug: full cave slug including the area prefix, e.g. "1623-2018-NTU-02".
        caveid: stored as the new Cave's unofficial_number.
        url: url of the new Cave; also used to detect an existing Cave with the same url.
        areacode: area code, e.g. "1623"; also used to build the survex paths which are tried.
        msg: free text appended to the log messages.

    Returns:
        None if the slug is already present in GetCaveLookup(); the existing
        Cave if one already has this url; otherwise the newly saved Cave.
    """

    def get_survex_file(k):
        """Guesses at and finds a survex file for this pending cave.
        Convoluted. Needs rewriting.
        Pointless if this cave is being created because we found a survex file...

        One problem is that the Cave name may have different capitalisation from the survex filename,
        e.g. 2018-NTU-02 has a survex file 2018-ntu-02.svx

        Returns the path relative to SURVEX_DATA, or "" if nothing was found.
        NOTE(review): the first two candidates are returned as str, but a file
        found by scanning the directory is returned as a Path object.
        """
        # Strip the "1623-" style area prefix, if present, to get the bare cave id.
        if k[0:3] == "162":
            id = Path(k[5:])
        else:
            id = Path(k)

        # First guess: caves-<area>/<id>/<id>.svx, second guess: caves-<area>/<id>.svx
        survex_file = f"caves-{areacode}/{id}/{id}.svx"
        if Path(settings.SURVEX_DATA, survex_file).is_file():
            return survex_file
        else:
            survex_file = f"caves-{areacode}/{id}.svx"
            if Path(settings.SURVEX_DATA, survex_file).is_file():
                return survex_file

        # This should find the file even if the capitalisation is different, or if the directory name is totally different
        survex_file = ""
        d = Path(settings.SURVEX_DATA, f"caves-{areacode}/{id}")
        if d.is_dir():
            prime_suspect = ""
            dir = d.iterdir()
            for f in dir:
                if f.suffix == ".svx":
                    # Remember every .svx file seen; the last one wins unless
                    # a better match (prime_suspect) turns up.
                    survex_file = f.relative_to(settings.SURVEX_DATA)
                    # Compare at most the first 5 characters, case-insensitively.
                    chk = min(5, len(f.name) - 1)
                    if str(f.name)[:chk].lower() == str(id.name)[:chk].lower(): # bodge which mostly works
                        prime_suspect = survex_file
            if prime_suspect:
                survex_file = prime_suspect
                # message = f" ! {k:14} Found a survex file which might be the right one: {survex_file} - {msg}"
                # DataIssue.objects.create(parser='caves', message=message, url=url)
                # print(message)
        # With survex_file == "" this tests the SURVEX_DATA directory itself, which is not a file.
        if Path(settings.SURVEX_DATA, survex_file).is_file():
            return survex_file
        return ""

    g = GetCaveLookup()
    with transaction.atomic():
        if slug in g:
            message = f" ! {slug:18} cave listed in pendingcaves.txt already exists. - {msg}"
            DataIssue.objects.create(parser="caves", message=message, url=url)
            print(message)
            # NOTE(review): returns None here, unlike the other exits which return a Cave.
            return

        # Instructions for whoever eventually writes up the cave; stored in the Cave's notes field.
        default_note = "A reference has been found to this cave id in a survex file in the loser repo, or in a wallet metadata"
        default_note += " in a JSON file in the drawings repo, but no Cave Description exists in expoweb (in /cave_data/)<br><br><br>\n"
        default_note += "INSTRUCTIONS: FIRST read the notes in <a href='/cave_data/pendingcaves.txt'>pendingcaves.txt</a><br />"
        default_note += "Next open 'This survex file' (link above the CaveView panel) to find the date and info. Then "
        default_note += '<br><br>\n\n - (0) look in the <a href="/noinfo/cave-number-index">cave number index</a> for notes on this cave, '
        default_note += "<br><br>\n\n - (1) search in the survex file for the *ref to find a "
        default_note += "relevant wallet, e.g.<a href='/survey_scans/2009%252311/'>2009#11</a> and read the notes image files <br>\n - "
        default_note += (
            "<br><br>\n\n - (2) search in the Expo for that year e.g. <a href='/expedition/2009'>2009</a> to find a "
        )
        default_note += "relevant logbook entry, remember that the date may have been recorded incorrectly, "
        default_note += (
            "so check for trips i.e. logbook entries involving the same people as were listed in the survex file, "
        )
        default_note += (
            "and you should also check the scanned copy of the logbook (linked from each logbook entry page) "
        )
        default_note += "just in case a vital trip was not transcribed, then <br>\n - "
        default_note += (
            "click on 'Edit this cave' and copy the information you find in the survex file and the logbook"
        )
        default_note += "and delete all the text in the 'Notes' section - which is the text you are reading now."
        default_note += "<br><br>\n\n - Only two fields on this form are essential. "
        default_note += "Documentation of all the fields on 'Edit this cave' form is in <a href='/handbook/survey/caveentryfields.html'>handbook/survey/caveentryfields</a>"
        default_note += "<br><br>\n\n - "
        default_note += "You will also need to create a new entrance from the 'Edit this cave' page. Ignore the existing dummy one, it will evaporate on the next full import."
        default_note += "<br><br>\n\n - "
        default_note += "When you Submit it will create a new file in expoweb/cave_data/ "
        default_note += (
            "<br><br>\n\n - Now you can edit the entrance info: click on Edit below for the dummy entrance. "
        )
        default_note += "and then Submit to save it (if you forget to do this, a dummy entrance will be created for your new cave description)."
        default_note += "<br><br>\n\n - Finally, you need to find a nerd to edit the file '<var>expoweb/cave_data/pending.txt</var>' "
        default_note += (
            f"to remove the line <br><var>{slug}</var><br> as it is no longer 'pending' but 'done. Well Done."
        )
        # Refuse to create a second Cave with the same url: hand back the existing one.
        urltest = Cave.objects.filter(url=url)
        if urltest:
            message = f" ! Cave {urltest[0]} already exists with this url {url}. Can't create new cave {slug}"
            DataIssue.objects.create(parser="caves", message=message, url=url)
            print(message)
            return urltest[0]

        survex_file = get_survex_file(slug)

        cave = Cave(
            unofficial_number=caveid,
            underground_description="Pending cave write-up - No cave description created yet.",
            survex_file=survex_file,
            url=url,
            notes=default_note,
            areacode=areacode,
        )
        # NOTE(review): a freshly constructed model instance is presumably always
        # truthy, so the else branch looks unreachable - confirm.
        if cave:
            cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key.
            message = f" ! {slug:18} Pending cave write-up url: {url} - {survex_file=} - {msg}"
            DataIssue.objects.create(parser="caves", message=message, url=url)
            print(message)

        else:
            message = f" ! {slug:11s} PENDING cave create failure - {msg}"
            DataIssue.objects.create(parser="caves", message=message)
            print(message)

    return cave
|
|
|
|
def getXML(text, itemname, minItems=1, maxItems=None, context=""):
    """Return the contents of every <itemname>...</itemname> element in text.

    This never raises on a count mismatch: too few or too many elements are
    reported on the terminal and in the DataIssues log, and whatever was found
    is returned anyway. (It would be better to throw an exception here and
    handle it in the calling routine, which has the context.)

    Args:
        text: the text to search.
        itemname: the tag name, without angle brackets.
        minItems: fewer matches than this is reported as a problem.
        maxItems: more matches than this is reported, unless it is None.
        context: filename or url used in the report and as the DataIssue url.

    Returns:
        A list of the matched contents, in document order. If minItems is 0
        and nothing matched, the list is [""] so that callers can always take
        element [0].
    """
    found = re.findall(f"<{itemname}>(.*?)</{itemname}>", text, re.S)
    count = len(found)

    def report(problem):
        # Same text goes to the DataIssues log and to the terminal.
        DataIssue.objects.create(parser="caves", message=problem, url="" + context)
        print(problem)

    if count < minItems:
        report(
            f" ! {count:d} x {itemname} found, at least {minItems:d} expected. Load may ABORT. "
            + " in file "
            + context
        )

    if maxItems is not None and count > maxItems:
        report(
            f" ! {count:d} x {itemname} found, no more than {maxItems:d} expected in this XML unit. Load may ABORT. "
            + " in file "
            + context
        )

    if minItems == 0 and not found:
        return [""]
    return found
|
|
|
|
|
|
def boolify(boolstrs):
    """Convert the first string of a sequence into a bool.

    Only the exact spellings "True", "true", "False" and "false" are
    understood; anything else raises KeyError, and an empty sequence raises
    IndexError.
    """
    spellings = {}
    for word, value in (("True", True), ("False", False)):
        spellings[word] = value
        spellings[word.lower()] = value
    first = boolstrs[0]
    return spellings[first]
|
|
|
|
def validate_station(station):
    """Check that a survex station identifier has an acceptable format.

    It is possible to break troggle entirely by getting this wrong.
    These station identifiers are matched against other stations using .endswith()
    in parsers/locations.py
    so a simple number here will match hundreds of SurvexStation objects.
    It should be, e.g. "1623.p240"

    We will test them against survex stations after we have loaded them.
    We CANNOT test against locations here as we have not read the survex files yet.

    Args:
        station: the identifier string; the empty string means "no station"
            and is accepted.

    Returns:
        True if the identifier is empty or contains a full stop.

    Raises:
        ValueError: if a non-empty identifier contains no full stop.
    """
    if station == "":
        return True

    # Must have the right format in its name: at least one full stop.
    if "." not in station:
        # Bad station identifier. The caller is expected to catch this, log
        # an error and carry on.
        raise ValueError(f"Bad station identifier '{station}': must be formatted like '1623.p240'")
    return True
|
|
|
|
def read_entrance(filename, ent=None):
    """Reads an entrance description from the .html file.

    Runs on initial full import, and also whenever an entrance is edited online.

    The entrance slug, the cave slug and the entrance letter are all derived
    from the filename. A <slug> field in the file is only checked for
    consistency and otherwise ignored.

    EDIT href examples
    /1623-1:1623-1a_entrance_edit
    /1623/1/1623-1_cave_edit/

    Args:
        filename: The name of the entrance_data .html file, e.g. 1623-JS-01a.html
        ent: The entrance object, if it already exists.

    Returns:
        The saved entrance object (a new or updated one if `ent` is None), or
        None if the file does not hold exactly one <entrance> element.
    """
    def getXMLmin0(field):
        return getXML(entrancecontents, field, minItems=0, maxItems=1, context=context)

    def getXMLmax1(field):
        return getXML(entrancecontents, field, maxItems=1, context=context)

    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
    with open(os.path.join(ENTRANCEDESCRIPTIONS, filename)) as f:
        contents = f.read()
    context = filename

    # Derive the letter, entrance slug and cave slug from the filename
    entslug_fn = filename[:-5] # remove .html
    if entslug_fn[-1] in LETTERS:
        caveslug_fn = entslug_fn[:-1]
    else:
        caveslug_fn = entslug_fn

    ent_area = filename[:4]
    cave_name = caveslug_fn[5:] # remove initial 1623-

    ent_edit_url = f"/{caveslug_fn}:{entslug_fn}_entrance_edit"
    cave_edit_url = f"/{ent_area}/{cave_name}/{cave_name}_cave_edit"

    # validate filename, check areacode
    if ent_area not in AREACODES:
        message = f'! BAD AREA CODE in "{filename}". Not recognised.'
        DataIssue.objects.create(parser="entrances", message=message, url=ent_edit_url)
        print(message)

    # New system 2024, create the Cave object when parsing Entrances, not Caves.
    # Called for its effect on the database; the returned Cave is not needed here.
    make_cave(caveslug_fn)

    entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context)
    if len(entrancecontentslist) != 1:
        message = f'! BAD ENTRANCE DATA in "{filename}". More than one entrance. Edit file manually, click.'
        DataIssue.objects.create(parser="entrances", message=message, url=ent_edit_url)
        print(message)
        return None

    entrancecontents = entrancecontentslist[0]
    slugs = getXMLmin0("slug") # not the full slug, just the id: i.e. without the 1623- prefix
    # we ignore all these, because we now just use the filename. But if they are there, we validate them.
    if len(slugs) > 1:
        # Only ever one of these per entrance in the expo dataset
        message = f" ! - More than one slug for an entrance: {entslug_fn}, slugs: {slugs}. Ignoring all of them."
        DataIssue.objects.create(parser="entrances", message=message, url=cave_edit_url)
        print(message)

    slug = slugs[0] if slugs else ""
    if slug and slug != entslug_fn:
        message = f" ! - Warning, mismatch between entrance slug (or unofficial name) and filename: {slug=} {filename=}. Ignoring slug field, using filename."
        DataIssue.objects.create(parser="xEntrances", message=message, url=cave_edit_url)
        print(message)
    slug = entslug_fn # force: the filename is authoritative

    # Optional fields: an absent one comes back as [""].
    lastvisit = getXML(entrancecontents, "lastvisit", maxItems=1, minItems=0, context=cave_edit_url)
    lat_wgs84 = getXML(entrancecontents, "lat_wgs84", maxItems=1, minItems=0, context=cave_edit_url)
    long_wgs84 = getXML(entrancecontents, "long_wgs84", maxItems=1, minItems=0, context=cave_edit_url)

    # Expected fields: an absent one is logged by getXML and comes back as [],
    # so taking [0] below will then raise IndexError.
    alt = getXMLmax1("alt")
    approach = getXMLmax1("approach")
    bearings = getXMLmax1("bearings")
    entrance_description = getXMLmax1("entrance_description")
    explorers = getXMLmax1("explorers")
    findability = getXMLmax1("findability")
    findability_description = getXMLmax1("findability_description")
    location_description = getXMLmax1("location_description")
    #map_description = getXMLmax1("map_description")
    marking = getXMLmax1("marking")
    marking_comment = getXMLmax1("marking_comment")
    name = getXMLmax1("name")
    non_public = getXMLmax1("non_public")
    other_description = getXMLmax1("other_description")
    other_station = getXMLmax1("other_station")
    photo = getXMLmax1("photo")
    tag_station = getXMLmax1("tag_station")
    underground_description = getXMLmax1("underground_description")

    if not ent:
        ent, _created = Entrance.objects.update_or_create(slug=slug)

    ent.name = name[0]
    ent.non_public = boolify(non_public)
    ent.alt = alt[0]
    ent.approach = approach[0]
    ent.bearings = bearings[0]
    ent.lat_wgs84 = lat_wgs84[0]
    ent.long_wgs84 = long_wgs84[0]
    ent.entrance_description = entrance_description[0]
    ent.explorers = explorers[0]
    ent.filename = filename
    ent.findability = findability[0]
    ent.findability_description = findability_description[0]
    ent.lastvisit = lastvisit[0]
    ent.location_description = location_description[0]
    #ent.map_description = map_description[0]
    ent.marking = marking[0]
    ent.marking_comment = marking_comment[0]
    ent.other_description = other_description[0]
    ent.other_station = other_station[0]
    ent.photo = photo[0]
    # ent.slug is not set from the file: it is set algorithmically from the filename
    ent.tag_station = tag_station[0]
    ent.underground_description = underground_description[0]

    for st in [ent.other_station, ent.tag_station]:
        try:
            validate_station(st)
        except Exception:
            # A badly formatted station is reported but does not stop the import.
            message = f" ! BAD ENTRANCE TAG '{st}' in '{filename}'. Must format like '1623.p204'. Edit file manually, click."
            #http://localhost:8000/1623/2023-EBH-01/1623-2023-EBH-01:1623-2023-EBH-01_entrance_edit
            DataIssue.objects.create(parser="entrances", message=message, url=ent_edit_url)
            print(message)
    ent.save()
    return ent
|
|
|
|
def make_cave(slug):
    """Get or create the Cave object whose filename is "<slug>.html".

    When a cave has multiple entrances this is called once per entrance, so
    the Cave object may already exist.

    If update_or_create() fails (e.g. because several Caves share the
    filename), every Cave with that filename is reported as a DataIssue and
    the last one which has a slug is used, with a warning prepended to its
    notes. Note that the amended notes are not saved here.

    Args:
        slug: the cave slug, e.g. "1623-161".

    Returns:
        The Cave object, or None if update_or_create() failed and no existing
        Cave with a slug could be found for the filename.
    """
    filename = f"{slug}.html"
    try:
        cave, _created = Cave.objects.update_or_create(filename=filename) # replace with slug when CaveSlug tidied up
        return cave
    except Exception:
        print(f" ! FAILED to get only one CAVE in db when updating using: {filename} or not found.")

    # Same form of edit url as is used for DataIssues when reading the cave_data file.
    edit_url = f"/{slug[:4]}/{slug}_cave_edit"
    cave = None
    kaves = Cave.objects.all().filter(filename=filename) # assumes filename unique, eeugh.
    for k in kaves:
        message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
        DataIssue.objects.create(parser="caves", message=message, url=edit_url)
        print(message)
    for k in kaves:
        if k.slug() is not None:
            print(" ! - OVERWRITING this one: slug:" + str(k.slug()))
            k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes
            cave = k
    return cave
|
|
|
|
def read_cave(filename, mvf=None, cave=None):
|
|
"""Reads an entrance description from the .html file
|
|
Convoluted. Sorry. Needs rewriting
|
|
|
|
mvf is a file handle for misnamed files
|
|
|
|
If not called as part of initial import, then the global lists will not be correct
|
|
but this is OK, a search will find them in the db.
|
|
|
|
Attempted to use standard python3.11 xml library but fails on HTML entities (2023-04-23)
|
|
import xml.etree.ElementTree as ET
|
|
tree = ET.parse(fn)
|
|
xml_root = tree.getroot()
|
|
for t in ["html", "head", "body", "cave","non_public", "caveslug", "official_name","entrance"]:
|
|
elements = xml_root.findall(t)
|
|
|
|
"""
|
|
def getXMLmin0(field):
|
|
return getXML(cavecontents, field, minItems=0, maxItems=1, context=context)
|
|
|
|
def getXMLmax1(field):
|
|
return getXML(cavecontents, field, minItems=0, maxItems=1, context=context)
|
|
|
|
def do_entrances():
|
|
"""For both bulk import and individual re-reading of cave_data file,
|
|
fix the entrances
|
|
|
|
What is Class CaveAndEntrance for? It was to allow mandy<=>many relationship between caves and entrances, but now we insist
|
|
only one Cave for any Entrance, so this Class is reduncdant and should be removed..
|
|
"""
|
|
c = cave
|
|
for e in entrances:
|
|
eslugs = getXML(e, "entranceslug", maxItems=1, context=context)
|
|
if len(eslugs) < 1:
|
|
print(f"TYPO IN cave_data file <entrance> tag contents\n <entrance> contents: {e}\n {eslugs=}")
|
|
eslug = eslugs[0]
|
|
# if eslug.endswith(('a','b','c','d','e','f')):
|
|
# print(f"! Entrance {eslug}")
|
|
# if eslug.endswith('a b'):
|
|
# message = f' - Entrance has weird name slug:"{eslug}" cave:"{cave}" caveslug:"{slug}" filename:"cave_data/{filename}"'
|
|
# DataIssue.objects.create(parser="xEntrances", message=message, url=f"{cave.url}_cave_edit/")
|
|
# print(message)
|
|
|
|
letter = getXML(e, "letter", maxItems=1, context=context)[0]
|
|
|
|
if len(entrances) > 1 and letter =="":
|
|
# Usually the second entrance is 'b', but the first is still unlettered. So probably 'a'
|
|
letter = eslug[-1].lower()
|
|
if letter.lower() not in list(string.ascii_lowercase):
|
|
letter = "a"
|
|
message = f"- Warning - Empty 'letter' field for '{eslug}' in multiple-entrance cave '{cave}', setting to {letter}."
|
|
#eurl = f"{cave.url}_cave_edit/"
|
|
eurl = Path(cave.url).parent + f"{cave.slug()}_cave_edit/"
|
|
# edit recognizer: (?P<path>.*)/(?P<slug>[^/]+)_cave_edit/$
|
|
DataIssue.objects.create(parser="entrances", message=message, url=eurl)
|
|
print(message)
|
|
|
|
if len(entrances) == 1 and not eslug: # may be empty: <entranceslug></entranceslug>
|
|
msg="DUMMY: no entrance slug read from file, so assume textually same as cave slug"
|
|
set_dummy_entrance(slug[5:], slug, c, msg=msg)
|
|
print(f"! {msg}\n- {slug} {c}")
|
|
else:
|
|
if eslug in entrances_xslug:
|
|
# print(f"eslug {eslug} found eslug in xslug cache ")
|
|
entrance = entrances_xslug[eslug]
|
|
else:
|
|
# print(f"eslug {eslug} looking up entrance ")
|
|
try:
|
|
entrance = Entrance.objects.get(slug=eslug)
|
|
entrances_xslug[eslug] = entrance
|
|
except:
|
|
message = f"! Fail entrance loading {eslug} /entrance_data/{eslug} file does not exist or loading it from {filename} failed."
|
|
DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.url}_cave_edit/")
|
|
print(message)
|
|
print(e)
|
|
return
|
|
|
|
if eslug != f"{entrance}":
|
|
message = f"eslug {eslug} using different entrance {entrance} to set CaveAndEntrance"
|
|
DataIssue.objects.create(parser="xEntrances", message=message, url=f"{cave.areacode}/{cave.areacode}-{cave.url}_cave_edit/")
|
|
print(message)
|
|
try:
|
|
# this fails if there is not an unambiguous letter set.
|
|
CaveAndEntrance.objects.update_or_create(
|
|
cave=cave, entranceletter=letter, entrance=entrance
|
|
)
|
|
except:
|
|
print(f"! Entrance setting failure {slug}")
|
|
message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} letter:"{letter}" cave:"{cave}" filename:"cave_data/{filename}"\n{e}'
|
|
DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.areacode}/{cave.areacode}-{cave.url}_cave_edit/")
|
|
print(message)
|
|
def reload_entrances():
|
|
"""For individual re-reading of a cave_data file when editing,
|
|
also re-read the entrance_data files
|
|
"""
|
|
for eslug in entrances_xslug:
|
|
entrance = entrances_xslug[eslug]
|
|
read_entrance(entrance.filename, ent=entrance)
|
|
entrance.save()
|
|
|
|
def do_caveslugstuff(context):
|
|
"""This may be a fossil. We only have one slug per cave in troggle.
|
|
Pending destruction of this whole concept and Class CaveSlug
|
|
What is Class CaveSlug for?
|
|
"""
|
|
return
|
|
|
|
def check_directory(areacode, caveid, url, cave):
|
|
dir = Path(settings.EXPOWEB, areacode, caveid)
|
|
dir_l = Path(settings.EXPOWEB, areacode, caveid.lower())
|
|
dir_u = Path(settings.EXPOWEB, areacode, caveid.upper())
|
|
if dir.is_dir():
|
|
return
|
|
if dir_l.is_dir() or dir_u.is_dir():
|
|
message = f" ! Cave URL capitalisation incorrect '{dir}' is not a directory but different capitalisation is. {url=}\n - Fix by renaming cave_data/{caveid}.html which determines the cave id OR by renaming the directory and hand-fixing all the links to the files in the cave description."
|
|
DataIssue.objects.create(parser="caves", message=message, url=f"{cave.newslug()}_cave_edit/")
|
|
print(message)
|
|
return
|
|
if cave.filename:
|
|
# not a pending cave, yet the directory does not exist. This is FINE. Many don't (yet)
|
|
pass
|
|
|
|
def check_slug(areacode, kataster_number, unofficial_number, url):
    """Sanity-check the filename-derived cave slug against the cave's numbers.

    There is a <caveslug> field in the .html file, but we now ignore it as we use the
    filename itself to set the slug. This check remains pending the eventual removal
    of that field.

    The expected slug is "<areacode>-<kataster_number>" when a kataster number is set,
    otherwise "<areacode>-<unofficial_number>". On a mismatch a DataIssue is recorded;
    if the file itself would need renaming, a second DataIssue is recorded and an
    "mv" line is appended to the rename script.

    Reads `slug`, `filename` and `mvf` from the enclosing scope.

    Args:
        areacode: area prefix of the cave, e.g. "1623".
        kataster_number: official number; falsy when the cave has none.
        unofficial_number: only consulted when there is no kataster number.
        url: cave url, used only inside the diagnostic messages.

    Returns:
        `slug` unchanged when it matches, otherwise the slug it should have been.
    """
    if kataster_number:
        expected = f"{areacode}-{kataster_number}"
        if slug == expected:
            return slug
        message = f" ! Cave Slug mismatch (kataster): '{slug}' != '{expected}' {url=} in file {filename}. IGNORING caveslug field in the .html file."
        correctslug = expected
    else:
        expected = f"{areacode}-{unofficial_number}"
        if slug == expected:
            return slug
        if slug.lower() == f"{areacode}-{unofficial_number.lower()}":
            # Same characters, different case only.
            message = f" ! Cave Slug capitalisation incorrect (unofficial): '{slug}' != '{expected}' {url=} in file {filename}. IGNORING caveslug field in the .html file."
            correctslug = slug.lower()
        else:
            message = f" ! Cave Slug mismatch (unofficial): '{slug}' != '{expected}' {url=} in file {filename} IGNORING caveslug field in the .html file."
            correctslug = expected

    # url here is for the href link to edit the bad data in the DataIssues page
    msgurl = f"/{correctslug[0:4]}/{correctslug}_cave_edit/"
    DataIssue.objects.create(parser="caves", message=message, url=msgurl)

    if filename != f"{correctslug}.html":
        # Report the two names that were actually compared. The previous text always
        # printed the unofficial number, which was misleading for kataster-numbered caves.
        message = f" ! Filename is not the same as the cave slug: '{filename}' != '{correctslug}.html' {url=}. IGNORING caveslug field in the .html file."
        DataIssue.objects.create(parser="caves", message=message, url=msgurl)
        # NOTE(review): assumes mvf can be None when this runs outside the bulk
        # import (e.g. an online edit) - confirm against the enclosing signature.
        if mvf is not None:
            mvf.write(f"mv {filename} {correctslug}.html\n")

    print(message)
    return correctslug
|
|
|
|
global entrances_xslug
|
|
global caves_xslug
|
|
global areas_xslug
|
|
|
|
# Note: we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it
|
|
|
|
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
|
|
fn = settings.CAVEDESCRIPTIONS / filename
|
|
|
|
#print(f" - Reading Cave from cave descriptions file {fn}")
|
|
if not fn.exists():
|
|
message = f" ! Cave_data file reading problem filename:'cave_data/{filename}'"
|
|
DataIssue.objects.create(parser="caves", message=message, url="")
|
|
print(message)
|
|
return None
|
|
|
|
# ignore the <caveslug> field: the slug is taken from the filename instead
|
|
slug = filename[:-5] # strip off the ".html" at the end of the filename
|
|
slugs = [slug]
|
|
areacode = slug[:4]
|
|
|
|
context = f"/{areacode}/{slug}_cave_edit"
|
|
|
|
with open(fn) as f:
|
|
contents = f.read()
|
|
cavecontentslist = getXML(contents, "cave", maxItems=1, context=context)
|
|
|
|
if len(cavecontentslist) != 1:
|
|
message = f'! BAD CAVE DATA in "{filename}". More than one cave. Edit file manually, click.'
|
|
DataIssue.objects.create(parser="caves", message=message, url="")
|
|
print(message)
|
|
return None
|
|
|
|
cavecontents = cavecontentslist[0]
|
|
|
|
#print(f"{filename=} {slug=}")
|
|
if slug[-1].lower() in LETTERS:
|
|
message = f" ! Cave name ends in a letter not a number. Fix this! in file {filename} "
|
|
DataIssue.objects.create(parser="caves", message=message, url=context) # url here is for where the file actually is, for editing
|
|
print(message)
|
|
|
|
non_public = getXMLmax1("non_public")
|
|
official_name = getXMLmax1("official_name")
|
|
kataster_code = getXMLmax1("kataster_code")
|
|
kataster_number = getXMLmax1("kataster_number")
|
|
unofficial_number = getXMLmax1("unofficial_number")
|
|
explorers = getXMLmax1("explorers")
|
|
underground_description = getXMLmax1("underground_description")
|
|
equipment = getXMLmax1("equipment")
|
|
references = getXMLmax1("references")
|
|
survey = getXMLmax1("survey")
|
|
#kataster_status = getXMLmax1("kataster_status")
|
|
#underground_centre_line = getXMLmax1("underground_centre_line")
|
|
notes = getXMLmax1("notes")
|
|
survex_file = getXMLmax1("survex_file")
|
|
description_file = getXMLmax1("description_file")
|
|
|
|
# Optional, but probably deprecated as we should just derive this from the survex data
|
|
length = getXMLmin0("length")
|
|
depth = getXMLmin0("depth")
|
|
extent = getXMLmin0("extent")
|
|
|
|
manual_edit = True
|
|
if not cave:
|
|
# we are parsing using databaseReset.py not an online edit
|
|
# we have already checked for uniqueness but the Cave object may/should be already created by the Entrance parsing
|
|
manual_edit = False
|
|
|
|
# The Cave object should already have been created when reading the entrance_data file
|
|
caves = Cave.objects.filter(filename=filename)
|
|
if len(caves) ==1:
|
|
cave = caves[0]
|
|
else:
|
|
c = Cave.objects.filter(filename=filename.lower())
|
|
if len(c) ==1:
|
|
cave = c[0]
|
|
else:
|
|
print(f" * Cannot find single Cave object for cave_data/{filename} from entrance_data file. {len(caves)} found")
|
|
return False
|
|
|
|
|
|
# From here on the code applies to both edited and newly-imported caves (mostly!)
|
|
do_caveslugstuff(context) # needs cave!=None
|
|
|
|
# We no longer need the <area> tag to define 1623 etc as we get that from the filename.
|
|
areas = getXML(cavecontents, "area", context=context, minItems=0) # can be multiple <area> tags
|
|
for area_slug in areas:
|
|
if area_slug not in AREACODES: # only detect subareas
|
|
cave.subarea = area_slug
|
|
if not cave.areacode:
|
|
if areacode in AREACODES:
|
|
cave.areacode = areacode
|
|
|
|
cave.non_public=boolify(non_public)
|
|
cave.official_name=official_name[0]
|
|
cave.kataster_code=kataster_code[0]
|
|
if "+" in kataster_code[0]:
|
|
cave.fully_explored = True
|
|
# print(f"{kataster_code[0]} {slug}")
|
|
cave.kataster_number=kataster_number[0]
|
|
cave.unofficial_number=unofficial_number[0]
|
|
cave.explorers=explorers[0]
|
|
cave.underground_description=underground_description[0]
|
|
cave.equipment=equipment[0]
|
|
cave.references=references[0]
|
|
cave.survey=survey[0]
|
|
#cave.kataster_status=kataster_status[0]
|
|
#cave.underground_centre_line=underground_centre_line[0]
|
|
cave.notes=notes[0]
|
|
if length:
|
|
cave.length=length[0]
|
|
if depth:
|
|
cave.depth=depth[0]
|
|
if extent:
|
|
cave.extent=extent[0]
|
|
cave.survex_file=survex_file[0]
|
|
cave.description_file=description_file[0]
|
|
# cave.url=url[0] # set algorithmically now:
|
|
cave.url = f"{cave.areacode}/{cave.number()}/{cave.number()}.html"
|
|
check_directory(cave.areacode, cave.number(), cave.url, cave)
|
|
|
|
slug = check_slug(cave.areacode, cave.kataster_number, cave.unofficial_number, cave.url) #NB cave.slug is not a field on Cave
|
|
|
|
# This whole way of doing entrances can be replaced by simply working out from the entrance_data filename what the cave is. To do.
|
|
entrances = getXML(cavecontents, "entrance", context=context)
|
|
#do_entrances()
|
|
if not entrances or len(entrances) < 1:
|
|
# missing entrance link in cave_data/1623-* .html file
|
|
set_dummy_entrance(slug[5:], slug, cave, msg="DUMMY: no entrances")
|
|
else:
|
|
do_entrances()
|
|
if manual_edit:
|
|
reload_entrances()
|
|
|
|
if survex_file[0]:
|
|
if not (Path(SURVEX_DATA) / survex_file[0]).is_file():
|
|
message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"'
|
|
DataIssue.objects.create(parser="caves", message=message, url=f"/{slug[0:4]}/{slug}_cave_edit/")
|
|
print(message)
|
|
# else:
|
|
# print(f"{slug:12} survex filename UNSET")
|
|
|
|
|
|
if description_file[0]: # if not an empty string
|
|
message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "cave_data/{filename}"'
|
|
DataIssue.objects.create(parser="caves ok", message=message, url=f"/{slug}_cave_edit/")
|
|
# print(message)
|
|
|
|
if not (Path(EXPOWEB) / description_file[0]).is_file():
|
|
message = f' ! {slug:12} description filename "{EXPOWEB}/{description_file[0]}" does not refer to a real file'
|
|
DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
|
|
print(message)
|
|
cave.save()
|
|
return cave
|
|
|
|
def _read_pending_cave_ids(fpending):
    """Return the set of cave ids listed in the pending-caves file.

    Each line holds one id, optionally followed by ";" and further text which is
    ignored. Blank entries are skipped. A missing file gives an empty set.
    """
    pending = set()
    if fpending.is_file():
        with open(fpending, "r") as fo:
            for line in fo:
                pcaveid = line.split(";", 1)[0].strip()  # keep only the bit before ";"
                if pcaveid:
                    pending.add(pcaveid)
    return pending


def readcaves():
    """Called from databaseReset mass importer.

    Rebuilds the Cave and Entrance data from the xml-format HTML files in the EXPOWEB repo:

    1. read the list of pending cave ids from pendingcaves.txt,
    2. delete the existing Cave and Entrance objects and the cave-related DataIssues,
    3. read every file in ENTRANCEDESCRIPTIONS with read_entrance(),
    4. read every .html file in CAVEDESCRIPTIONS with read_cave(), which also appends
       rename commands to a shell script,
    5. build the alias lookup with GetCaveLookup(),
    6. create the pending caves with do_pending_cave().

    Raises:
        Whatever do_pending_cave() raises; the failure is reported and then re-raised.
    """
    # Pending is for those caves which do not have cave_data/1623-xxx.html XML files even though
    # they exist and have surveys.
    # This is a plain file read, so it does not need to run inside a database transaction.
    pending = _read_pending_cave_ids(Path(CAVEDESCRIPTIONS, "pendingcaves.txt"))

    with transaction.atomic():
        print(" - Deleting Caves and Entrances")
        # attempting to avoid MariaDB crash when doing this
        # Deliberately best-effort: a failure is reported but does not stop the import.
        # Only Exception is caught so that Ctrl-C and SystemExit still get through.
        try:
            # NOTE(review): Area is not among the imports visible at the top of this file;
            # if it is undefined this always fails with NameError and can be removed - confirm.
            Area.objects.all().delete()
        except Exception as e:
            print(f" ! Could not delete Area objects: {e!r}")
        try:
            Cave.objects.all().delete()
        except Exception as e:
            print(f" ! Could not delete Cave objects: {e!r}")
        try:
            Entrance.objects.all().delete()
        except Exception as e:
            print(f" ! Could not delete Entrance objects: {e!r}")

        # Clear the cave data issues and the caves as we are reloading
        for parser in ("areas", "caves", "caves ok", "aliases", "aliases ok"):
            DataIssue.objects.filter(parser=parser).delete()

        #DataIssue.objects.filter(parser="entrances").delete()
        #DataIssue.objects.filter(parser="xEntrances").delete()

    with transaction.atomic():
        print(" - Reading Entrances from entrance descriptions xml files")
        # next(os.walk(d))[2] is the list of plain files directly inside d.
        for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files, use pathlib !
            read_entrance(filename)

        # Why is this needed ? Without it, we lose these DataIssues!
        ent_issues = DataIssue.objects.filter(parser="entrances")
        print(f" _ We now have {len(ent_issues)} entrance DataIssues")

    with transaction.atomic():
        print(" - Reading Caves from cave descriptions xml files")
        # Shell script of "mv" commands for mis-named cave files; overwritten on every run.
        mvscript = "/tmp/mvscript.sh"
        with open(mvscript, "w") as mvf:  # overwrite
            mvf.write(f"cd {CAVEDESCRIPTIONS}\n")
            for filename in next(os.walk(CAVEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
                if filename.endswith(".html"):
                    read_cave(filename, mvf)

    print(" - Setting up all the variously useful alias names")
    GetCaveLookup()

    print(" - Setting pending caves")
    # Do this last, so we can detect if they are created and no longer 'pending'

    with transaction.atomic():
        for k in pending:
            if k[0:3] == "162":
                # id already carries its area prefix, e.g. "1623-xxx"
                areacode = k[0:4]
                number = k[5:]
                url = f"{areacode}/{number}.html"  # Note we are appending the .html to allow for offline websites
            else:
                # no area prefix: the id is taken to belong to area 1623
                areacode = "1623"
                number = k
                url = f"1623/{k}"

            try:
                do_pending_cave(k, number, url, areacode)
            except Exception:
                message = f" ! Error. Cannot create pending cave, pending-id:{k} in area {areacode}"
                # NOTE(review): the exception propagates out of the atomic block, so this
                # DataIssue is presumably rolled back together with it - confirm.
                DataIssue.objects.create(parser="caves", message=message)
                print(message)
                raise
|