forked from expo/troggle
679 lines
32 KiB
Python
679 lines
32 KiB
Python
import os
|
|
import re
|
|
from pathlib import Path
|
|
|
|
from django.conf import settings
|
|
from django.db import transaction
|
|
|
|
from troggle.core.models.caves import Area, Cave, CaveAndEntrance, Entrance, EntranceSlug, GetCaveLookup
|
|
from troggle.core.models.logbooks import CaveSlug
|
|
from troggle.core.models.troggle import DataIssue
|
|
from troggle.settings import CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS, EXPOWEB, SURVEX_DATA
|
|
|
|
# Reads all the cave description data by parsing the xml files
# (stored as e.g. :EXPOWEB:/cave_data/1623-161.html )
# and creates the various Cave, Entrance and necessary Area objects.
#
# This is the first import that happens after the database is reinitialised,
# so it is the first thing that creates tables.

# Free-text list of known problems; kept as a module-level string.
todo = """
- Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file.
So we will need a separate file-editing capability just for this configuration file ?!

- Semi-automagically import all the 1627- pending caves and create HTML files for them to be
edited individually. (These are caves we only know about because we have German survex files.)

- crashes on MariaDB in databasereset.py on server when deleting Caves and complains Area needs a
non null parent, But this is not true. The only solution we have found is to let it crash, then
stop and restart MariaDB (requires a logon able to sudo) and then restart the databasereset.py
again. (status as of July 2022)
"""

# In-memory caches filled during the import, each keyed by a slug string:
#   entrances_xslug: entrance slug -> Entrance object
#   caves_xslug:     cave slug     -> result of CaveSlug.objects.update_or_create()
#   areas_xslug:     area name     -> Area object
entrances_xslug = {}
caves_xslug = {}
areas_xslug = {}
|
|
|
|
|
|
def dummy_entrance(k, slug, msg="DUMMY"):
    """Create, save and return a placeholder Entrance.

    Used either for a PENDING cave or as a DUMMY entrance when the user
    forgot to provide one when creating the cave.

    k    -- used as the entrance name and in the log messages
    slug -- stored as cached_primary_slug; slug + ".html" becomes the filename
    msg  -- label ("DUMMY", "PENDING") used only in the failure messages

    Raises RuntimeError if the freshly built Entrance evaluates as false.
    """
    ent = Entrance(
        name=k,
        entrance_description="Dummy entrance: auto-created when registering a new cave "
        + "and you forgot to create an entrance for it. Click on 'Edit' to enter the correct data, then 'Submit'.",
        marking="?",
    )
    if not ent:
        message = f" ! {k:11s} {msg} cave SLUG '{slug}' create failure"
        DataIssue.objects.create(parser="caves", message=message, url=f"{slug}")
        print(message)
        # The original had a bare `raise` here. Outside an except block that is
        # itself a RuntimeError ("No active exception to re-raise"), so raise
        # the same exception type explicitly, with a useful message.
        raise RuntimeError(message)

    ent.save()  # must save to have id before foreign keys work.
    try:  # Now create a entrance slug ID
        # NOTE(review): this EntranceSlug is constructed but never saved, so
        # nothing is written to the database here - confirm whether a
        # .save() / objects.create() was intended.
        EntranceSlug(entrance=ent, slug=slug, primary=False)
    except Exception:
        message = f" ! {k:11s} {msg}-{slug} entrance create failure"
        DataIssue.objects.create(parser="caves", message=message, url=f"{slug}")
        print(message)

    ent.cached_primary_slug = slug
    ent.filename = slug + ".html"
    ent.save()
    return ent
|
|
|
|
|
|
def set_dummy_entrance(id, slug, cave, msg="DUMMY"):
    """Attach a placeholder entrance to a cave.

    Called when the entrance field is either missing or holds a null string
    instead of a filename in a cave_data file.

    id   -- name given to the dummy entrance (also used in the messages)
    slug -- slug for the dummy entrance; also the key used in entrances_xslug
    cave -- the Cave the entrance is linked to through CaveAndEntrance
    msg  -- label forwarded to dummy_entrance() for its failure messages

    Never raises for ordinary errors: any failure is recorded as a DataIssue.
    """
    global entrances_xslug
    try:
        # msg is now passed through; the original always sent "DUMMY" and
        # silently ignored the caller's value.
        entrance = dummy_entrance(id, slug, msg=msg)
        entrances_xslug[slug] = entrance
        CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
        message = f" ! Warning: Dummy Entrance successfully set for entrance {id} on cave {cave}"

        DataIssue.objects.create(parser="caves", message=message, url=f"{cave.url}")
        print(message)
    except Exception:
        # Deliberately best-effort: log the problem and carry on with the import.
        message = f' ! Entrance Dummy setting failure, slug:"{slug}" cave id :"{id}" '
        DataIssue.objects.create(parser="caves", message=message, url=f"{cave.url}")
        print(message)
|
|
|
|
def make_areas():
    """Create the four top-level Area rows 1623, 1624, 1626 and 1627, each with no parent."""
    print(" - Creating Areas 1623, 1624, 1627 and 1626")
    # This crashes on the server with MariaDB even though a null parent is explicitly allowed.
    for short_name in ("1623", "1624", "1626", "1627"):
        new_area = Area.objects.create(short_name=short_name, super=None)
        new_area.save()
|
|
|
|
def get_area(areanum):
    """Given the area number as a string, return the matching Area object.

    Creates the four top-level areas first if the Area table is empty.
    Any value other than "1623", "1624", "1626" or "1627" falls back to
    the 1623 area.
    """
    # exists() asks the database a yes/no question instead of loading every row.
    if not Area.objects.exists():
        make_areas()

    # One query for the area wanted, instead of fetching 1623 first and then
    # possibly overwriting it with a second lookup.
    known = ("1623", "1624", "1626", "1627")
    short_name = areanum if areanum in known else "1623"  # 1623 is the default
    return Area.objects.get(short_name=short_name)
|
|
|
|
def create_new_cave(svxpath):
    """Create a pending cave (and dummy entrance) for an unrecognised survex path.

    This is called only when a new survex file is edited online which has a path on the
    :loser: repo which is not recognised as a known cave.

    svxpath -- path without the .svx suffix, e.g. "caves-1623/666/antig"

    Returns the existing Cave if one already has this unofficial_number,
    otherwise the newly created one. Re-raises whatever do_pending_cave() raises.
    """
    # e.g. svxpath = "caves-1623/666/antig"
    print(f"Create new cave at {svxpath}")
    survex_file = svxpath + ".svx"
    parts = svxpath.split("/")
    a = parts[0][-4:]  # last four characters of the top directory, e.g. "1623"
    caveid = parts[1]
    print(f"parts {parts}, {a}, {caveid}")

    # double check
    areanum = a[0:4] if a[0:3] == "162" else "1623"
    # Note we are not appending the .htm as we are modern folks now.
    # The original built this from a[5:], which is always empty for a
    # four-character string, and in the fallback branch from `k` before it was
    # assigned (UnboundLocalError). The cave id is what belongs here.
    url = f"{areanum}/{caveid}"

    k = f"{areanum}-{caveid}"
    area = get_area(areanum)

    caves = Cave.objects.filter(unofficial_number=caveid)
    if caves:
        message = f" ! Already exists, caveid:{k} in area {areanum} {caves}"
        DataIssue.objects.create(parser="caves", message=message)
        print(message)
        return caves[0]

    try:
        cave = do_pending_cave(k, url, area)
    except Exception:
        message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
        DataIssue.objects.create(parser="caves", message=message)
        print(message)
        raise

    # NOTE(review): do_pending_cave() returns None when the slug is already in
    # the cave lookup; the attribute assignment below would then fail.

    # we know what the survex file is, we don't need to use the guess
    cave.survex_file = survex_file
    cave.save()
    return cave
|
|
|
|
def _pending_cave_instructions(slug):
    """Return the HTML 'Notes' text shown on a pending cave, telling the user how to complete it."""
    return (
        "_Survex file found in loser repo but no description in expoweb <br><br><br>\n"
        "INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then "
        '<br><br>\n\n - (0) look in the <a href="/noinfo/cave-number-index">cave number index</a> for notes on this cave, '
        "<br><br>\n\n - (1) search in the survex file for the *ref to find a "
        "relevant wallet, e.g.<a href='/survey_scans/2009%252311/'>2009#11</a> and read the notes image files <br>\n - "
        "<br><br>\n\n - (2) search in the Expo for that year e.g. <a href='/expedition/2009'>2009</a> to find a "
        "relevant logbook entry, remember that the date may have been recorded incorrectly, "
        "so check for trips i.e. logbook entries involving the same people as were listed in the survex file, "
        "and you should also check the scanned copy of the logbook (linked from each logbook entry page) "
        "just in case a vital trip was not transcribed, then <br>\n - "
        "click on 'Edit this cave' and copy the information you find in the survex file and the logbook"
        "and delete all the text in the 'Notes' section - which is the text you are reading now."
        "<br><br>\n\n - Only two fields on this form are essential. "
        "Documentation of all the fields on 'Edit this cave' form is in <a href='/handbook/survey/caveentryfields.html'>handbook/survey/caveentryfields</a>"
        "<br><br>\n\n - "
        "You will also need to create a new entrance from the 'Edit this cave' page. Ignore the existing dummy one, it will evaporate on the next full import."
        "<br><br>\n\n - "
        "When you Submit it will create a new file in expoweb/cave_data/ "
        "<br><br>\n\n - Now you can edit the entrance info: click on Edit below for the dummy entrance. "
        "and then Submit to save it (if you forget to do this, a dummy entrance will be created for your new cave description)."
        "<br><br>\n\n - Finally, you need to find a nerd to edit the file '<var>expoweb/cave_data/pending.txt</var>' "
        f"to remove the line <br><var>{slug}</var><br> as it is no longer 'pending' but 'done. Well Done."
    )


def do_pending_cave(k, url, area):
    """Create a placeholder Cave, CaveSlug and dummy Entrance for a PENDING cave.

    This is the default for a PENDING cave; it should be overwritten in the db later
    if a real cave of the same name exists in expoweb/cave_data/1623-"k".html

    Note that at this point in importing the data we have not yet seen the survex files,
    so we can't look inside the relevant survex file to find the year and so we can't
    provide helpful links.

    k    -- pending cave id, e.g. "1623-666"; also used as the slug
    url  -- url stored on the new Cave and on the DataIssue records
    area -- Area object the cave is added to; its short_name picks the survex directory

    Returns the new Cave, or None if the slug is already in GetCaveLookup().
    """

    def get_survex_file(k):
        """Guesses at and finds a survex file for this pending cave.
        Convoluted. Needs rewriting
        """
        # Strip the "162x-" prefix when there is one.
        cave_id = Path(k[5:]) if k[0:3] == "162" else Path(k)

        # First two guesses: <area>/<id>/<id>.svx, then <area>/<id>.svx
        survex_file = f"caves-{area.short_name}/{cave_id}/{cave_id}.svx"
        if Path(settings.SURVEX_DATA, survex_file).is_file():
            return survex_file
        survex_file = f"caves-{area.short_name}/{cave_id}.svx"
        if Path(settings.SURVEX_DATA, survex_file).is_file():
            return survex_file

        # Otherwise take the last .svx file found in the cave's directory,
        # preferring one whose name starts like the cave id.
        survex_file = ""
        cave_dir = Path(settings.SURVEX_DATA, f"caves-{area.short_name}/{cave_id}")
        if cave_dir.is_dir():
            prime_suspect = ""
            for f in cave_dir.iterdir():
                if f.suffix == ".svx":
                    survex_file = f.relative_to(settings.SURVEX_DATA)
                    chk = min(5, len(f.name) - 1)
                    if str(f.name)[:chk].lower() == str(cave_id.name)[:chk].lower():  # bodge which mostly works
                        prime_suspect = survex_file
            if prime_suspect:
                survex_file = prime_suspect
        return survex_file

    slug = k

    g = GetCaveLookup()
    with transaction.atomic():
        if slug in g:
            message = f" ! {k:18} cave listed in pendingcaves.txt already exists."
            DataIssue.objects.create(parser="caves", message=message, url=url)
            print(message)
            return None

        default_note = _pending_cave_instructions(slug)
        survex_file = get_survex_file(k)

        cave = Cave(
            unofficial_number=k,
            underground_description="Pending cave write-up - creating as empty object. No XML file available yet.",
            survex_file=survex_file,
            url=url,
            notes=default_note,
        )
        if cave:
            cave.save()  # must save to have id before foreign keys work. This is also a ManyToMany key.
            cave.area.add(area)
            cave.save()
            message = f" ! {k:18} {cave.underground_description} url: {url}"
            DataIssue.objects.create(parser="caves", message=message, url=url)
            print(message)

            try:  # Now create a cave slug ID
                CaveSlug.objects.update_or_create(cave=cave, slug=slug, primary=False)
            except Exception:
                message = f" ! {k:11s} PENDING CaveSLUG {slug} create failure"
                DataIssue.objects.create(parser="caves", message=message)
                print(message)
        else:
            message = f" ! {k:11s} PENDING cave create failure"
            DataIssue.objects.create(parser="caves", message=message)
            print(message)

        # Bound before the try so the failure message can always be formatted,
        # even when dummy_entrance() itself is what raised.
        ent = None
        try:
            ent = dummy_entrance(k, slug, msg="PENDING")
            # update_or_create() returns an (object, created) pair; the original
            # looped over that pair, which is why it met a bool.
            ceinst, _created = CaveAndEntrance.objects.update_or_create(
                cave=cave, entrance_letter="", entrance=ent
            )
            if str(ceinst) == str(cave):
                ceinst.cave = cave
                ceinst.save()
        except Exception:
            message = f" ! {k:11s} PENDING entrance + cave UNION create failure '{cave}' [{ent}]"
            DataIssue.objects.create(parser="caves", message=message)
            print(message)
    return cave
|
|
|
|
def readentrance(filename):
    """Read one entrance description .html file and store it as an Entrance.

    filename -- name of a file inside ENTRANCEDESCRIPTIONS

    The file must hold exactly one <entrance> element, otherwise a DataIssue
    is recorded and nothing is imported. Every <slug> in it gets an
    EntranceSlug; the first one is marked primary.

    Unlike readcave(), the tag counts are not checked before indexing [0].
    """
    global entrances_xslug
    global caves_xslug
    global areas_xslug

    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
    with open(os.path.join(ENTRANCEDESCRIPTIONS, filename)) as f:
        contents = f.read()
    context = filename
    entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context)
    if len(entrancecontentslist) != 1:
        message = f'! BAD ENTRANCE at "{filename}"'
        DataIssue.objects.create(parser="caves", message=message)
        print(message)
        return

    body = entrancecontentslist[0]

    # Tags are read in the same order as before, so that any warnings
    # getXML() records come out in the same sequence.
    non_public = getXML(body, "non_public", maxItems=1, context=context)
    name = getXML(body, "name", maxItems=1, context=context)
    slugs = getXML(body, "slug", context=context)

    # Model fields filled from a tag of the same name, taking the first match.
    plain_fields = (
        "entrance_description",
        "explorers",
        "map_description",
        "location_description",
        "lastvisit",
        "approach",
        "underground_description",
        "photo",
        "marking",
        "marking_comment",
        "findability",
        "findability_description",
        "alt",
        "northing",
        "easting",
        "tag_station",
        "exact_station",
        "other_station",
        "other_description",
        "bearings",
        "url",
    )
    found = {}
    for field in plain_fields:
        if field == "lastvisit":
            # The only optional tag, and the only one whose tag name differs from the field.
            found[field] = getXML(body, "last visit date", maxItems=1, minItems=0, context=context)
        else:
            found[field] = getXML(body, field, maxItems=1, context=context)

    truth = {
        "True": True,
        "False": False,
        "true": True,
        "false": False,
    }
    values = {"name": name[0], "non_public": truth[non_public[0]]}
    for field in plain_fields:
        values[field] = found[field][0]
    values["filename"] = filename
    values["cached_primary_slug"] = slugs[0]
    e, state = Entrance.objects.update_or_create(**values)

    for position, slug in enumerate(slugs):
        is_primary = position == 0  # only the first slug listed is the primary one
        try:
            EntranceSlug.objects.update_or_create(entrance=e, slug=slug, primary=is_primary)
        except:
            # need to cope with duplicates
            message = f" ! FAILED to get precisely one ENTRANCE when updating using: cave_entrance/{filename}"
            DataIssue.objects.create(parser="caves", message=message, url=f"/cave/{slug}/edit/")
            kents = EntranceSlug.objects.all().filter(entrance=e, slug=slug)
            for k in kents:
                message = " ! - DUPLICATE in db. entrance:" + str(k.entrance) + ", slug:" + str(k.slug())
                DataIssue.objects.create(parser="caves", message=message, url=f"/cave/{slug}/edit/")
                print(message)
            for k in kents:
                if k.slug() is not None:
                    print(" ! - OVERWRITING this one: slug:" + str(k.slug()))
                    k.notes = "DUPLICATE entrance found on import. Please fix\n" + k.notes
|
|
|
|
|
|
def readcave(filename):
    """Read one cave description .html file and store it as a Cave.

    filename -- name of a file inside CAVEDESCRIPTIONS

    Creates or updates the Cave, adds it to its Areas, registers its
    CaveSlugs (the first is primary) and links its Entrances. A dummy entrance
    is created when the file names none. Problems are recorded as DataIssue
    rows and printed.

    Assumes any area it hasn't seen before is a subarea of 1623.
    """
    global entrances_xslug
    global caves_xslug
    global areas_xslug

    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
    with open(os.path.join(CAVEDESCRIPTIONS, filename)) as f:
        contents = f.read()
    context = filename
    cavecontentslist = getXML(contents, "cave", maxItems=1, context=context)
    if len(cavecontentslist) != 1:
        message = f'! BAD CAVE at "{filename}"'
        DataIssue.objects.create(parser="caves", message=message)
        print(message)
        return
    cavecontents = cavecontentslist[0]

    def one(tag):
        """Return the matches for a tag that should occur once inside <cave>."""
        return getXML(cavecontents, tag, maxItems=1, context=context)

    # Tags are read in the same order as before, so that any warnings
    # getXML() records come out in the same sequence.
    non_public = one("non_public")
    slugs = one("caveslug")
    single = {"official_name": one("official_name")}
    areas = getXML(cavecontents, "area", context=context)
    for tag in (
        "kataster_code",
        "kataster_number",
        "unofficial_number",
        "explorers",
        "underground_description",
        "equipment",
        "references",
        "survey",
        "kataster_status",
        "underground_centre_line",
        "notes",
        "length",
        "depth",
        "extent",
        "survex_file",
        "description_file",
        "url",
    ):
        single[tag] = one(tag)
    entrances = getXML(cavecontents, "entrance", context=context)

    complete = (
        len(non_public) == 1
        and len(slugs) >= 1
        and len(areas) >= 1
        and all(len(matches) == 1 for matches in single.values())
    )
    if not complete:  # a tag is missing, or there is more than one item in the long list
        # The original used `slug` here before it was ever assigned (NameError).
        slug = slugs[0] if slugs else Path(filename).stem
        message = f' ! ABORT loading this cave. in "{filename}"'
        DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
        print(message)
        return

    kataster_number = single["kataster_number"][0]
    survex_name = single["survex_file"][0]
    description_name = single["description_file"][0]

    c = None
    try:
        truth = {
            "True": True,
            "False": False,
            "true": True,
            "false": False,
        }
        values = {"non_public": truth[non_public[0]]}
        for tag, matches in single.items():
            values[tag] = matches[0]
        values["filename"] = filename
        c, state = Cave.objects.update_or_create(**values)
    except Exception:
        print(" ! FAILED to get only one CAVE when updating using: " + filename)
        kaves = Cave.objects.all().filter(kataster_number=kataster_number)
        for k in kaves:
            message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
            DataIssue.objects.create(parser="caves", message=message)
            print(message)
        for k in kaves:
            if k.slug() is not None:
                print(" ! - OVERWRITING this one: slug:" + str(k.slug()))
                k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes
                c = k

    if c is None:
        # Neither created nor found as a duplicate: the original fell through
        # and hit a NameError on `c`, which stopped the whole import.
        message = f' ! ABORT loading this cave: no Cave could be created or found for "{filename}"'
        DataIssue.objects.create(parser="caves", message=message)
        print(message)
        return

    for area_slug in areas:
        if area_slug in areas_xslug:
            newArea = areas_xslug[area_slug]
        else:
            area = Area.objects.filter(short_name=area_slug)
            if area:
                newArea = area[0]
            else:
                # An area never seen before is assumed to be a subarea of 1623.
                newArea = Area(short_name=area_slug, super=Area.objects.get(short_name="1623"))
                newArea.save()
            areas_xslug[area_slug] = newArea
        c.area.add(newArea)

    primary = True  # this sets the first thing we find to be primary=True and all the others =False
    for slug in slugs:
        if slug not in caves_xslug:
            try:  # we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it
                cs = CaveSlug.objects.update_or_create(cave=c, slug=slug, primary=primary)
                caves_xslug[slug] = cs
            except Exception as ex:
                # This fails to do an update! It just crashes.. to be fixed
                message = f" ! Cave update/create failure : {slug}, skipping file cave_data/{context} with exception\nException: {ex.__class__}"
                DataIssue.objects.create(parser="caves", message=message)
                print(message)

        primary = False

    # From here on `slug` is the last cave slug listed in the file.
    if not entrances:
        # missing entrance link in cave_data/1623-* .html file
        set_dummy_entrance(slug[5:], slug, c, msg="DUMMY")
    else:
        for entrance in entrances:
            eslug = getXML(entrance, "entranceslug", maxItems=1, context=context)[0]
            letter = getXML(entrance, "letter", maxItems=1, context=context)[0]
            if len(entrances) == 1 and not eslug:  # may be empty: <entranceslug></entranceslug>
                set_dummy_entrance(slug[5:], slug, c, msg="DUMMY")
            else:
                try:
                    # `entrance` is rebound from the XML text to the Entrance
                    # object; the failure message prints whichever it holds.
                    if eslug in entrances_xslug:
                        entrance = entrances_xslug[eslug]
                    else:
                        entrance = Entrance.objects.get(entranceslug__slug=eslug)
                        entrances_xslug[eslug] = entrance
                    CaveAndEntrance.objects.update_or_create(
                        cave=c, entrance_letter=letter, entrance=entrance
                    )
                except Exception:
                    message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {entrance} letter:"{letter}" cave:"{c}" filename:"cave_data/{filename}"'
                    DataIssue.objects.create(parser="caves", message=message, url=f"{c.url}_edit/")
                    print(message)

    if survex_name:
        if not (Path(SURVEX_DATA) / survex_name).is_file():
            message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_name}" in "{filename}"'
            DataIssue.objects.create(parser="caves", message=message, url=f"/{slug[0:4]}/{slug}_cave_edit/")
            print(message)

    if description_name:  # if not an empty string
        message = f' - {slug:12} Note (not an error): complex description filename "{description_name}" inside "{CAVEDESCRIPTIONS}/{filename}"'
        DataIssue.objects.create(parser="caves ok", message=message, url=f"/{slug}_cave_edit/")
        print(message)

        if not (Path(EXPOWEB) / description_name).is_file():
            message = f' ! {slug:12} description filename "{EXPOWEB}/{description_name}" does not refer to a real file'
            DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
            print(message)
|
|
|
|
|
|
def getXML(text, itemname, minItems=1, maxItems=None, printwarnings=True, context=""):
    """Return the contents of every <itemname>...</itemname> pair found in text.

    text          -- the string to search; matches may span several lines
    itemname      -- tag name, inserted into the regular expression as it is
    minItems      -- fewer matches than this records a warning
    maxItems      -- more matches than this records a warning (None = no limit)
    printwarnings -- set False to suppress the warnings
    context       -- normally the file name; appended to the warning text

    The warnings are DataIssue rows plus a print; the matches are returned
    unchanged either way. With minItems == 0 and no match at all the result
    is [""] so that callers can still index [0].
    """
    pattern = "<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}
    found = re.findall(pattern, text, re.S)
    count = len(found)

    if count < minItems and printwarnings:
        too_few = " ! %(count)i x %(itemname)s found, at least %(min)i expected. Load ABORT. "
        message = too_few % {"count": count, "itemname": itemname, "min": minItems} + " in file " + context
        DataIssue.objects.create(parser="caves", message=message, url=context)
        print(message)

    if maxItems is not None and count > maxItems and printwarnings:
        too_many = " ! %(count)i x %(itemname)s found, no more than %(max)i expected in this XML unit. Load ABORT. "
        message = too_many % {"count": count, "itemname": itemname, "max": maxItems} + " in file " + context
        DataIssue.objects.create(parser="caves", message=message)
        print(message)

    if minItems == 0 and not found:
        found = [""]
    return found
|
|
|
|
|
|
def readcaves():
    """Rebuild all Area, Cave and Entrance data from the files in the EXPOWEB repo.

    Reads the xml-format HTML files in the EXPOWEB repo, not from the loser repo.

    Steps: read the pending cave ids from pendingcaves.txt, delete the existing
    Areas, Caves, Entrances and the DataIssues of this parser, import every
    entrance file and every cave .html file, refresh the cave lookup, then
    create a placeholder for each pending cave.

    Re-raises any exception from creating a pending cave, after recording it.
    """
    # For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys
    pending = set()
    fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt")
    if fpending.is_file():
        with open(fpending, "r") as fo:
            for line in fo:
                cid = line.strip().upper()
                if cid:  # a blank line is not a cave id
                    pending.add(cid)

    with transaction.atomic():
        print(" - Deleting Caves and Entrances")
        # attempting to avoid MariaDB crash when doing this
        for model in (Area, Cave, Entrance):
            try:
                model.objects.all().delete()
            except Exception as e:
                # Still best-effort, but no longer silent.
                print(f" ! Failed to delete all {model.__name__} objects (continuing): {e}")
        # Clear the cave data issues and the caves as we are reloading
        DataIssue.objects.filter(parser="areas").delete()
        DataIssue.objects.filter(parser="caves").delete()
        DataIssue.objects.filter(parser="caves ok").delete()
        DataIssue.objects.filter(parser="entrances").delete()

    with transaction.atomic():
        get_area("1623")  # called for its side effect: creates the areas if there are none

        print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS)
        print(" - Reading Entrances from entrance descriptions xml files")
        for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
            readentrance(filename)

        print(" - Reading Caves from cave descriptions xml files")
        for filename in next(os.walk(CAVEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
            if filename.endswith(".html"):
                readcave(filename)

    print(" - Setting up all the variously useful alias names")
    GetCaveLookup()

    print(" - Setting pending caves")
    # Do this last, so we can detect if they are created and no longer 'pending'
    with transaction.atomic():
        for k in sorted(pending):  # sorted so that every run creates them in the same order
            if k[0:3] == "162":
                areanum = k[0:4]
                url = f"{areanum}/{k[5:]}"  # Note we are not appending the .htm as we are modern folks now.
            else:
                areanum = "1623"
                url = f"1623/{k}"

            area = get_area(areanum)
            try:
                do_pending_cave(k, url, area)
            except Exception:
                message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
                DataIssue.objects.create(parser="caves", message=message)
                print(message)
                raise
|