2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-14 19:57:22 +00:00

Cleaning up entrance importing robustly

This commit is contained in:
2023-07-26 00:14:46 +03:00
parent 72a6b091e6
commit fab7adf079
3 changed files with 76 additions and 33 deletions

View File

@@ -91,9 +91,12 @@ def set_dummy_entrance(id, slug, cave, msg="DUMMY"):
# I am not sure if we want entrances automagically created.
# Therefore I have commented it out. MJG
# entrance = dummy_entrance(id, slug, msg="DUMMY")
# entrances_xslug[slug] = entrance
# CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
# Automagic restored, as leaving it disabled creates a load of error messages in the DataIssue log. 2023-07-25
# Pending doing this properly, i.e. properly creating an XML file for each of these pending caves.
entrance = dummy_entrance(id, slug, msg="DUMMY")
entrances_xslug[slug] = entrance
CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
pass
except:
message = f' ! Entrance Dummy setting failure, slug:"{slug}" cave id :"{id}" '
@@ -343,6 +346,22 @@ def boolify(boolstrs):
"False": False,
"true": True,
"false": False}[boolstrs[0]]
def validate_station(station):
    """Check that an entrance station identifier contains a full stop.

    It is possible to break troggle entirely by getting this wrong.
    These station identifiers are matched against other stations using
    .endswith() in parsers/locations.py, so a simple number here will match
    hundreds of SurvexStation objects. It should be, e.g. "1623.p240".

    An empty string is accepted as valid.

    Returns:
        True when the identifier is empty or contains a full stop.

    Raises:
        ValueError: when a non-empty identifier contains no full stop.
    """
    if station == "":
        return True
    if "." not in station:
        # No full stop found: bad station identifier. A bare `raise` here has
        # no active exception to re-raise and would only surface as an
        # uninformative RuntimeError, so raise an explicit, descriptive error.
        raise ValueError(
            f"Bad station identifier '{station}': no full stop found, must format like '1623.p240'"
        )
    return True
def read_entrance(filename, ent=None):
"""Reads an entrance description from the .html file.
@@ -440,6 +459,17 @@ def read_entrance(filename, ent=None):
ent.underground_description=underground_description[0]
ent.url=url[0]
for st in [ent.exact_station, ent.other_station, ent.tag_station]:
try:
validate_station(st)
except:
message = f" ! BAD ENTRANCE TAG '{st}' in '{filename}'. Must format like '1623.p204'. Edit file manually, click."
#http://localhost:8000/1623/2023-EBH-01/1623-2023-EBH-01:1623-2023-EBH-01_entrance_edit
DataIssue.objects.create(parser="entrances", message=message, url=f"/1623/{slug}/{slug}:{slug}_entrance_edit")
print(message)
# ent_issues = DataIssue.objects.filter(parser="entrances")
# print(f".. We now have {len(ent_issues)} entrance DataIssues")
return None
ent.save()
return ent
@@ -704,6 +734,10 @@ def readcaves():
print(" - Reading Entrances from entrance descriptions xml files")
for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files
read_entrance(filename)
# Why is this needed? Without it, we lose these DataIssues!
ent_issues = DataIssue.objects.filter(parser="entrances")
print(f"__ We now have {len(ent_issues)} entrance DataIssues")
print(" - Reading Caves from cave descriptions xml files")
for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files