Cleaning up entrance importing robustly

This commit is contained in:
Philip Sargent 2023-07-26 00:14:46 +03:00
parent 72a6b091e6
commit fab7adf079
3 changed files with 76 additions and 33 deletions

View File

@ -91,9 +91,12 @@ def set_dummy_entrance(id, slug, cave, msg="DUMMY"):
# I am not sure if we want entrances automagically created. # I am not sure if we want entrances automagically created.
# Therefore I have commented it out. MJG # Therefore I have commented it out. MJG
# entrance = dummy_entrance(id, slug, msg="DUMMY") # Automagic restored, as it creates a load of error message in the DataIssue log. 2023-07-25
# entrances_xslug[slug] = entrance # pending doing this properly, ie. properly creating an XML file for each of these pending caves.
# CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
entrance = dummy_entrance(id, slug, msg="DUMMY")
entrances_xslug[slug] = entrance
CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
pass pass
except: except:
message = f' ! Entrance Dummy setting failure, slug:"{slug}" cave id :"{id}" ' message = f' ! Entrance Dummy setting failure, slug:"{slug}" cave id :"{id}" '
@ -343,6 +346,22 @@ def boolify(boolstrs):
"False": False, "False": False,
"true": True, "true": True,
"false": False}[boolstrs[0]] "false": False}[boolstrs[0]]
def validate_station(station):
    """Check that an entrance station identifier is qualified with a prefix.

    It is possible to break troggle entirely by getting this wrong.
    These station identifiers are matched against other stations using
    .endswith() in parsers/locations.py, so a simple number here will match
    hundreds of SurvexStation objects. It should be, e.g. "1623.p240".

    Args:
        station: the station identifier string; "" means no station is set
            and is accepted as valid.

    Returns:
        True if the identifier is empty or contains a full stop.

    Raises:
        ValueError: if a non-empty identifier contains no full stop.
    """
    if station == "":
        return True
    if "." not in station:
        # No full stop found: bad station identifier. Raise a real exception
        # carrying a message; a bare `raise` with no active exception only
        # fails by accident (RuntimeError: No active exception to re-raise).
        raise ValueError(
            f"Bad station identifier '{station}': expected a form like '1623.p240'"
        )
    return True
def read_entrance(filename, ent=None): def read_entrance(filename, ent=None):
"""Reads an entrance description from the .html file. """Reads an entrance description from the .html file.
@ -440,6 +459,17 @@ def read_entrance(filename, ent=None):
ent.underground_description=underground_description[0] ent.underground_description=underground_description[0]
ent.url=url[0] ent.url=url[0]
for st in [ent.exact_station, ent.other_station, ent.tag_station]:
try:
validate_station(st)
except:
message = f" ! BAD ENTRANCE TAG '{st}' in '{filename}'. Must format like '1623.p204'. Edit file manually, click."
#http://localhost:8000/1623/2023-EBH-01/1623-2023-EBH-01:1623-2023-EBH-01_entrance_edit
DataIssue.objects.create(parser="entrances", message=message, url=f"/1623/{slug}/{slug}:{slug}_entrance_edit")
print(message)
# ent_issues = DataIssue.objects.filter(parser="entrances")
# print(f".. We now have {len(ent_issues)} entrance DataIssues")
return None
ent.save() ent.save()
return ent return ent
@ -704,6 +734,10 @@ def readcaves():
print(" - Reading Entrances from entrance descriptions xml files") print(" - Reading Entrances from entrance descriptions xml files")
for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files
read_entrance(filename) read_entrance(filename)
# Why is this needed? Without it, we lose these DataIssues!
ent_issues = DataIssue.objects.filter(parser="entrances")
print(f"__ We now have {len(ent_issues)} entrance DataIssues")
print(" - Reading Caves from cave descriptions xml files") print(" - Reading Caves from cave descriptions xml files")
for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files

View File

@ -53,15 +53,16 @@ class MapLocations(object):
k = ent.caveandentrance_set.all()[0].cave k = ent.caveandentrance_set.all()[0].cave
except: except:
message = f" ! Failed to get Cave linked to Entrance:{ent.name} from:{ent.filename} best:{ent.best_station()} {ent.caveandentrance_set.all()}" message = f" ! Failed to get Cave linked to Entrance:{ent.name} from:{ent.filename} best:{ent.best_station()} {ent.caveandentrance_set.all()}"
stash_data_issue(parser="entrances", message=message) stash_data_issue(parser="positions", message=message)
print(message) print(message)
continue # skip this entrance continue # skip this entrance
try: try:
areaName = k.getArea().short_name areaName = k.getArea().short_name
except: except:
message = f" ! Failed to get Area on cave '{k}' linked to Entrance:{ent.name} from:{ent.filename} best:{ent.best_station()}" message = f" ! Failed to get Area on cave '{k}' linked to Entrance:{ent.name} from:{ent.filename} best:{ent.best_station()}"
stash_data_issue(parser="entrances", message=message) stash_data_issue(parser="positions", message=message)
print(message) print(message)
store_data_issues()
raise raise
self.p.append((ent.best_station(), f"{areaName}-{str(ent)[5:]}", ent.needs_surface_work(), str(ent))) self.p.append((ent.best_station(), f"{areaName}-{str(ent)[5:]}", ent.needs_surface_work(), str(ent)))
message = f" - {len(self.p)} entrances linked to caves." message = f" - {len(self.p)} entrances linked to caves."
@ -83,6 +84,9 @@ def LoadPositions():
""" """
svx_t = 0 svx_t = 0
d3d_t = 0 d3d_t = 0
DataIssue.objects.filter(parser="positions").delete()
def runcavern3d(): def runcavern3d():
outputdir = Path(str(f"{topdata}.svx")).parent outputdir = Path(str(f"{topdata}.svx")).parent
@ -100,7 +104,7 @@ def LoadPositions():
) # check=False means exception not raised ) # check=False means exception not raised
if sp.returncode != 0: if sp.returncode != 0:
message = f" ! Error: cavern: creating {file3d} in runcavern3()" message = f" ! Error: cavern: creating {file3d} in runcavern3()"
stash_data_issue(parser="entrances", message=message) stash_data_issue(parser="positions", message=message)
print(message) print(message)
# find the errors in the 1623.log file # find the errors in the 1623.log file
@ -108,17 +112,17 @@ def LoadPositions():
["grep", "error:", f"{topdata}.log"], capture_output=True, check=False, text=True ["grep", "error:", f"{topdata}.log"], capture_output=True, check=False, text=True
) # check=False means exception not raised ) # check=False means exception not raised
message = f" ! Error: cavern: {sp.stdout} creating {file3d} " message = f" ! Error: cavern: {sp.stdout} creating {file3d} "
stash_data_issue(parser="entrances", message=message) stash_data_issue(parser="positions", message=message)
print(message) print(message)
except: except:
message = f" ! CalledProcessError 'cavern' in runcavern3() at {topdata}." message = f" ! CalledProcessError 'cavern' in runcavern3() at {topdata}."
stash_data_issue(parser="entrances", message=message) stash_data_issue(parser="positions", message=message)
print(message) print(message)
if file3d.is_file(): if file3d.is_file():
message = f" ! CalledProcessError. File permissions {file3d.stat().st_mode} on {str(file3d)}" message = f" ! CalledProcessError. File permissions {file3d.stat().st_mode} on {str(file3d)}"
stash_data_issue(parser="entrances", message=message) stash_data_issue(parser="positions", message=message)
print(message) print(message)
if file3d.is_file(): # might be an old one though if file3d.is_file(): # might be an old one though
@ -140,11 +144,11 @@ def LoadPositions():
) )
except: except:
message = f" ! CalledProcessError 'survexport' in runcavern3() at {file3d}." message = f" ! CalledProcessError 'survexport' in runcavern3() at {file3d}."
stash_data_issue(parser="entrances", message=message) stash_data_issue(parser="positions", message=message)
print(message) print(message)
else: else:
message = f" ! Failed to find {file3d} so aborting generation of new .pos, using old one if present" message = f" ! Failed to find {file3d} so aborting generation of new .pos, using old one if present"
stash_data_issue(parser="entrances", message=message) stash_data_issue(parser="positions", message=message)
print(message) print(message)
topdata = os.fspath(Path(settings.SURVEX_DATA) / settings.SURVEX_TOPNAME) topdata = os.fspath(Path(settings.SURVEX_DATA) / settings.SURVEX_TOPNAME)
@ -183,10 +187,13 @@ def LoadPositions():
for pt in MapLocations().points(): for pt in MapLocations().points():
svxid, number, point_type, label = pt svxid, number, point_type, label = pt
mappoints[svxid] = True mappoints[svxid] = True
if svxid =="1":
print(f"BOGUS {pt}") # this is now checked for when importing the entrance tags in parsers/caves.py
if not Path(pospath).is_file(): if not Path(pospath).is_file():
message = f" ! Failed to find {pospath} so aborting generation of entrance locations. " message = f" ! Failed to find {pospath} so aborting generation of entrance locations. "
stash_data_issue(parser="entrances", message=message) # DataIssue.objects.create(parser="positions", message=message, url=f"/entrance_data/{pospath}_edit")
stash_data_issue(parser="positions", message=message)
print(message) print(message)
return return
@ -202,7 +209,7 @@ def LoadPositions():
# except: # except:
# message = " ! FAILED to find root SurvexBlock" # message = " ! FAILED to find root SurvexBlock"
# print(message) # print(message)
# stash_data_issue(parser="entrances", message=message) # stash_data_issue(parser="positions", message=message)
# raise # raise
sbdict = {} sbdict = {}
dups = 0 dups = 0
@ -216,35 +223,37 @@ def LoadPositions():
dups += 1 dups += 1
message = f" ! DUPLICATE SurvexBlock identifier in .pos file '{sbid}'\n{sbs[sbid]}\n{lineno} / {line}" message = f" ! DUPLICATE SurvexBlock identifier in .pos file '{sbid}'\n{sbs[sbid]}\n{lineno} / {line}"
print(message) print(message)
stash_data_issue(parser="entrances", message=message) stash_data_issue(parser="positions", message=message)
else: else:
sbdict[sbid] = lineno sbdict[sbid] = lineno
for sid in mappoints: for sid in mappoints:
if sbid.endswith(sid): if sbid.endswith(sid):
blockpath = "." + sbid[: -len(sid)].strip(".") # only the most recent one that is mappoints blockpath = "." + sbid[: -len(sid)].strip(".") # only the most recent one that is mappoints
# print(f"# match {sid} {sbid} {blockpath}")
# But why are we doing this? Why do we want the survexblock id for each of these ? # But why are we doing this? Why do we want the survexblock id for each of these ?
# ..because mostly they don't actually appear in any SVX file. We should match them up # ..because mostly they don't actually appear in any SVX file. We should match them up
# via the cave data, not by this half-arsed syntactic match which almost never works. PMS. # via the cave data, not by this half-arsed syntactic match which almost never works. PMS.
# We are reading the .pos file so we only know the SurvexFile not the SurvexBlock. # We are reading the .pos file so we only know the SurvexFile not the SurvexBlock.
# ghastly.
if False: # if False:
try: # try:
sbqs = SurvexBlock.objects.filter(survexpath=blockpath) # sbqs = SurvexBlock.objects.filter(survexpath=blockpath)
if len(sbqs) == 1: # if len(sbqs) == 1:
sbqs[0] # sbqs[0]
if len(sbqs) > 1: # if len(sbqs) > 1:
message = f" ! MULTIPLE {len(sbqs):3} SurvexBlocks '{blockpath}' from survex files mention Entrance point '{sbid}' (line {lineno})" # message = f" ! MULTIPLE {len(sbqs):3} SurvexBlocks '{blockpath}' from survex files mention Entrance point '{sbid}' (line {lineno})"
print(message) # print(message)
stash_data_issue(parser="entrances", message=message) # stash_data_issue(parser="positions", message=message)
for b in sbqs: # for b in sbqs:
print(f" - {b}") # print(f" - {b}")
sbqs[0] # sbqs[0]
except: # except:
message = f" ! {lineno} FAIL in getting SurvexBlock matching Entrance point {blockpath} {sid}" # message = f" ! {lineno} FAIL in getting SurvexBlock matching Entrance point {blockpath} {sid}"
print(message) # print(message)
stash_data_issue(parser="entrances", message=message) # stash_data_issue(parser="positions", message=message)
try: try:
ss = SurvexStation(name=sbid) ss = SurvexStation(name=sbid)
ss.x = float(x) ss.x = float(x)
@ -255,7 +264,8 @@ def LoadPositions():
except: except:
message = f" ! {lineno} FAIL to create SurvexStation Entrance point {blockpath} {sid}" message = f" ! {lineno} FAIL to create SurvexStation Entrance point {blockpath} {sid}"
print(message) print(message)
stash_data_issue(parser="entrances", message=message) stash_data_issue(parser="positions", message=message)
store_data_issues()
raise raise
print(f" - {found} SurvexStation entrances found.") print(f" - {found} SurvexStation entrances found.")
print(f" - {dups} Duplicated SurvexStation entrances found") print(f" - {dups} Duplicated SurvexStation entrances found")

View File

@ -2558,7 +2558,6 @@ def LoadSurvexBlocks():
DataIssue.objects.filter(parser="survexleg").delete() DataIssue.objects.filter(parser="survexleg").delete()
DataIssue.objects.filter(parser="survexunits").delete() DataIssue.objects.filter(parser="survexunits").delete()
DataIssue.objects.filter(parser="survex team").delete() DataIssue.objects.filter(parser="survex team").delete()
DataIssue.objects.filter(parser="entrances").delete()
DataIssue.objects.filter(parser="xEntrances").delete() DataIssue.objects.filter(parser="xEntrances").delete()
print(" - survex Data Issues flushed") print(" - survex Data Issues flushed")
mem1 = get_process_memory() mem1 = get_process_memory()