Cleaning up entrance importing robustly

This commit is contained in:
Philip Sargent 2023-07-26 00:14:46 +03:00
parent 72a6b091e6
commit fab7adf079
3 changed files with 76 additions and 33 deletions

View File

@ -91,9 +91,12 @@ def set_dummy_entrance(id, slug, cave, msg="DUMMY"):
# I am not sure if we want entrances automagically created.
# Therefore I have commented it out. MJG
# entrance = dummy_entrance(id, slug, msg="DUMMY")
# entrances_xslug[slug] = entrance
# CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
# Automagic restored, as it creates a load of error messages in the DataIssue log. 2023-07-25
# Pending doing this properly, i.e. properly creating an XML file for each of these pending caves.
entrance = dummy_entrance(id, slug, msg="DUMMY")
entrances_xslug[slug] = entrance
CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
pass
except:
message = f' ! Entrance Dummy setting failure, slug:"{slug}" cave id :"{id}" '
@ -344,6 +347,22 @@ def boolify(boolstrs):
"true": True,
"false": False}[boolstrs[0]]
def validate_station(station):
    """Check that an entrance station identifier is safe to import.

    It is possible to break troggle entirely by getting this wrong.
    These station identifiers are matched against other stations using
    .endswith() in parsers/locations.py, so a simple number here will match
    hundreds of SurvexStation objects.
    It should be, e.g. "1623.p240".

    An empty string (no station set) is accepted.

    Returns True when the identifier is empty or contains a full stop.
    Raises ValueError when a non-empty identifier contains no full stop.
    """
    if station == "":
        return True
    if "." not in station:
        # No full stop found. Bad station identifier.
        # A bare `raise` has no active exception to re-raise here and would
        # only fail by accident with RuntimeError, so raise a meaningful error.
        raise ValueError(
            f"Bad station identifier '{station}': must be formatted like '1623.p240'"
        )
    return True
def read_entrance(filename, ent=None):
"""Reads an entrance description from the .html file.
@ -440,6 +459,17 @@ def read_entrance(filename, ent=None):
ent.underground_description=underground_description[0]
ent.url=url[0]
for st in [ent.exact_station, ent.other_station, ent.tag_station]:
try:
validate_station(st)
except:
message = f" ! BAD ENTRANCE TAG '{st}' in '{filename}'. Must format like '1623.p204'. Edit file manually, click."
#http://localhost:8000/1623/2023-EBH-01/1623-2023-EBH-01:1623-2023-EBH-01_entrance_edit
DataIssue.objects.create(parser="entrances", message=message, url=f"/1623/{slug}/{slug}:{slug}_entrance_edit")
print(message)
# ent_issues = DataIssue.objects.filter(parser="entrances")
# print(f".. We now have {len(ent_issues)} entrance DataIssues")
return None
ent.save()
return ent
@ -705,6 +735,10 @@ def readcaves():
for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files
read_entrance(filename)
# Why is this needed? Without it, we lose these DataIssues!
ent_issues = DataIssue.objects.filter(parser="entrances")
print(f"__ We now have {len(ent_issues)} entrance DataIssues")
print(" - Reading Caves from cave descriptions xml files")
for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files
if filename.endswith(".html"):

View File

@ -53,15 +53,16 @@ class MapLocations(object):
k = ent.caveandentrance_set.all()[0].cave
except:
message = f" ! Failed to get Cave linked to Entrance:{ent.name} from:{ent.filename} best:{ent.best_station()} {ent.caveandentrance_set.all()}"
stash_data_issue(parser="entrances", message=message)
stash_data_issue(parser="positions", message=message)
print(message)
continue # skip this entrance
try:
areaName = k.getArea().short_name
except:
message = f" ! Failed to get Area on cave '{k}' linked to Entrance:{ent.name} from:{ent.filename} best:{ent.best_station()}"
stash_data_issue(parser="entrances", message=message)
stash_data_issue(parser="positions", message=message)
print(message)
store_data_issues()
raise
self.p.append((ent.best_station(), f"{areaName}-{str(ent)[5:]}", ent.needs_surface_work(), str(ent)))
message = f" - {len(self.p)} entrances linked to caves."
@ -84,6 +85,9 @@ def LoadPositions():
svx_t = 0
d3d_t = 0
DataIssue.objects.filter(parser="positions").delete()
def runcavern3d():
outputdir = Path(str(f"{topdata}.svx")).parent
@ -100,7 +104,7 @@ def LoadPositions():
) # check=False means exception not raised
if sp.returncode != 0:
message = f" ! Error: cavern: creating {file3d} in runcavern3()"
stash_data_issue(parser="entrances", message=message)
stash_data_issue(parser="positions", message=message)
print(message)
# find the errors in the 1623.log file
@ -108,17 +112,17 @@ def LoadPositions():
["grep", "error:", f"{topdata}.log"], capture_output=True, check=False, text=True
) # check=False means exception not raised
message = f" ! Error: cavern: {sp.stdout} creating {file3d} "
stash_data_issue(parser="entrances", message=message)
stash_data_issue(parser="positions", message=message)
print(message)
except:
message = f" ! CalledProcessError 'cavern' in runcavern3() at {topdata}."
stash_data_issue(parser="entrances", message=message)
stash_data_issue(parser="positions", message=message)
print(message)
if file3d.is_file():
message = f" ! CalledProcessError. File permissions {file3d.stat().st_mode} on {str(file3d)}"
stash_data_issue(parser="entrances", message=message)
stash_data_issue(parser="positions", message=message)
print(message)
if file3d.is_file(): # might be an old one though
@ -140,11 +144,11 @@ def LoadPositions():
)
except:
message = f" ! CalledProcessError 'survexport' in runcavern3() at {file3d}."
stash_data_issue(parser="entrances", message=message)
stash_data_issue(parser="positions", message=message)
print(message)
else:
message = f" ! Failed to find {file3d} so aborting generation of new .pos, using old one if present"
stash_data_issue(parser="entrances", message=message)
stash_data_issue(parser="positions", message=message)
print(message)
topdata = os.fspath(Path(settings.SURVEX_DATA) / settings.SURVEX_TOPNAME)
@ -183,10 +187,13 @@ def LoadPositions():
for pt in MapLocations().points():
svxid, number, point_type, label = pt
mappoints[svxid] = True
if svxid =="1":
print(f"BOGUS {pt}") # this is now checked for when importing the entrance tags in parsers/caves.py
if not Path(pospath).is_file():
message = f" ! Failed to find {pospath} so aborting generation of entrance locations. "
stash_data_issue(parser="entrances", message=message)
# DataIssue.objects.create(parser="positions", message=message, url=f"/entrance_data/{pospath}_edit")
stash_data_issue(parser="positions", message=message)
print(message)
return
@ -202,7 +209,7 @@ def LoadPositions():
# except:
# message = " ! FAILED to find root SurvexBlock"
# print(message)
# stash_data_issue(parser="entrances", message=message)
# stash_data_issue(parser="positions", message=message)
# raise
sbdict = {}
dups = 0
@ -216,35 +223,37 @@ def LoadPositions():
dups += 1
message = f" ! DUPLICATE SurvexBlock identifier in .pos file '{sbid}'\n{sbs[sbid]}\n{lineno} / {line}"
print(message)
stash_data_issue(parser="entrances", message=message)
stash_data_issue(parser="positions", message=message)
else:
sbdict[sbid] = lineno
for sid in mappoints:
if sbid.endswith(sid):
blockpath = "." + sbid[: -len(sid)].strip(".") # only the most recent one that is mappoints
# print(f"# match {sid} {sbid} {blockpath}")
# But why are we doing this? Why do we want the survexblock id for each of these ?
# ..because mostly they don't actually appear in any SVX file. We should match them up
# via the cave data, not by this half-arsed syntactic match which almost never works. PMS.
# We are reading the .pos file so we only know the SurvexFile not the SurvexBlock.
# ghastly.
if False:
try:
sbqs = SurvexBlock.objects.filter(survexpath=blockpath)
if len(sbqs) == 1:
sbqs[0]
if len(sbqs) > 1:
message = f" ! MULTIPLE {len(sbqs):3} SurvexBlocks '{blockpath}' from survex files mention Entrance point '{sbid}' (line {lineno})"
print(message)
stash_data_issue(parser="entrances", message=message)
for b in sbqs:
print(f" - {b}")
sbqs[0]
except:
message = f" ! {lineno} FAIL in getting SurvexBlock matching Entrance point {blockpath} {sid}"
print(message)
stash_data_issue(parser="entrances", message=message)
# if False:
# try:
# sbqs = SurvexBlock.objects.filter(survexpath=blockpath)
# if len(sbqs) == 1:
# sbqs[0]
# if len(sbqs) > 1:
# message = f" ! MULTIPLE {len(sbqs):3} SurvexBlocks '{blockpath}' from survex files mention Entrance point '{sbid}' (line {lineno})"
# print(message)
# stash_data_issue(parser="positions", message=message)
# for b in sbqs:
# print(f" - {b}")
# sbqs[0]
# except:
# message = f" ! {lineno} FAIL in getting SurvexBlock matching Entrance point {blockpath} {sid}"
# print(message)
# stash_data_issue(parser="positions", message=message)
try:
ss = SurvexStation(name=sbid)
ss.x = float(x)
@ -255,7 +264,8 @@ def LoadPositions():
except:
message = f" ! {lineno} FAIL to create SurvexStation Entrance point {blockpath} {sid}"
print(message)
stash_data_issue(parser="entrances", message=message)
stash_data_issue(parser="positions", message=message)
store_data_issues()
raise
print(f" - {found} SurvexStation entrances found.")
print(f" - {dups} Duplicated SurvexStation entrances found")

View File

@ -2558,7 +2558,6 @@ def LoadSurvexBlocks():
DataIssue.objects.filter(parser="survexleg").delete()
DataIssue.objects.filter(parser="survexunits").delete()
DataIssue.objects.filter(parser="survex team").delete()
DataIssue.objects.filter(parser="entrances").delete()
DataIssue.objects.filter(parser="xEntrances").delete()
print(" - survex Data Issues flushed")
mem1 = get_process_memory()