From fab7adf07989c03289336c6f2117696129f82f19 Mon Sep 17 00:00:00 2001
From: Philip Sargent
Date: Wed, 26 Jul 2023 00:14:46 +0300
Subject: [PATCH] Cleaning up entrance importing robustly

---
 parsers/caves.py     | 40 ++++++++++++++++++++++++--
 parsers/locations.py | 68 +++++++++++++++++++++++++-------------------
 parsers/survex.py    |  1 -
 3 files changed, 76 insertions(+), 33 deletions(-)

diff --git a/parsers/caves.py b/parsers/caves.py
index f8b39e9..93d5cff 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -91,9 +91,12 @@ def set_dummy_entrance(id, slug, cave, msg="DUMMY"):
         # I am not sure if we want entrances automagically created.
         # Therefore I have commented it out. MJG
-        # entrance = dummy_entrance(id, slug, msg="DUMMY")
-        # entrances_xslug[slug] = entrance
-        # CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
+        # Automagic restored 2023-07-25, as disabling it creates a load of error messages in the DataIssue log,
+        # pending doing this properly, i.e. actually creating an XML file for each of these pending caves.
+
+        entrance = dummy_entrance(id, slug, msg="DUMMY")
+        entrances_xslug[slug] = entrance
+        CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
         pass
     except:
         message = f' ! Entrance Dummy setting failure, slug:"{slug}" cave id :"{id}" '
@@ -343,6 +346,22 @@ def boolify(boolstrs):
         "False": False,
         "true": True,
         "false": False}[boolstrs[0]]
+
+def validate_station(station):
+    """It is possible to break troggle entirely by getting this wrong.
+    These station identifiers are matched against other stations using .endswith()
+    in parsers/locations.py,
+    so a simple number here will match hundreds of SurvexStation objects.
+    It should be e.g. "1623.p240".
+    """
+    if station == "":
+        return True
+    dot = station.find(".")
+    if dot == -1:
+        # No full stop found: bad station identifier.
+        raise ValueError(f"Bad station identifier '{station}': no full stop")
+    else:
+        return True
 
 def read_entrance(filename, ent=None):
     """Reads an entrance description from the .html file.
@@ -440,6 +459,17 @@ def read_entrance(filename, ent=None):
             ent.underground_description=underground_description[0]
             ent.url=url[0]
 
+        for st in [ent.exact_station, ent.other_station, ent.tag_station]:
+            try:
+                validate_station(st)
+            except:
+                message = f" ! BAD ENTRANCE TAG '{st}' in '{filename}'. Must be in the format '1623.p204'. Edit the file manually (click the edit link)."
+                # http://localhost:8000/1623/2023-EBH-01/1623-2023-EBH-01:1623-2023-EBH-01_entrance_edit
+                DataIssue.objects.create(parser="entrances", message=message, url=f"/1623/{slug}/{slug}:{slug}_entrance_edit")
+                print(message)
+                # ent_issues = DataIssue.objects.filter(parser="entrances")
+                # print(f".. We now have {len(ent_issues)} entrance DataIssues")
+                return None
         ent.save()
         return ent
@@ -704,6 +734,10 @@ def readcaves():
         print(" - Reading Entrances from entrance descriptions xml files")
         for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
             read_entrance(filename)
+
+        # Why is this needed? Without it, we lose these DataIssues!
+        ent_issues = DataIssue.objects.filter(parser="entrances")
+        print(f"__ We now have {len(ent_issues)} entrance DataIssues")
 
         print(" - Reading Caves from cave descriptions xml files")
         for filename in next(os.walk(CAVEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
diff --git a/parsers/locations.py b/parsers/locations.py
index b3a318c..8299a28 100644
--- a/parsers/locations.py
+++ b/parsers/locations.py
@@ -53,15 +53,16 @@ class MapLocations(object):
                 k = ent.caveandentrance_set.all()[0].cave
             except:
                 message = f" ! Failed to get Cave linked to Entrance:{ent.name} from:{ent.filename} best:{ent.best_station()} {ent.caveandentrance_set.all()}"
-                stash_data_issue(parser="entrances", message=message)
+                stash_data_issue(parser="positions", message=message)
                 print(message)
                 continue  # skip this entrance
             try:
                 areaName = k.getArea().short_name
             except:
                 message = f" ! Failed to get Area on cave '{k}' linked to Entrance:{ent.name} from:{ent.filename} best:{ent.best_station()}"
-                stash_data_issue(parser="entrances", message=message)
+                stash_data_issue(parser="positions", message=message)
                 print(message)
+                store_data_issues()
                 raise
             self.p.append((ent.best_station(), f"{areaName}-{str(ent)[5:]}", ent.needs_surface_work(), str(ent)))
         message = f" - {len(self.p)} entrances linked to caves."
@@ -83,6 +84,9 @@ def LoadPositions():
     """
     svx_t = 0
     d3d_t = 0
+
+    DataIssue.objects.filter(parser="positions").delete()
+
     def runcavern3d():
         outputdir = Path(str(f"{topdata}.svx")).parent
@@ -100,7 +104,7 @@ def LoadPositions():
            )  # check=False means exception not raised
            if sp.returncode != 0:
                message = f" ! Error: cavern: creating {file3d} in runcavern3()"
-               stash_data_issue(parser="entrances", message=message)
+               stash_data_issue(parser="positions", message=message)
                print(message)
 
                # find the errors in the 1623.log file
                sp = subprocess.run(
@@ -108,17 +112,17 @@ def LoadPositions():
                    ["grep", "error:", f"{topdata}.log"], capture_output=True, check=False, text=True
                )  # check=False means exception not raised
                message = f" ! Error: cavern: {sp.stdout} creating {file3d} "
-               stash_data_issue(parser="entrances", message=message)
+               stash_data_issue(parser="positions", message=message)
                print(message)
 
        except:
            message = f" ! CalledProcessError 'cavern' in runcavern3() at {topdata}."
-           stash_data_issue(parser="entrances", message=message)
+           stash_data_issue(parser="positions", message=message)
            print(message)
 
            if file3d.is_file():
                message = f" ! CalledProcessError. File permissions {file3d.stat().st_mode} on {str(file3d)}"
-               stash_data_issue(parser="entrances", message=message)
+               stash_data_issue(parser="positions", message=message)
                print(message)
 
        if file3d.is_file():  # might be an old one though
@@ -140,11 +144,11 @@ def LoadPositions():
                )
            except:
                message = f" ! CalledProcessError 'survexport' in runcavern3() at {file3d}."
-               stash_data_issue(parser="entrances", message=message)
+               stash_data_issue(parser="positions", message=message)
                print(message)
        else:
            message = f" ! Failed to find {file3d} so aborting generation of new .pos, using old one if present"
-           stash_data_issue(parser="entrances", message=message)
+           stash_data_issue(parser="positions", message=message)
            print(message)
 
    topdata = os.fspath(Path(settings.SURVEX_DATA) / settings.SURVEX_TOPNAME)
@@ -183,10 +187,13 @@ def LoadPositions():
    for pt in MapLocations().points():
        svxid, number, point_type, label = pt
        mappoints[svxid] = True
+       if svxid == "1":
+           print(f"BOGUS {pt}")  # this is now checked for when importing the entrance tags in parsers/caves.py
 
    if not Path(pospath).is_file():
        message = f" ! Failed to find {pospath} so aborting generation of entrance locations. "
-       stash_data_issue(parser="entrances", message=message)
+       # DataIssue.objects.create(parser="positions", message=message, url=f"/entrance_data/{pospath}_edit")
+       stash_data_issue(parser="positions", message=message)
        print(message)
        return
@@ -202,7 +209,7 @@ def LoadPositions():
    # except:
    #     message = " ! FAILED to find root SurvexBlock"
    #     print(message)
-   #     stash_data_issue(parser="entrances", message=message)
+   #     stash_data_issue(parser="positions", message=message)
    #     raise
    sbdict = {}
    dups = 0
@@ -216,35 +223,37 @@ def LoadPositions():
                dups += 1
                message = f" ! DUPLICATE SurvexBlock identifier in .pos file '{sbid}'\n{sbs[sbid]}\n{lineno} / {line}"
                print(message)
-               stash_data_issue(parser="entrances", message=message)
+               stash_data_issue(parser="positions", message=message)
            else:
                sbdict[sbid] = lineno
                for sid in mappoints:
                    if sbid.endswith(sid):
                        blockpath = "." + sbid[: -len(sid)].strip(".")  # only the most recent one that is mappoints
+                       # print(f"# match {sid} {sbid} {blockpath}")
+
                        # But why are we doing this? Why do we want the survexblock id for each of these ?
                        # ..because mostly they don't actually appear in any SVX file. We should match them up
                        # via the cave data, not by this half-arsed syntactic match which almost never works. PMS.
                        # We are reading the .pos file so we only know the SurvexFile not the SurvexBlock.
-                       # ghastly.
-                       if False:
-                           try:
-                               sbqs = SurvexBlock.objects.filter(survexpath=blockpath)
-                               if len(sbqs) == 1:
-                                   sbqs[0]
-                               if len(sbqs) > 1:
-                                   message = f" ! MULTIPLE {len(sbqs):3} SurvexBlocks '{blockpath}' from survex files mention Entrance point '{sbid}' (line {lineno})"
-                                   print(message)
-                                   stash_data_issue(parser="entrances", message=message)
-                                   for b in sbqs:
-                                       print(f" - {b}")
-                                   sbqs[0]
-                           except:
-                               message = f" ! {lineno} FAIL in getting SurvexBlock matching Entrance point {blockpath} {sid}"
-                               print(message)
-                               stash_data_issue(parser="entrances", message=message)
+
+                       # if False:
+                       #     try:
+                       #         sbqs = SurvexBlock.objects.filter(survexpath=blockpath)
+                       #         if len(sbqs) == 1:
+                       #             sbqs[0]
+                       #         if len(sbqs) > 1:
+                       #             message = f" ! MULTIPLE {len(sbqs):3} SurvexBlocks '{blockpath}' from survex files mention Entrance point '{sbid}' (line {lineno})"
+                       #             print(message)
+                       #             stash_data_issue(parser="positions", message=message)
+                       #             for b in sbqs:
+                       #                 print(f" - {b}")
+                       #             sbqs[0]
+                       #     except:
+                       #         message = f" ! {lineno} FAIL in getting SurvexBlock matching Entrance point {blockpath} {sid}"
+                       #         print(message)
+                       #         stash_data_issue(parser="positions", message=message)
                        try:
                            ss = SurvexStation(name=sbid)
                            ss.x = float(x)
@@ -255,7 +264,8 @@ def LoadPositions():
                        except:
                            message = f" ! {lineno} FAIL to create SurvexStation Entrance point {blockpath} {sid}"
                            print(message)
-                           stash_data_issue(parser="entrances", message=message)
+                           stash_data_issue(parser="positions", message=message)
+                           store_data_issues()
                            raise
    print(f" - {found} SurvexStation entrances found.")
    print(f" - {dups} Duplicated SurvexStation entrances found")
diff --git a/parsers/survex.py b/parsers/survex.py
index 2be2217..4ee5f0e 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -2558,7 +2558,6 @@ def LoadSurvexBlocks():
     DataIssue.objects.filter(parser="survexleg").delete()
     DataIssue.objects.filter(parser="survexunits").delete()
     DataIssue.objects.filter(parser="survex team").delete()
-    DataIssue.objects.filter(parser="entrances").delete()
     DataIssue.objects.filter(parser="xEntrances").delete()
     print(" - survex Data Issues flushed")
     mem1 = get_process_memory()
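
Note on validate_station(): a quick, runnable illustration (not part of the
patch) of why a bare number is rejected. Station matching in
parsers/locations.py pairs entrance tags with .pos identifiers using
.endswith(), so an identifier without the "area." prefix matches far too
many stations. The pos_ids values below are invented sample data, not real
survey stations.

    # Sketch only: demonstrates the .endswith() matching hazard that
    # validate_station() guards against. pos_ids is made-up sample data.
    pos_ids = ["1623.p240", "1623.101", "1624.p81", "1623.31"]

    def matches(tag):
        # the same kind of suffix test used when pairing entrance tags
        # with SurvexStation identifiers read from the .pos file
        return [sid for sid in pos_ids if sid.endswith(tag)]

    print(matches("1"))          # ['1623.101', '1624.p81', '1623.31'] - far too loose
    print(matches("1623.p240"))  # ['1623.p240'] - exactly one station, as intended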