From 5a191ee554488ff8cf3668f011440edecf671e49 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Fri, 8 Sep 2023 19:51:04 +0300 Subject: [PATCH] Big refactor of cave lookup & creation. Works. --- _deploy/wsl/requirements-p11d4.txt | 17 +++++++++------ core/models/caves.py | 1 + parsers/caves.py | 4 +++- parsers/survex.py | 33 ++++++++++++++++-------------- 4 files changed, 33 insertions(+), 22 deletions(-) diff --git a/_deploy/wsl/requirements-p11d4.txt b/_deploy/wsl/requirements-p11d4.txt index 4f24dad..009a3c9 100644 --- a/_deploy/wsl/requirements-p11d4.txt +++ b/_deploy/wsl/requirements-p11d4.txt @@ -1,16 +1,21 @@ -asgiref==3.6.0 +asgiref==3.6 +beautifulsoup4=4.12 black==23.1.0 +chardet=5.1 click==8.1.3 coverage==7.1.0 +deptry=0.12 Django==4.2 docutils==0.19 -isort==5.12.0 +isort==5.12 mypy-extensions==1.0.0 packaging==23.0 -pathspec==0.11.0 +pathspec==0.11 Pillow==9.4.0 -platformdirs==3.0.0 +platformdirs==3.0 pytz==2022.7 ruff==0.0.245 -sqlparse==0.4.3 -Unidecode==1.3.6 +setuptools=67.7 +soupsieve=2.5 +sqlparse==0.4 +Unidecode==1.3 diff --git a/core/models/caves.py b/core/models/caves.py index 28b9d4c..a79a275 100644 --- a/core/models/caves.py +++ b/core/models/caves.py @@ -546,6 +546,7 @@ def GetCaveLookup(): # These might alse create more duplicate entries # Yes, this should be set in, and imported from, settings.py + # yes we should move to always using the 1623- prefix too. aliases = [ ("1987-02", "267"), ("1990-01", "171"), diff --git a/parsers/caves.py b/parsers/caves.py index c16a577..982fb40 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -133,6 +133,7 @@ def get_area(areanum): def create_new_cave(svxpath): """This is called only when a new survex file is edited online which has a path on the :loser: repo which is not recognised as a known cave. + ALSO called by survex parser when it finds a cave it doesn't recognise """ # e.g. 
svxpath = "caves-1623/666/antig" print(f"Create new cave at {svxpath}") @@ -171,7 +172,8 @@ def create_new_cave(svxpath): print(message) raise - # we know what the survex file is, we don't need to use the guess + # we know what the survex file is, we don't need to use the guess. + # But this sets the survex file on the cave from the first one we find, not necessarily the best survex file for this cave cave.survex_file=survex_file cave.save() return cave diff --git a/parsers/survex.py b/parsers/survex.py index 681dc48..ef69759 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -1165,33 +1165,34 @@ class LoadingSurvex: def IdentifyCave(self, cavepath, svxid, depth): """Given a file path for a survex file, e.g. /1626/107/107.svx, or a survex-block path, - return the cave object - - REWRITE ALL THIS and make a methoid on the class + return the cave object """ - caveslist = GetCaveLookup() - if cavepath.lower() in caveslist: # will only work after we load in full paths as indexes, see below - return caveslist[cavepath.lower()] + path = cavepath.lower() + if path in self.caveslist: # primed with GetCaveLookup + return self.caveslist[path] # rx_cave = re.compile(r"(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)") - path_match = self.rx_cave.search(cavepath) # use as Class method. + path_match = self.rx_cave.search(cavepath) if path_match: sluggy = f"{path_match.group(1)}-{path_match.group(2)}" - # guesses = [sluggy.lower(), path_match.group(2).lower()] # this looks for JUST "107" and ignores 1626.. guesses = [sluggy.lower()] # full 1626-107 search, don;t use short-forms for g in guesses: - if g in caveslist: - caveslist[cavepath] = caveslist[g] # set "caves-1626/107/107.svx" as index to cave 1626-107 - return caveslist[g] - print(f" ! 
Failed to find cave for {cavepath.lower()}", file=sys.stderr) + if g in self.caveslist: + self.caveslist[cavepath] = self.caveslist[g] # set "caves-1626/107/107" as index to cave 1626-107 + return self.caveslist[g] + cave = create_new_cave(cavepath) # uses the pending stuff to create pending cave descriptions + self.caveslist[cavepath] = cave + message = f" ! MAKING cave for {cavepath=} {svxid=}" + stash_data_issue(parser="survex", message=message, url=None, sb=(svxid)) + if not cavepath.startswith("caves-1624") or cavepath.startswith("caves-1626"): + print(message, file=sys.stderr) else: - # not a cave, but that is fine. - if self.is_it_already_pending(cavepath, svxid, depth): + # isn't all this pointless...?? + if self.is_it_already_pending(cavepath, svxid, depth): # but pending will already have been created as Cave objects pass else: # It is too late to add it to the pending caves list here, they were already # processed in parsers/caves.py So we have to do a bespoke creation. cave = create_new_cave(svxid) - message = f" ! Warning: cave identifier '{caveid}'or {id} (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pendingcaves.txt ? In '{includelabel}.svx' at depth:[{len(depth)}]." print("\n" + message) print("\n" + message, file=sys.stderr) @@ -1238,6 +1239,8 @@ class LoadingSurvex: if id in self.pending: print(f"! ALREADY PENDING id {id}", file=sys.stderr) return True + + return False def LoadSurvexFile(self, svxid): """Creates SurvexFile in the database, and SurvexDirectory if needed