From a0f85454f82698cdb882770ee470e39283ca4b34 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Wed, 2 Aug 2023 18:23:04 +0300 Subject: [PATCH] detecting orphan cave ids and adding to pending list --- core/views/scans.py | 62 ++++++++++++++++++++++++++------- parsers/caves.py | 83 ++++++++++++++++++++++++++++++++------------- parsers/survex.py | 2 +- 3 files changed, 110 insertions(+), 37 deletions(-) diff --git a/core/views/scans.py b/core/views/scans.py index 83d6ac3..f7abc06 100644 --- a/core/views/scans.py +++ b/core/views/scans.py @@ -12,6 +12,7 @@ from troggle.core.models.wallets import Wallet from troggle.core.models.troggle import DataIssue, Expedition, Person from troggle.core.views.expo import getmimetype from troggle.parsers.survex import set_walletdate +from troggle.parsers.caves import add_cave_to_pending_list # from troggle.parsers.people import GetPersonExpeditionNameLookup # import parsers.surveys @@ -85,8 +86,21 @@ def fillblankpeople(w): # print(f' - {wp=} {nobody=}') populatewallet(w) +def is_cave(id): + Gcavelookup = GetCaveLookup() + id = id.strip("' []'") + if id in Gcavelookup: + return True + else: + print(f" - Failed to find cave object from id <{id}>") + if id.lower() != "unknown" and id != "": + print(f" - adding <{id}> to pendingcaves.txt list") + add_cave_to_pending_list(id) + return False def fillblankothers(w): + """This is on the way to having a many:many relationship between Caves and Wallets + """ if not w.walletdate: set_walletdate(w) @@ -98,14 +112,21 @@ def fillblankothers(w): else: if type(wcaveid) == list: for i in wcaveid: - if i in Gcavelookup: - w.caveobj = Gcavelookup[i] # just sets it to the last one found. nasty. bug waiting to happen - # print(f' - Found cave object from id {wcaveid}') + i = i.strip("' []'") + if is_cave(i): + w.caveobj = Gcavelookup[i] # just sets it to the last one found. nasty. bug waiting to happen + elif wcaveid.find(',') != -1: + # it's a list of cave ids as a string + ids = wcaveid.split(',') + for i in ids: + i = i.strip("' []'") + if is_cave(i): + w.caveobj = Gcavelookup[i] # just sets it to the last one found. nasty. bug waiting to happen else: - if wcaveid in Gcavelookup: - w.caveobj = Gcavelookup[wcaveid] - else: - print(f" - Failed to find cave object from id {wcaveid}") + if is_cave(wcaveid): + w.caveobj = Gcavelookup[wcaveid.strip("' []'")] + + def fixsurvextick(w, ticks): @@ -216,7 +237,7 @@ def walletslistyear(request, year): def cavewallets(request, caveid): """Returns all the wallets for just one cave""" - print("-cavewalletsl") + print("-cavewallets") Gcavelookup = GetCaveLookup() if caveid in Gcavelookup: @@ -233,8 +254,23 @@ def cavewallets(request, caveid): for z in zilchwallets: zcaveid = z.cave() if zcaveid: - cleanid = str(zcaveid).strip("'[]'") - if cleanid in Gcavelookup: + cleanid = str(zcaveid).strip("' []'") + + if cleanid.find(',') != -1: + # it's a list of cave ids + wurl = f"/walletedit/{z.walletname.replace('#',':')}" + message = f" ! In {z.walletname} we do not handle lists of cave ids yet '{cleanid}'" + print(message) + DataIssue.objects.update_or_create(parser="scans", message=message, url=wurl) + + # it's a list of cave ids as a string. Identify any orphan caves hidden here + ids = cleanid.split(',') + for i in ids: + i = i.strip("' []'") + if is_cave(i): + fcave = Gcavelookup[i.strip("' []'")] # just sets it to the last one found. nasty. bug waiting to happen + + elif cleanid in Gcavelookup: fcave = Gcavelookup[cleanid] if str(fcave.slug()) == caveid: # print(f' - Found one ! {z.walletname=} {zcaveid=}') @@ -245,9 +281,10 @@ def cavewallets(request, caveid): pass else: wurl = f"/walletedit/{z.walletname.replace('#',':')}" - message = f" ! In {z.walletname} there is an unrecognised cave name '{cleanid}' (out of {len(Gcavelookup):,} cave names and aliases)" + message = f" ! In {z.walletname} there is an unrecognised cave name '{cleanid}', adding to pending list." print(message) DataIssue.objects.update_or_create(parser="scans", message=message, url=wurl) + add_cave_to_pending_list(cleanid) manywallets = list(set(wallets)) for w in manywallets: @@ -277,7 +314,8 @@ def oldwallet(request, path): def scansingle(request, path, file): - """sends a single binary file to the user for display - browser decides how using mimetype""" + """sends a single binary file to the user for display - browser decides how using mimetype + This is very unsafe""" try: wallet = Wallet.objects.get(walletname=urlunquote(path)) singlescan = SingleScan.objects.get(wallet=wallet, name=file) diff --git a/parsers/caves.py b/parsers/caves.py index b4643c8..2bfd648 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -229,7 +229,8 @@ def do_pending_cave(k, caveid, url, area): print(message) return - default_note = "_Survex file found in loser repo but no description in expoweb


\n" + default_note = "A reference has been found to this cave id in a survex file in the loser repo, or in a wallet metadata" + default_note += " in a JSON file in the drawings repo, but no Cave Description exists in expoweb (in /cave_data/)


\n" default_note += "INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then " default_note += '

\n\n - (0) look in the cave number index for notes on this cave, ' default_note += "

\n\n - (1) search in the survex file for the *ref to find a " @@ -495,23 +496,47 @@ def read_cave(filename, cave=None): What is Class CaveAndEntrance for? """ for e in entrances: + eslug = getXML(e, "entranceslug", maxItems=1, context=context)[0] + # if eslug.endswith(('a','b','c','d','e','f')): + # print(f"! Entrance {eslug}") + if eslug.endswith('a b'): + message = f' - Entrance has weird name slug:"{eslug}" cave:"{cave}" caveslug:"{slug}" filename:"cave_data/{filename}"' + DataIssue.objects.create(parser="xEntrances", message=message, url=f"{cave.area}/{cave.area}-{cave.url}_cave_edit/") + print(message) + letter = getXML(e, "letter", maxItems=1, context=context)[0] if len(entrances) == 1 and not eslug: # may be empty: - set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrance slug read from file") + msg="DUMMY: no entrance slug read from file, so assume textually same as cave slug" + set_dummy_entrance(slug[5:], slug, c, msg=msg) + print(f"! {msg}\n- {slug} {c}") else: - try: - if eslug in entrances_xslug: - entrance = entrances_xslug[eslug] - else: + if eslug in entrances_xslug: + # print(f"eslug {eslug} found eslug in xslug cache ") + entrance = entrances_xslug[eslug] + else: + # print(f"eslug {eslug} looking up entrance ") + try: entrance = Entrance.objects.get(slug=eslug) entrances_xslug[eslug] = entrance + except: + message = f"! eslug {eslug} Abort entrance loading. Failed to find entrance in db" + DataIssue.objects.create(parser="xEntrances", message=message, url=f"{cave.area}/{cave.area}-{cave.url}_cave_edit/") + print(message) + return + + if eslug != f"{entrance}": + message = f"eslug {eslug} using different entrance {entrance} to set CaveAndEntrance" + DataIssue.objects.create(parser="xEntrances", message=message, url=f"{cave.area}/{cave.area}-{cave.url}_cave_edit/") + print(message) + try: CaveAndEntrance.objects.update_or_create( cave=cave, entrance_letter=letter, entrance=entrance ) except: - message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {e} letter:"{letter}" cave:"{cave}" filename:"cave_data/{filename}"' - DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.url}_edit/") + print(f"! Entrance setting failure {slug}") + message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} letter:"{letter}" cave:"{cave}" filename:"cave_data/{filename}"\n{e}' + DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.url}_cave_edit/") print(message) def reload_entrances(): """For individual re-reading of a cave_data file when editing, @@ -690,19 +715,26 @@ def read_cave(filename, cave=None): cave.save() return cave +def add_cave_to_pending_list(id): + fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt") + if fpending.is_file(): + with open(fpending, "a") as pend: + pend.write(f"{id}\n") + def readcaves(): """Called from databaseReset mass importer. Reads the xml-format HTML 'cave' files in the EXPOWEB repo, the survex files from the loser repo. """ # Pending is for those caves which do not have cave_data/1623-xxx.html XML files even though # they exist and have surveys. - pending = set() - fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt") - if fpending.is_file(): - with open(fpending, "r") as fo: - cids = fo.readlines() - for cid in cids: - pending.add(cid.strip().rstrip("\n").upper()) + with transaction.atomic(): + pending = set() + fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt") + if fpending.is_file(): + with open(fpending, "r") as fo: + cids = fo.readlines() + for cid in cids: + pending.add(cid.strip().rstrip("\n").upper()) with transaction.atomic(): print(" - Deleting Caves and Entrances") @@ -719,11 +751,13 @@ def readcaves(): Entrance.objects.all().delete() except: pass - # Clear the cave data issues and the caves as we are reloading - DataIssue.objects.filter(parser="areas").delete() - DataIssue.objects.filter(parser="caves").delete() - DataIssue.objects.filter(parser="caves ok").delete() - #DataIssue.objects.filter(parser="entrances").delete() + + # Clear the cave data issues and the caves as we are reloading + DataIssue.objects.filter(parser="areas").delete() + DataIssue.objects.filter(parser="caves").delete() + DataIssue.objects.filter(parser="caves ok").delete() + #DataIssue.objects.filter(parser="entrances").delete() + #DataIssue.objects.filter(parser="xEntrances").delete() with transaction.atomic(): area = get_area("1623") @@ -731,11 +765,12 @@ def readcaves(): print(" - Reading Entrances from entrance descriptions xml files") for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files read_entrance(filename) - - # WHy is this needed ? Without it, we lose these DataIssues! - ent_issues = DataIssue.objects.filter(parser="entrances") - print(f"__ We now have {len(ent_issues)} entrance DataIssues") + # Why is this needed ? Without it, we lose these DataIssues! + ent_issues = DataIssue.objects.filter(parser="entrances") + print(f" _ We now have {len(ent_issues)} entrance DataIssues") + + with transaction.atomic(): print(" - Reading Caves from cave descriptions xml files") for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files if filename.endswith(".html"): diff --git a/parsers/survex.py b/parsers/survex.py index bc1efd9..59278ed 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -2564,7 +2564,7 @@ def LoadSurvexBlocks(): DataIssue.objects.filter(parser="survexleg").delete() DataIssue.objects.filter(parser="survexunits").delete() DataIssue.objects.filter(parser="survex team").delete() - DataIssue.objects.filter(parser="xEntrances").delete() + # DataIssue.objects.filter(parser="xEntrances").delete() print(" - survex Data Issues flushed") mem1 = get_process_memory() print(f" - MEM:{mem1:7.2f} MB now ", file=sys.stderr)