detecting orphan cave ids and adding to pending list

2023-08-02 18:23:04 +03:00
parent c76c09fced
commit a0f85454f8
3 changed files with 110 additions and 37 deletions
--- a/core/views/scans.py
+++ b/core/views/scans.py
@@ -12,6 +12,7 @@ from troggle.core.models.wallets import Wallet
 from troggle.core.models.troggle import DataIssue, Expedition, Person
 from troggle.core.views.expo import getmimetype
 from troggle.parsers.survex import set_walletdate
 from troggle.parsers.caves import add_cave_to_pending_list
 # from troggle.parsers.people import GetPersonExpeditionNameLookup
 # import parsers.surveys
@@ -85,8 +86,21 @@ def fillblankpeople(w):
                # print(f' - {wp=} {nobody=}')
                populatewallet(w)
 def is_cave(id):
    """Say whether a cave id taken from wallet metadata names a known cave.

    The id is trimmed of leading and trailing quote, space and square-bracket
    characters and then looked up in the mapping returned by GetCaveLookup().

    Returns True when the trimmed id is present in that lookup. Otherwise a
    diagnostic is printed, the id is passed to add_cave_to_pending_list()
    (unless it is empty or, ignoring case, equal to "unknown") and False is
    returned.
    """
    known_caves = GetCaveLookup()
    cave_id = id.strip("' []'")
    if cave_id in known_caves:
        return True

    print(f" - Failed to find cave object from id <{cave_id}>")
    # Blank ids and the literal "unknown" are placeholders, not orphan caves.
    is_placeholder = cave_id == "" or cave_id.lower() == "unknown"
    if not is_placeholder:
        print(f" - adding <{cave_id}> to pendingcaves.txt list")
        add_cave_to_pending_list(cave_id)
    return False
 def fillblankothers(w):
    """This is on the way to having a many:many relationship between Caves and Wallets
    """
    if not w.walletdate:
        set_walletdate(w)
@@ -98,14 +112,21 @@ def fillblankothers(w):
    else:
        if type(wcaveid) == list:
            for i in wcaveid:
-                if i in Gcavelookup:
+                i = i.strip("' []'")
-                    w.caveobj = Gcavelookup[i]  # just sets it to the last one found. nasty. bug waiting to happen
+                if is_cave(i):
-                    # print(f' - Found cave object from id {wcaveid}')
+                    w.caveobj = Gcavelookup[i] # just sets it to the last one found. nasty. bug waiting to happen
        elif wcaveid.find(',') != -1:
            # it's a list of cave ids as a string
            ids = wcaveid.split(',')
            for i in ids:
                i = i.strip("' []'")
                if is_cave(i):
                    w.caveobj = Gcavelookup[i]  # just sets it to the last one found. nasty. bug waiting to happen     
        else:
-            if wcaveid in Gcavelookup:
+            if is_cave(wcaveid):
-                w.caveobj = Gcavelookup[wcaveid]
+                w.caveobj = Gcavelookup[wcaveid.strip("' []'")]
-            else:
+   
-                print(f" - Failed to find cave object from id {wcaveid}")
+                
 def fixsurvextick(w, ticks):
@@ -216,7 +237,7 @@ def walletslistyear(request, year):
 def cavewallets(request, caveid):
    """Returns all the wallets for just one cave"""
-    print("-cavewalletsl")
+    print("-cavewallets")
    Gcavelookup = GetCaveLookup()
    if caveid in Gcavelookup:
@@ -233,8 +254,23 @@ def cavewallets(request, caveid):
    for z in zilchwallets:
        zcaveid = z.cave()
        if zcaveid:
-            cleanid = str(zcaveid).strip("'[]'")
+            cleanid = str(zcaveid).strip("' []'")
-            if cleanid in Gcavelookup:
+ 
            if cleanid.find(',') != -1:
                # it's a list of cave ids
                wurl = f"/walletedit/{z.walletname.replace('#',':')}"
                message = f" ! In {z.walletname} we do not handle lists of cave ids yet '{cleanid}'"
                print(message)
                DataIssue.objects.update_or_create(parser="scans", message=message, url=wurl)
                # it's a list of cave ids as a string. Identify any orphan caves hidden here
                ids = cleanid.split(',')
                for i in ids:
                    i = i.strip("' []'")
                    if is_cave(i):
                        fcave = Gcavelookup[i.strip("' []'")]  # just sets it to the last one found. nasty. bug waiting to happen 
            elif cleanid in Gcavelookup:
                fcave = Gcavelookup[cleanid]
                if str(fcave.slug()) == caveid:
                    # print(f' - Found one ! {z.walletname=} {zcaveid=}')
@@ -245,9 +281,10 @@ def cavewallets(request, caveid):
                pass
            else:
                wurl = f"/walletedit/{z.walletname.replace('#',':')}"
-                message = f" ! In {z.walletname} there is an unrecognised cave name '{cleanid}' (out of {len(Gcavelookup):,} cave names and aliases)"
+                message = f" ! In {z.walletname} there is an unrecognised cave name '{cleanid}', adding to pending list."
                print(message)
                DataIssue.objects.update_or_create(parser="scans", message=message, url=wurl)
                add_cave_to_pending_list(cleanid) 
    manywallets = list(set(wallets))
    for w in manywallets:
@@ -277,7 +314,8 @@ def oldwallet(request, path):
 def scansingle(request, path, file):
-    """sends a single binary file to the user for display - browser decides how using mimetype"""
+    """sends a single binary file to the user for display - browser decides how using mimetype
    This is very unsafe"""
    try:
        wallet = Wallet.objects.get(walletname=urlunquote(path))
        singlescan = SingleScan.objects.get(wallet=wallet, name=file)
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -229,7 +229,8 @@ def do_pending_cave(k, caveid, url, area):
            print(message)
            return
-        default_note = "_Survex file found in loser repo but no description in expoweb <br><br><br>\n"
+        default_note = "A reference has been found to this cave id in a survex file in the loser repo, or in a wallet metadata"
        default_note += " in a JSON file in the drawings repo, but no Cave Description exists in expoweb (in /cave_data/)<br><br><br>\n"
        default_note += "INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then "
        default_note += '<br><br>\n\n - (0) look in the <a href="/noinfo/cave-number-index">cave number index</a> for notes on this cave,  '
        default_note += "<br><br>\n\n - (1) search in the survex file for the *ref to find a "
@@ -495,23 +496,47 @@ def read_cave(filename, cave=None):
        What is Class CaveAndEntrance for?
        """
        for e in entrances:
            eslug = getXML(e, "entranceslug", maxItems=1, context=context)[0]
            # if eslug.endswith(('a','b','c','d','e','f')):
                # print(f"! Entrance {eslug}")
            if eslug.endswith('a b'):
                message = f' - Entrance has weird name slug:"{eslug}" cave:"{cave}" caveslug:"{slug}" filename:"cave_data/{filename}"'
                DataIssue.objects.create(parser="xEntrances", message=message, url=f"{cave.area}/{cave.area}-{cave.url}_cave_edit/")
                print(message)        
            letter = getXML(e, "letter", maxItems=1, context=context)[0]
            if len(entrances) == 1 and not eslug:  # may be empty: <entranceslug></entranceslug>
-                set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrance slug read from file")
+                msg="DUMMY: no entrance slug read from file, so assume textually same as cave slug"
                set_dummy_entrance(slug[5:], slug, c, msg=msg)
                print(f"! {msg}\n- {slug} {c}")
            else:
-                try:
+                if eslug in entrances_xslug:
-                    if eslug in entrances_xslug:
+                    # print(f"eslug {eslug} found eslug in xslug cache ")
-                        entrance = entrances_xslug[eslug]
+                    entrance = entrances_xslug[eslug]
-                    else:
+                else:
                    # print(f"eslug {eslug} looking up entrance ")
                    try:
                        entrance = Entrance.objects.get(slug=eslug)
                        entrances_xslug[eslug] = entrance
                    except:
                        message = f"! eslug {eslug} Abort entrance loading. Failed to find entrance in db"         
                        DataIssue.objects.create(parser="xEntrances", message=message, url=f"{cave.area}/{cave.area}-{cave.url}_cave_edit/")
                        print(message)   
                        return                        
                if eslug != f"{entrance}":
                    message = f"eslug {eslug} using different entrance {entrance} to set CaveAndEntrance"         
                    DataIssue.objects.create(parser="xEntrances", message=message, url=f"{cave.area}/{cave.area}-{cave.url}_cave_edit/")
                    print(message)        
                try:    
                    CaveAndEntrance.objects.update_or_create(
                        cave=cave, entrance_letter=letter, entrance=entrance
                    )
                except:
-                    message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {e} letter:"{letter}" cave:"{cave}" filename:"cave_data/{filename}"'
+                    print(f"! Entrance setting failure {slug}")
-                    DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.url}_edit/")
+                    message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} letter:"{letter}" cave:"{cave}" filename:"cave_data/{filename}"\n{e}'
                    DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.url}_cave_edit/")
                    print(message)        
    def reload_entrances():
        """For individual re-reading of a cave_data file when editing,
@@ -690,19 +715,26 @@ def read_cave(filename, cave=None):
    cave.save()
    return cave
 def add_cave_to_pending_list(id):
    """Append a cave id to pendingcaves.txt unless it is already listed there.

    The file lives in the CAVEDESCRIPTIONS directory. Nothing is written when
    the file does not exist; this function never creates it.

    The id is written as given, followed by a newline. Before writing, the
    existing lines are scanned so that repeated calls for the same id do not
    keep growing the file with duplicate lines.
    """
    fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt")
    if not fpending.is_file():
        return

    # Compare the way readcaves() normalises entries when it loads this file:
    # surrounding whitespace stripped and upper-cased.
    wanted = str(id).strip().upper()
    with open(fpending, "r") as pend:
        if any(line.strip().upper() == wanted for line in pend):
            return  # already pending, so avoid appending a duplicate

    with open(fpending, "a") as pend:
        pend.write(f"{id}\n")
 def readcaves():
    """Called from databaseReset mass importer.
    Reads the xml-format HTML 'cave' files in the EXPOWEB repo, the survex files from the loser repo.
    """
    # Pending is for those caves which do not have cave_data/1623-xxx.html XML files even though 
    # they exist and have surveys. 
-    pending = set()
+    with transaction.atomic():
-    fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt")
+        pending = set()
-    if fpending.is_file():
+        fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt")
-        with open(fpending, "r") as fo:
+        if fpending.is_file():
-            cids = fo.readlines()
+            with open(fpending, "r") as fo:
-        for cid in cids:
+                cids = fo.readlines()
-            pending.add(cid.strip().rstrip("\n").upper())
+            for cid in cids:
                pending.add(cid.strip().rstrip("\n").upper())
    with transaction.atomic():
        print(" - Deleting Caves and Entrances")
@@ -719,11 +751,13 @@ def readcaves():
            Entrance.objects.all().delete()
        except:
            pass
-        # Clear the cave data issues and the caves as we are reloading
+            
-        DataIssue.objects.filter(parser="areas").delete()
+    # Clear the cave data issues and the caves as we are reloading
-        DataIssue.objects.filter(parser="caves").delete()
+    DataIssue.objects.filter(parser="areas").delete()
-        DataIssue.objects.filter(parser="caves ok").delete()
+    DataIssue.objects.filter(parser="caves").delete()
-        #DataIssue.objects.filter(parser="entrances").delete()
+    DataIssue.objects.filter(parser="caves ok").delete()
    #DataIssue.objects.filter(parser="entrances").delete()
    #DataIssue.objects.filter(parser="xEntrances").delete()
    with transaction.atomic():
        area = get_area("1623")
@@ -731,11 +765,12 @@ def readcaves():
        print(" - Reading Entrances from entrance descriptions xml files")
        for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
            read_entrance(filename)
        # WHy is this needed ? Without it, we lose these DataIssues!
        ent_issues = DataIssue.objects.filter(parser="entrances")
        print(f"__ We now have  {len(ent_issues)} entrance DataIssues")
    # Why is this needed ? Without it, we lose these DataIssues!
    ent_issues = DataIssue.objects.filter(parser="entrances")
    print(f" _ We now have  {len(ent_issues)} entrance DataIssues")
    with transaction.atomic():
        print(" - Reading Caves from cave descriptions xml files")
        for filename in next(os.walk(CAVEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
            if filename.endswith(".html"):
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -2564,7 +2564,7 @@ def LoadSurvexBlocks():
    DataIssue.objects.filter(parser="survexleg").delete()
    DataIssue.objects.filter(parser="survexunits").delete()
    DataIssue.objects.filter(parser="survex team").delete()
-    DataIssue.objects.filter(parser="xEntrances").delete()
+    # DataIssue.objects.filter(parser="xEntrances").delete()
    print("  - survex Data Issues flushed")
    mem1 = get_process_memory()
    print(f"  - MEM:{mem1:7.2f} MB now ", file=sys.stderr)