From e98c63f51c7baac3e433f57f5958810379e919eb Mon Sep 17 00:00:00 2001
From: Philip Sargent <philip.sargent@gmail.com>
Date: Thu, 28 Sep 2023 01:01:32 +0300
Subject: [PATCH] Fix odd glitches in cave parsing

---
 core/models/caves.py     |  3 +++
 parsers/caves.py         | 20 +++++++++++++++++---
 parsers/survex.py        | 27 ++++++++++++++++-----------
 templates/caveindex.html |  2 +-
 4 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/core/models/caves.py b/core/models/caves.py
index d13ad70..cd8bb10 100644
--- a/core/models/caves.py
+++ b/core/models/caves.py
@@ -654,6 +654,9 @@ def GetCaveLookup():
         ("hc", "2018-dm-07"),
         ("loveshack", "1626-2018-ad-03"),
         ("crushed-garlic", "1626-2018-ad-03"),
+        ("BuzzardHole", "1626-2023-buzzardhole"),
+        ("2023-BuzzardHole", "1626-2023-buzzardhole"),
+        ("1626-2023-BuzzardHole", "1626-2023-buzzardhole"),
        
     ]
 
diff --git a/parsers/caves.py b/parsers/caves.py
index 19b5549..cc9d22d 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -160,7 +160,13 @@ def create_new_cave(svxpath, svxid=None, msg=None):
         DataIssue.objects.create(parser="caves", message=message)
         print(message)
         return caves[0]
-
+        
+    urltest = Cave.objects.filter(url=url)
+    if urltest:
+        message = f" ! Cave {urltest[0]} already exists with this url {url}. Can't create new cave {slug} from {svxpath} "
+        DataIssue.objects.create(parser="caves", message=message, url=url)
+        print(message)
+        return urltest[0]
     try:
         cave = do_pending_cave(k, caveid, url, areacode, msg)
     except:
@@ -191,7 +197,7 @@ def do_ARGE_cave(slug, caveid, areacode, svxid):
 
     urltest = Cave.objects.filter(url=url)
     if urltest:
-        message = f" ! Cave {urltest[0]} already exists with this url {url}. Can't create new ARGE cave {slug}"
+        message = f" ! Cave {urltest[0]} already exists with this url {url}. Can't create new ARGE cave {slug} from {svxid}"
         DataIssue.objects.create(parser="caves", message=message, url=url)
         print(message)
         return urltest[0]
@@ -725,10 +731,18 @@ def read_cave(filename, cave=None):
     
     contextguess  = f"/{slug[0:4]}/{slug}_cave_edit/" # guess as we havent read areacode yet
     
+    urltest = Cave.objects.filter(url=url)
+    if urltest:
+        message = f" ! - URL duplicate {urltest[0]} already exists with proposed url {url}. Should not create new cave {slug} from {filename}"
+        DataIssue.objects.create(parser="caves", message=message, url=contextguess)
+        print(message)
+
     manual_edit = True
     if not cave:
         # we are parsing using databaseReset.py not an online edit
+        # we have already checked for uniqueness so we do not need the 'update' part of update_or_create
         manual_edit = False
+ 
         try:
             cave, state = Cave.objects.update_or_create(filename=filename) # replace with slug when CaveSlug tidied up
         except:
@@ -780,7 +794,7 @@ def read_cave(filename, cave=None):
     cave.url=url[0]
 
     check_slug(cave.areacode,cave.kataster_number, cave.unofficial_number, cave.url)
-           
+               
     entrances = getXML(cavecontents, "entrance", context=context)
     do_entrances()
     # print(f"- {entrances_xslug=}")
diff --git a/parsers/survex.py b/parsers/survex.py
index 0292452..056487d 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -47,8 +47,9 @@ ROOTBLOCK = "rootblock"
 METRESINFEET = 3.28084
 UNSEENS = "_unseens.svx"
 
-IGNOREPREFIX = ["surface", "kataster", "fixedpts", "gpx", "deprecated", "dummy_"]
-EXCEPTPREFIX = ["surface/terrain", "kataster/kataster-boundaries", "gpx/gpx_publish/essentials", "template", "docs", "deprecated", "subsections", "1623-and-1626-no-schoenberg-hs", "1623-and-1624-and-1626-and-1627", "1623-and-1626", "dummy_file"]
+IGNOREFILES  = ["dummy_file"]
+IGNOREPREFIX = ["surface", "kataster", "fixedpts", "gpx", "deprecated"]
+EXCEPTPREFIX = ["surface/terrain", "kataster/kataster-boundaries", "gpx/gpx_publish/essentials", "template", "docs", "deprecated", "subsections", "1623-and-1626-no-schoenberg-hs", "1623-and-1624-and-1626-and-1627", "1623-and-1626"]
 # ignorenoncave = [
     # "caves-1623",
     # "caves-1623/2007-NEU",
@@ -1185,12 +1186,15 @@ class LoadingSurvex:
                 cave = self.caveslist[key]
                 print(f"Cave<{cave}> -- {key}")
                 
+        for f in IGNOREFILES:
+            if svxid.lower().startswith(f):
+                return False
         for i in IGNOREPREFIX:
             if cavepath.lower().startswith(i) or cavepath[11:].lower().startswith(i):
-                message = (f" - {cavepath} starts with <IGNOREPREFIX> (while creating '{svxid}.svx' )")
+                # message = (f" - {cavepath} is an <IGNOREPREFIX> (while looking at '{svxid}.svx' )")
                 # print(message, file=sys.stderr)
                 return False
-                
+              
         if cavepath.lower() in self.caveslist: # primed with GCaveLookup
             return self.caveslist[cavepath.lower()]
 
@@ -1200,30 +1204,31 @@ class LoadingSurvex:
         if path_match:
             area = path_match.group(1)
             caveid = path_match.group(2)
-            sluggy = f"{area}-{caveid}"
-            seek = [sluggy, sluggy.replace("1623-","")] # to catch '2023-kt-02' etc . 3-digit searches only work for 1623 area
+            sluggy = f"{area}-{caveid}".lower() # GCaveLookup is all UPPER() and all lower() but not mixed
+            # if this comes from editing a survex file, we may already have loaded 3-digit aliases for 1623- from old wallets,
+            # so be careful here.
+            seek = {sluggy, sluggy.replace("1623-","")} # {} is a set
             for s in seek:
                 if s in self.caveslist:
                     self.caveslist[cavepath] = self.caveslist[s] # set "caves-1626/107/107" as index to cave 1626-107
                     return self.caveslist[s]
             
             if cavepath[6:10] in ARGEAREAS:
+                #print(f"ARGE {area=} {caveid=} {cavepath} - {cavepath[11:]}", file=sys.stderr)
                 return do_ARGE_cave(sluggy, caveid, area, svxid)
                 
-            cave = create_new_cave(cavepath, svxid, f"Cave mentioned only in a survex file {svxid=}") # uses the pending code to create pending cave descriptions
+            cave = create_new_cave(cavepath, svxid, f"Cave mentioned only in a survex file {svxid=}") # uses the pending code 
             self.caveslist[cavepath.lower()] = cave
-            message = f"\n    ! MAKING cave {sluggy} for {cavepath=} {svxid=}"
-            # stash_data_issue(parser="survex", message=message, url="/survexfile/{svxid}.svx", sb=(svxid))
             return cave
         else:
             path_match = rx_svxcollection.search(svxid) 
             if path_match:
-                message = f"    ! Recognised survex file which is not a cave at {svxid=}"
+                # message = f"    ! Recognised survex file in area {path_match.group(1)} which is not a cave at {svxid=}"
                 # stash_data_issue(parser="survex", message=message, url=None, sb=(svxid))
                 # print(message, file=sys.stderr)
                 return False
             else:
-                message = f" ! ERROR: no cave at '{svxid}.svx  - is not a known cavename format. "
+                message = f" ! ERROR: no cave at '{svxid}.svx'  {cavepath=} "
                 print("\n" + message)
                 print("\n" + message, file=sys.stderr)
                 stash_data_issue(parser="survex", message=message, url="{svxid}.svx", sb=(svxid))
diff --git a/templates/caveindex.html b/templates/caveindex.html
index ef4a432..f0f3bae 100644
--- a/templates/caveindex.html
+++ b/templates/caveindex.html
@@ -23,7 +23,7 @@
 {% endfor %}
 </ul>
 
-
+Red star <span style="color: red">*</span> against a name indicates that no survex file is explicitly associated with the cave.
   
 <h3>1623</h3>
 <div style="column-count: 3;">