From 860ce6b065d664f8084d2f3dbfb6dbb6c2a58716 Mon Sep 17 00:00:00 2001
From: Philip Sargent <philip.sargent@gmail.com>
Date: Fri, 24 Mar 2023 00:54:26 +0000
Subject: [PATCH] Creates new cave from survex file upload

---
 parsers/caves.py  | 102 +++++++++++++++++++++++++++++++++++-----------
 parsers/survex.py |  52 ++++++++++-------------
 2 files changed, 100 insertions(+), 54 deletions(-)

diff --git a/parsers/caves.py b/parsers/caves.py
index 4a14725..f12cd84 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -82,8 +82,79 @@ def set_dummy_entrance(id, slug, cave, msg="DUMMY"):
         DataIssue.objects.create(parser="caves", message=message, url=f"{cave.url}")
         print(message)
 
-def create_new_cave():
-    pass
+def make_areas():
+    """Create the four top-level Area records: 1623, 1624, 1626 and 1627.
+
+    Every area is created with no parent (super=None).
+    """
+    print(" - Creating Areas 1623, 1624, 1627 and 1626")
+    # This crashes on the server with MariaDB even though a null parent is explicitly allowed.
+    for short_name in ("1623", "1624", "1626", "1627"):
+        new_area = Area.objects.create(short_name=short_name, super=None)
+        # create() has already written the row; the follow-up save() is kept from the original
+        new_area.save()
+        
+def get_area(areanum):
+    """Given the area number as a string, return the matching Area object.
+
+    If the Area table is empty, the four standard areas are created first
+    by make_areas(). Any areanum other than "1624", "1626" or "1627"
+    (including unrecognised values) yields the default area, "1623".
+
+    Raises Area.DoesNotExist if the selected area is not in the database.
+    """
+    # exists() asks the database for at most one row, rather than loading
+    # every Area object just to count them.
+    if not Area.objects.exists():
+        make_areas()
+
+    # Anything unrecognised falls back to 1623, the default area.
+    short_name = areanum if areanum in ("1623", "1624", "1626", "1627") else "1623"
+    # One query for the area actually wanted, not a default fetch then a refetch.
+    return Area.objects.get(short_name=short_name)
+    
+def create_new_cave(svxpath):
+    """Create a pending Cave via do_pending_cave() for a survex file with no known cave.
+
+    This is called only when a new survex file is edited online which has a path on the
+    :loser: repo which is not recognised as a known cave.
+    svxpath is the survex path without the ".svx" suffix, e.g. "caves-1623/666/antig".
+    Returns the existing Cave if one already has this unofficial number, otherwise
+    the newly created one. Exceptions from do_pending_cave() are logged and re-raised.
+    """
+    print(f"Create new cave at {svxpath}")
+    survex_file = svxpath + ".svx"
+    parts = svxpath.split("/")
+    a = parts[0][-4:]  # last four characters of the top directory, e.g. "1623"
+    caveid = parts[1]
+    print(f"parts {parts}, {a}, {caveid}")
+    # double check: anything which is not a 162x area is filed under the default, 1623
+    areanum = a if a[0:3] == "162" else "1623"
+    # Build the url from caveid. 'a' holds at most four characters so a[5:] was always
+    # empty, and 'k' was read here before it had been assigned.
+    url = f"{areanum}/{caveid}"  # Note we are not appending the .htm as we are modern folks now.
+    k = f"{areanum}-{caveid}"
+    area = get_area(areanum)
+
+    caves = Cave.objects.filter(unofficial_number=caveid)
+    if caves:
+        message = f" ! Already exists, caveid:{k} in area {areanum} {caves}"
+        DataIssue.objects.create(parser="caves", message=message)
+        print(message)
+        return caves[0]
+
+    try:
+        cave = do_pending_cave(k, url, area)
+    except Exception:
+        message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
+        DataIssue.objects.create(parser="caves", message=message)
+        print(message)
+        raise
+
+    # we know what the survex file is, we don't need to use the guess
+    cave.survex_file = survex_file
+    cave.save()
+    return cave
 
 def do_pending_cave(k, url, area):
     """
@@ -194,7 +265,7 @@ def do_pending_cave(k, url, area):
             try:  # Now create a cave slug ID
                 CaveSlug.objects.update_or_create(cave=cave, slug=slug, primary=False)
             except:
-                message = f" ! {k:11s}  PENDING cave SLUG create failure"
+                message = f" ! {k:11s}  PENDING CaveSLUG {slug} create failure"
                 DataIssue.objects.create(parser="caves", message=message)
                 print(message)
         else:
@@ -214,7 +285,7 @@ def do_pending_cave(k, url, area):
             message = f" ! {k:11s}  PENDING entrance + cave UNION create failure  '{cave}' [{ent}]"
             DataIssue.objects.create(parser="caves", message=message)
             print(message)
-
+    return cave
 
 def readentrance(filename):
     """Reads an enrance description from the .html file
@@ -562,18 +633,9 @@ def readcaves():
         DataIssue.objects.filter(parser="caves ok").delete()
         DataIssue.objects.filter(parser="entrances").delete()
 
-        print(" - Creating Areas 1623, 1624, 1627 and 1626")
-        # This crashes on the server with MariaDB even though a null parent is explicitly allowed.
-        area_1623 = Area.objects.create(short_name="1623", super=None)
-        area_1623.save()
-        area_1624 = Area.objects.create(short_name="1624", super=None)
-        area_1624.save()
-        area_1626 = Area.objects.create(short_name="1626", super=None)
-        area_1626.save()
-        area_1627 = Area.objects.create(short_name="1627", super=None)
-        area_1627.save()
-
     with transaction.atomic():
+        area = get_area("1623")
+
         print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS)
         print(" - Reading Entrances from entrance descriptions xml files")
         for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
@@ -605,15 +667,7 @@ def readcaves():
                 areanum = "1623"
                 url = f"1623/{k}"
 
-            area = area_1623
-            if areanum == "1623":
-                area = area_1623
-            if areanum == "1624":
-                area = area_1624
-            if areanum == "1626":
-                area = area_1626
-            if areanum == "1627":
-                area = area_1627
+            area = get_area(areanum)
             try:
                 do_pending_cave(k, url, area)
             except:
diff --git a/parsers/survex.py b/parsers/survex.py
index a1cab9a..70fc987 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -15,6 +15,7 @@ from troggle.core.models.wallets import Wallet
 from troggle.core.models.troggle import DataIssue, Expedition
 from troggle.core.utils import chaosmonkey, get_process_memory
 #from troggle.parsers.logbooks import GetCaveLookup
+from troggle.parsers.caves import create_new_cave
 from troggle.parsers.people import GetPersonExpeditionNameLookup, known_foreigner
 
 """Imports the tree of survex files following from a defined root .svx file
@@ -1168,15 +1169,20 @@ class LoadingSurvex:
             print(f"! ALREADY PENDING {id}", file=sys.stderr)
             return
 
+        # It is too late to add it to the pending caves list here: they were already
+        # processed in parsers/caves.py, so we have to do a bespoke creation.
+        cave = create_new_cave(includelabel)
+        
         message = f" ! Warning: cave identifier '{caveid}'or {id} (guessed from file path)  is not a known cave.  Need to add to expoweb/cave_data/pending.txt ?  In '{includelabel}.svx' at depth:[{len(depth)}]."
         print("\n" + message)
         print("\n" + message, file=sys.stderr)
         print(f"{self.pending}", end="", file=sys.stderr)
         stash_data_issue(parser="survex", message=message, url=None, sb=(includelabel))
-        # print(f' # datastack in  LoadSurvexFile:{includelabel}', file=sys.stderr)
-        # for dict in self.datastack:
-        # print(f'   type: <{dict["type"].upper()}   >', file=sys.stderr)
-
+        
+        # It is too late to add it to pending caves here, they were already processed in parsers/caves.py
+        # and something else is creating them...
+        # cave = create_new_cave(includelabel)
+        
     def LoadSurvexFile(self, svxid):
         """Creates SurvexFile in the database, and SurvexDirectory if needed
         with links to 'cave'
@@ -1219,7 +1225,6 @@ class LoadingSurvex:
         newdirectory.save()
         newfile.survexdirectory = newdirectory
         self.survexdict[newdirectory].append(newfile)
-        cave = IdentifyCave(headpath)  # cave already exists in db
 
         if not newdirectory:
             message = f" ! 'None' SurvexDirectory returned from GetSurvexDirectory({headpath})"
@@ -1227,13 +1232,18 @@ class LoadingSurvex:
             print(message, file=sys.stderr)
             stash_data_issue(parser="survex", message=message, url=f"/survexfile/{svxid}")
 
+        cave = IdentifyCave(headpath)  # cave already exists in db
+        if not cave:
+            # probably a surface survey, or a cave in a new area 
+            # e.g. 1624 not previously managed, and not in the pending list
+            self.ReportNonCaveIncludes(headpath, svxid, depth)
+        #try again
+        cave = IdentifyCave(headpath)  
         if cave:
             newdirectory.cave = cave
             newfile.cave = cave
             # print(f"\n - New directory '{newdirectory}' for cave '{cave}'",file=sys.stderr)
-        else:  # probably a surface survey, or a cave in a new area e.g. 1624 not previously managed, and not in the pending list
-            self.ReportNonCaveIncludes(headpath, svxid, depth)
-
+ 
         if not newfile.survexdirectory:
             message = f" ! SurvexDirectory NOT SET in new SurvexFile {svxid} "
             print(message)
@@ -2443,30 +2453,12 @@ def MakeFileRoot(fn):
     or tries to find the primary survex file for this cave
     """
     cave = IdentifyCave(fn)
+    if not cave:
+        if fn != UNSEENS:
+            cave = create_new_cave(fn)
 
     print(f"  - Making/finding a new root survexfile for this import: {fn}")
-    
-    #this doesn't work. Using the cave primary survex file as the fileroot
-    # 1. does not parse the file correctly and does not attach the blocks in it to the surfvexfile
-    # 2. it still doesn't appear on the http://localhost:8000/survexfile/107 list
-    # 3. it does make the sb appear on http://localhost:8000/survexfile/caves-1623/107/107.svx when it shouldnt.
-    
-    # if cave == None:
-        # fileroot = SurvexFile(path=fn, cave=cave)
-        # fileroot.survexdirectory = SurvexDirectory.objects.get(id=1) # just re-use the first thing we made
-        # fileroot.save()
-        # fileroot.cave = cave
-        # print(f"   - new fileroot {type(fileroot)} for {fn} with cave {cave} - {fileroot}")
-    # else:
-        # print(f"   - {cave.survexdirectory_set.all()}")
-        # print(f"   - {cave.survexdirectory_set.filter(cave=cave)}")
-        # for sd in cave.survexdirectory_set.filter(cave=cave):
-            # print(f"   - {sd.cave} {sd.primarysurvexfile}")
-            # if f"{sd.primarysurvexfile}".replace("caves-","").startswith(f"{sd.cave}"[:4]):
-                # print(f"   - USE THIS ONE {sd.cave} {sd.primarysurvexfile}")
-                # fileroot = sd.primarysurvexfile
-        # print(f"   - old fileroot {type(fileroot)} for {fn} with cave {cave} - {fileroot}")
-        
+
     fileroot = SurvexFile(path=fn, cave=cave)
     fileroot.survexdirectory = SurvexDirectory.objects.get(id=1) # default