Mirror of https://expo.survex.com/repositories/troggle/.git (synced 2025-12-14 23:17:05 +00:00)
Survex editor now parses edited files
@@ -8,13 +8,13 @@ from datetime import datetime, timezone
from pathlib import Path

import troggle.settings as settings
from troggle.core.models.caves import Cave, Entrance
from troggle.core.models.caves import Cave, Entrance, GetCaveLookup
from troggle.core.models.logbooks import QM
from troggle.core.models.survex import SurvexBlock, SurvexDirectory, SurvexFile, SurvexPersonRole, SurvexStation
from troggle.core.models.wallets import Wallet
from troggle.core.models.troggle import DataIssue, Expedition
from troggle.core.utils import chaosmonkey, get_process_memory
from troggle.parsers.logbooks import GetCaveLookup
#from troggle.parsers.logbooks import GetCaveLookup
from troggle.parsers.people import GetPersonExpeditionNameLookup, known_foreigner

"""Imports the tree of survex files following from a defined root .svx file
@@ -63,6 +63,28 @@ class SurvexLeg:
    tape = 0.0
    compass = 0.0
    clino = 0.0

def IdentifyCave(cavepath):
    """Given a file path for a survex file, or a survex-block path,
    return the cave object
    """
    caveslist = GetCaveLookup()
    if cavepath.lower() in caveslist:
        return caveslist[cavepath.lower()]
    # TO DO - this predates the big revision to Gcavelookup so look at this again carefully
    path_match = LoadingSurvex.rx_cave.search(cavepath) # use as Class method
    if path_match:
        sluggy = f"{path_match.group(1)}-{path_match.group(2)}"
        guesses = [sluggy.lower(), path_match.group(2).lower()]
        for g in guesses:
            if g in caveslist:
                caveslist[cavepath] = caveslist[g]
                return caveslist[g]
        print(f" ! Failed to find cave for {cavepath.lower()}")
    else:
        # not a cave, but that is fine.
        # print(f' ! No regex(standard identifier) cave match for {cavepath.lower()}')
        return None
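A quick sketch of how this new module-level helper might be called from other parsers or views; the paths below are invented for illustration and assume a configured troggle environment, they are not part of the commit:

cave = IdentifyCave("caves-1623/204/trident/trident")  # illustrative path
print(cave)  # a Cave object if the lookup or the rx_cave regex resolves it, else None

# Paths that are not cave surveys (surface walks, fixed points, ...) simply return None:
print(IdentifyCave("fixedpts/gps/gps10"))  # illustrative path, expected None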

def datewallet(w, earliest):
    """Gets the date of the youngest survexblock associated with the wallet
@@ -1093,23 +1115,6 @@ class LoadingSurvex:
            f" $ flagslist:{flags}",
        )

    def IdentifyCave(self, cavepath):
        if cavepath.lower() in self.caveslist:
            return self.caveslist[cavepath.lower()]
        # TO DO - this predates the big revision to Gcavelookup so look at this again carefully
        path_match = self.rx_cave.search(cavepath)
        if path_match:
            sluggy = f"{path_match.group(1)}-{path_match.group(2)}"
            guesses = [sluggy.lower(), path_match.group(2).lower()]
            for g in guesses:
                if g in self.caveslist:
                    self.caveslist[cavepath] = self.caveslist[g]
                    return self.caveslist[g]
            print(f" ! Failed to find cave for {cavepath.lower()}")
        else:
            # not a cave, but that is fine.
            # print(f' ! No regex(standard identifier) cave match for {cavepath.lower()}')
            return None

    def GetSurvexDirectory(self, headpath):
        """This creates a SurvexDirectory if it has not been seen before, and on creation
@@ -1214,7 +1219,7 @@ class LoadingSurvex:
        newdirectory.save()
        newfile.survexdirectory = newdirectory
        self.survexdict[newdirectory].append(newfile)
        cave = self.IdentifyCave(headpath) # cave already exists in db
        cave = IdentifyCave(headpath) # cave already exists in db

        if not newdirectory:
            message = f" ! 'None' SurvexDirectory returned from GetSurvexDirectory({headpath})"
@@ -1458,28 +1463,42 @@ class LoadingSurvex:
        # ;*edulcni means we are returning from an included file
        if edulcni:
            self.ProcessEdulcniLine(edulcni)

    def LoadSurvexSetup(self, survexblock, survexfile):
        self.depthbegin = 0
        self.datastar = self.datastardefault
        print(
            self.insp
            + f" - MEM:{get_process_memory():.3f} Reading. parent:{survexblock.survexfile.path} <> {survexfile.path} "
        )
        self.lineno = 0
        sys.stderr.flush()
        self.callcount += 1
        if self.callcount % 10 == 0:
            print(".", file=sys.stderr, end="")
        if self.callcount % 500 == 0:
            print("\n", file=sys.stderr, end="")
        # Try to find the cave in the DB if not use the string as before
        path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path)

    def get_cave(self, path):
        """Read the file path to a survex file and guesses the cave
        """
        path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", path)
        if path_match:
            pos_cave = f"{path_match.group(1)}-{path_match.group(2)}"
            cave = getCaveByReference(pos_cave)
            if cave:
                survexfile.cave = cave
            return cave
        return None
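The cave-reference regex used in get_cave() above can be exercised on its own; a minimal, runnable sketch with made-up paths showing which shapes of path it extracts an "area-cave" reference from:

import re

rx = re.compile(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/")

for p in ("caves-1623/204/gewaltig.svx",         # numeric cave id    -> 1623-204
          "caves-1626/2018-dm-07/entrance.svx",  # year-style cave id -> 1626-2018-dm-07
          "fixedpts/gps/gps10.svx"):             # not a cave path    -> None
    m = rx.search(p)
    print(p, "->", f"{m.group(1)}-{m.group(2)}" if m else None)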

    # def LoadSurvexSetup(self, survexblock, survexfile):
    #     """REFACTOR to use get_cave()

    #     This does not seem to get run at all ?!
    #     """
    #     self.depthbegin = 0
    #     self.datastar = self.datastardefault
    #     print(
    #         self.insp
    #         + f" - MEM:{get_process_memory():.3f} Reading. parent:{survexblock.survexfile.path} <> {survexfile.path} "
    #     )
    #     self.lineno = 0
    #     sys.stderr.flush()
    #     self.callcount += 1
    #     if self.callcount % 10 == 0:
    #         print(".", file=sys.stderr, end="")
    #     if self.callcount % 500 == 0:
    #         print("\n", file=sys.stderr, end="")
    #     # Try to find the cave in the DB if not use the string as before
    #     path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path)
    #     if path_match:
    #         pos_cave = f"{path_match.group(1)}-{path_match.group(2)}"
    #         cave = getCaveByReference(pos_cave)
    #         if cave:
    #             survexfile.cave = cave

    def LinearLoad(self, survexblock, path, collatefilename):
        """Loads a single survex file. Usually used to import all the survex files which have been collated
@@ -1497,7 +1516,7 @@ class LoadingSurvex:
        slengthtotal = 0.0
        nlegstotal = 0
        self.relativefilename = path
        self.IdentifyCave(path) # this will produce null for survex files which are geographic collections
        IdentifyCave(path) # this will produce null for survex files which are geographic collections

        self.currentsurvexfile = survexblock.survexfile
        self.currentsurvexfile.save() # django insists on this although it is already saved !?
@@ -1637,6 +1656,7 @@ class LoadingSurvex:
                    legslength=0.0,
                )
                newsurvexblock.save()
                print(f"SB: #{newsurvexblock.id} '{newsurvexblock}' parent:{newsurvexblock.parent} f:{newsurvexblock.survexfile}")
                newsurvexblock.title = (
                    "(" + survexblock.title + ")"
                ) # copy parent initially, overwrite if it has its own
@@ -2306,43 +2326,48 @@ def parse_one_file(fpath): # --------------------------------------in progress--
    In the initial file parsing in databaseReset, the *include expansion is done
    in an earlier stage than LinearLoad(). By the time LinearLoad() is called,
    all the *include expansion has happened.

    WORK IN PROGRESS.
    Works fine for a completely new survex file.

    For an edited, pre-existing survex file,
    I am having great trouble getting the 'parent' block to work correctly.
    It gets overwritten, and then nullified, on repeated SAVE & import.
    I should learn how to step through with the debugger.
    """
    def parse_new_svx(fpath, blockroot=None, svxfileroot=None):
    def parse_new_svx(fpath, svx_load, cave, svxfileroot=None):
        """We need a dummy survex block which has the survexfile being parsed
        as its .survexfile field. But it is used in two ways, it is also
        set as the parent block for the new blocks being created. This has to be fixed
        later.
        This all needs refactoring.
        """
        if svxfileroot == None:
            svxfileroot = MakeFileRoot(fpath)

            svxfileroot = MakeFileRoot(fpath, cave)
            svxfileroot.save()

        if blockroot == None:
            newname = "adhoc_" + str(Path(str(svxfileroot)).name)
            survexblockparent = SurvexBlock(
                name=newname, survexpath="", survexfile=svxfileroot, legsall=0, legslength=0.0
            )
            survexblockparent.save()
            blockroot = survexblockparent
        # It is vital that the block has attached the survexfile object which is being parsed.
        block_dummy = SurvexBlock(
            name="dummy", survexpath="", survexfile=svxfileroot, legsall=0, legslength=0.0
        )
        svxfileroot.save()
        block_dummy.save()
        newname = f"#{block_dummy.id}_" + str(Path(str(svxfileroot)).name)
        block_dummy.name = newname
        block_dummy.save()
        print(f" - block_dummy now '{block_dummy}' {type(block_dummy)} id={block_dummy.id} f:{block_dummy.survexfile}")


        svx_load.survexdict[svxfileroot.survexdirectory] = []
        svx_load.survexdict[svxfileroot.survexdirectory].append(svxfileroot)
        svx_load.svxdirs[""] = svxfileroot.survexdirectory

        # ----------------------------------------------------------------
        svx_load.LinearLoad(blockroot, svxfileroot.path, fname)
        svx_load.LinearLoad(block_dummy, svxfileroot.path, fname)
        # ----------------------------------------------------------------

        # Now we don't need or want the dummy any more
        block_dummy.delete()

    global svx_load
    print(f"\n - Loading One Survex file '{fpath}'", file=sys.stderr)
    svx_load = LoadingSurvex()
    svx_load.survexdict = {}

    fname = Path(settings.SURVEX_DATA, (fpath + ".svx"))
    # print(f" - {fname=}")

    svxs = SurvexFile.objects.filter(path=fpath)
    if svxs:
@@ -2351,67 +2376,70 @@ def parse_one_file(fpath): # --------------------------------------in progress--
            print(f" - Aborting file parsing & import into database.")
            return False
        print(f" - Pre-existing survexfile {svxs}.")
        # reparse_existing_svx(svxs)
        existingsvx = SurvexFile.objects.get(path=fpath)
        existingcave = existingsvx.cave
        print(f" - survexfile is {existingsvx} id={existingsvx.id} {existingcave}")
        print(f" - survexfile id={existingsvx.id} {existingsvx} {existingcave}")

        sbs = existingsvx.survexblock_set.all()
        existingparent = None
        parents =set()
        if sbs:
            for sb in sbs:
                print(f" - cleaning survex block {sb=}")
                # print(f" - {sb.id} checking survex block {sb=}")
                try:
                    if sb.parent:
                        parents.add(sb.parent)
                        # print(f" - adding {sb.parent=}")
                except:
                    print(f" ! FAILURE to access sb.parent {sb=}")
                sb.delete()
                    print(f" ! FAILURE to access sb.parent {sb=}\n ! {sb.parent_id=} ")# \n{dir(sb)}
                    # even though the parent_id exists.. hmm.
            for sb in sbs:
                # print(f" - {sb.id} {sb.pk} {sb}")
                sb_keep = sb
                if sb not in parents:
                    # print(f" - {sb.id} Deleting survex block {sb=}")
                    sb.delete()

        if parents:
            print(f" - set of parent blocks {parents}")
            # print(f" - parents get {parents}")
            if len(parents) > 1:
                print(f" - WARNING more than one parent survex block!")
            existingparent = parents.pop()
            existingparent = parents.pop() # removes it
            parents.add(existingparent) # restores it

        # print(f" - deleting survex file {existingsvx=}")
        # existingsvx.delete()
        print(f" - Reloading and parsing this survexfile '{fpath}' Loading...")

        parse_new_svx(fpath, blockroot=existingparent, svxfileroot=existingsvx)

        svxs = SurvexFile.objects.filter(path=fpath)
        if len(svxs)>1:
            print(f" ! Mistake? More than one survex file object in database with the same file-path {svxs}")
            print(f" - Aborting file parsing & import into database.")
            return False
        replacesvx = SurvexFile.objects.get(path=fpath)
        replacesvx.cave = existingcave
        print(f" - new/replacement survexfile {svxs}. id={replacesvx.id}")
        replacesvx.save()
        print(f" - Reloading and parsing this survexfile '{fpath}' Loading...")
        # Logic is that we need an SB which links to the survexfile we are parsing for the parser
        # to work, but we delete all those before we start parsing. Urk.
        #===========
        parse_new_svx(fpath, svx_load, existingsvx, svxfileroot=existingsvx)
        #===========

        print(f" - survexfile id={existingsvx.id} update ")

        if parents:
            sbs = replacesvx.survexblock_set.all()
            print(f" - parents set {parents}")
            sbs = existingsvx.survexblock_set.all()
            if len(sbs)<1:
                print(f" ! No survex blocks found. Parser failure...")
            for sb in sbs:
                print(f" - re-setting survex block parent{sb=}")
                print(f" - {sb.id} re-setting survex block parent{sb=}")
                sb.parent = existingparent # should be all the same
                sb.save()

    else:
        print(f" - Not seen this survexfile before '{fpath}' Loading...")
        parse_new_svx(fpath)
        print(f" - Not seen this survexfile before '{fpath}' Loading. ..")
        #===========
        parse_new_svx(fpath,svx_load, IdentifyCave(fpath))
        #===========

    legsnumber = svx_load.legsnumber

    print(f" - Number of SurvexDirectories: {len(svx_load.survexdict):,}")
    print(f" - SurvexDirectories: {svx_load.survexdict}")

    tf = 0
    for d in svx_load.survexdict:
        tf += len(svx_load.survexdict[d])
    print(f" - Number of SurvexFiles: {tf:,}")
    print(f" - Number of Survex legs: {legsnumber:,}")
    print(f" - Length of Survex legs: {svx_load.slength:.2f} m")
    # print(f" - Number of SurvexDirectories: {len(svx_load.survexdict):,}")
    # tf = 0
    # for d in svx_load.survexdict:
    #     print(f" - SD: {d}")
    #     tf += len(svx_load.survexdict[d])
    # print(f" - Number of SurvexFiles: {tf:,}")
    # print(f" - Number of Survex legs: {svx_load.legsnumber:,}")
    # print(f" - Length of Survex legs: {svx_load.slength:.2f} m")

    svx_load = None
    return True
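For orientation, the expected call is simply parse_one_file() with a path relative to settings.SURVEX_DATA and without the ".svx" extension (the function appends it); a hedged sketch with an invented path, assuming a configured troggle environment:

from troggle.parsers.survex import parse_one_file

ok = parse_one_file("caves-1623/290/290")  # illustrative path, no ".svx" suffix
if not ok:
    print("import aborted - more than one SurvexFile row matched this path")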
@@ -2432,15 +2460,18 @@ def MakeSurvexFileRoot():
    return fileroot


def MakeFileRoot(fn):
def MakeFileRoot(fn, cave):
    """Returns a file_object.path = _unseens.svx associated with directory_object.path = SURVEX_DATA

    CHANGE THIS to just use the same block root as for SURVEX_TOPNAME ?
    """
    print(f" - making a new root survexfile for this import: {fn}")
    fileroot = SurvexFile(path=fn, cave=None)
    print(f" - Making a new root survexfile for this import: {fn}")
    fileroot = SurvexFile(path=fn, cave=cave)
    fileroot.survexdirectory = SurvexDirectory.objects.get(id=1) # just re-use the first thing we made
    fileroot.save()
    cave = IdentifyCave(fn)
    fileroot.cave = cave
    print(f" - new fileroot {type(fileroot)} for {fn} with cave {cave}")
    return fileroot
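How the new cave parameter is used at the two kinds of call site this commit touches; a minimal sketch assuming a configured troggle environment (the cave-specific path is invented, not from the commit):

# The catch-all root for unrecognised files passes cave=None, and MakeFileRoot()
# then falls back to IdentifyCave(fn) itself:
omitsfileroot = MakeFileRoot(UNSEENS, None)

# A root for a single edited file can pass the cave it already knows about,
# e.g. from an existing SurvexFile record (illustrative path):
existing = SurvexFile.objects.get(path="caves-1623/290/290")
oneroot = MakeFileRoot("caves-1623/290/290", existing.cave)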

@@ -2490,7 +2521,7 @@ def LoadSurvexBlocks():
    # sudo service mariadb start
    survexblockroot.save()

    omitsfileroot = MakeFileRoot(UNSEENS)
    omitsfileroot = MakeFileRoot(UNSEENS, None)
    survexomitsroot = SurvexBlock(
        name=OMITBLOCK, survexpath="", survexfile=omitsfileroot, legsall=0, legslength=0.0
    )