From 122cdd7fc8620b2348d75b1bb786ae4202db9a55 Mon Sep 17 00:00:00 2001
From: Philip Sargent <philip.sargent@klebos.com>
Date: Sun, 28 Jun 2020 01:50:34 +0100
Subject: [PATCH] replace GetCaveByReference

---
 core/models_caves.py  |  19 ++--
 core/models_survex.py |   4 +
 parsers/logbooks.py   |   8 +-
 parsers/survex.py     | 220 +++++++++++++++++++++++++-----------------
 templates/base.html   |   6 +-
 5 files changed, 155 insertions(+), 102 deletions(-)

diff --git a/core/models_caves.py b/core/models_caves.py
index 0338a62..e20b17f 100644
--- a/core/models_caves.py
+++ b/core/models_caves.py
@@ -201,15 +201,16 @@ class Cave(TroggleModel):
                     pass
         return lowestareas[0]
 
-def getCaveByReference(reference):
-    areaname, code = reference.split("-", 1)
-    area = Area.objects.get(short_name = areaname)
-    foundCaves = list(Cave.objects.filter(area = area,  kataster_number = code).all()) + list(Cave.objects.filter(area = area,  unofficial_number = code).all())
-    #print((list(foundCaves)))
-    if len(foundCaves) == 1:
-        return foundCaves[0]
-    else:
-        return False
+# getCaveByReference() is unreliable and is disabled; use GetCaveLookup() from parsers/logbooks.py instead.
+# def getCaveByReference(reference):
+    # areaname, code = reference.split("-", 1)
+    # area = Area.objects.get(short_name = areaname)
+    # foundCaves = list(Cave.objects.filter(area = area,  kataster_number = code).all()) + list(Cave.objects.filter(area = area,  unofficial_number = code).all())
+    # #print((list(foundCaves)))
+    # if len(foundCaves) == 1:
+        # return foundCaves[0]
+    # else:
+        # return False
 
 class OtherCaveName(TroggleModel):
     name = models.CharField(max_length=160)
diff --git a/core/models_survex.py b/core/models_survex.py
index e46baae..4cbf611 100644
--- a/core/models_survex.py
+++ b/core/models_survex.py
@@ -16,6 +16,10 @@ class SurvexDirectory(models.Model):
     class Meta:
         ordering = ('id',)
 
+    def __str__(self):
+        """Return '<directory path>-<primary survex file path>' as the display name."""
+        return str(self.path) + "-" + str(self.primarysurvexfile.path) 
+
 
 class SurvexFile(models.Model):
     path = models.CharField(max_length=200)
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 6a19dad..a724394 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -12,7 +12,7 @@ from django.template.defaultfilters import slugify
 from django.utils.timezone import get_current_timezone, make_aware
 
 from troggle.core.models import DataIssue, Expedition
-from troggle.core.models_caves import Cave, OtherCaveName, getCaveByReference, LogbookEntry, PersonTrip
+from troggle.core.models_caves import Cave, OtherCaveName, LogbookEntry, PersonTrip
 from parsers.people import GetPersonExpeditionNameLookup
 from utils import save_carefully
 
@@ -91,7 +91,10 @@ def GetCaveLookup():
         if cave.kataster_number:
             Gcavelookup[cave.kataster_number] = cave
         if cave.unofficial_number:
-            Gcavelookup[cave.unofficial_number] = cave
+            Gcavelookup[cave.unofficial_number.lower()] = cave
+        if cave.filename:
+            # this is the slug - usually..
+            Gcavelookup[cave.filename.replace(".html","").lower()] = cave
     # These are exact matches! edit to check for prefix only!
     Gcavelookup["tunnocks"] = Gcavelookup["258"]
     Gcavelookup["hauchhole"] = Gcavelookup["234"]
@@ -586,6 +589,7 @@ def parseAutoLogBookEntry(filename):
     if caveMatch:
         caveRef, = caveMatch.groups()
         try:
+            # NOTE: getCaveByReference() is slow and unreliable, and this patch removes its import above - switch to GetCaveLookup().
             cave = getCaveByReference(caveRef)
         except AssertionError:
             cave = None
diff --git a/parsers/survex.py b/parsers/survex.py
index 7ac8a5e..31dff03 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -14,6 +14,7 @@ import troggle.core.models as models
 import troggle.core.models_caves as models_caves
 import troggle.core.models_survex as models_survex
 from troggle.parsers.people import GetPersonExpeditionNameLookup
+from troggle.parsers.logbooks import GetCaveLookup
 from troggle.core.views_caves import MapLocations
 
 survexblockroot = None
@@ -46,8 +47,8 @@ class LoadingSurvex():
 
     rx_cave    = re.compile(r'caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/')
     rx_comment = re.compile(r'([^;]*?)\s*(?:;\s*(.*))?\n?$')
-    rx_comminc = re.compile(r'(?i)^\s*;\*include[\s](.*)$') # inserted by linear collate ;*include
-    rx_commcni = re.compile(r'(?i)^\s*;\*edulcni[\s](.*)$') # inserted by linear collate ;*edulcni
+    rx_comminc = re.compile(r'(?i)^\*include[\s]*([-\w/]*).*$') # inserted by linear collate ;*include
+    rx_commcni = re.compile(r'(?i)^\*edulcni[\s]*([-\w/]*).*$') # inserted by linear collate ;*edulcni
     rx_include = re.compile(r'(?i)^\s*(\*include[\s].*)$')
     rx_ref     = re.compile(r'(?i)^\s*ref[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)')
     rx_star    = re.compile(r'(?i)\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$')
@@ -60,7 +61,10 @@ class LoadingSurvex():
     depthinclude = 0
     stackbegin =[]
     stackinclude = []
-    svxfileslist =[]
+    svxfileslist = []
+    svxdirs = {}
+    svxcaves = {}
+    svxfiletitle = {}
     lineno = 0
     insp = ""
     callcount = 0
@@ -122,6 +126,9 @@ class LoadingSurvex():
         """This reads compass, clino and tape data but only keeps the tape lengths,
         the rest is discarded after error-checking.
         """
+        # Check first to see if we are in a splay and abort if so.
+        # TO DO splay abort
+        
         stardata = self.stardata
         survexleg = SurvexLeg()
 
@@ -184,7 +191,8 @@ class LoadingSurvex():
                     print(("! Compass misread in", survexblock.survexfile.path))
                     print(("  Stardata:", stardata))
                     print(("  Line:", ls))
-                    message = ' ! Value Error: line %s in %s' % (ls, survexblock.survexfile.path)
+                    message = " ! Value Error: lcompass:'{}' line {} in '{}'".format(lcompass, 
+                            ls, survexblock.survexfile.path)
                     models.DataIssue.objects.create(parser='survex', message=message)
                     survexleg.compass = 1000
                 survexleg.clino = -90.0
@@ -301,18 +309,25 @@ class LoadingSurvex():
         pass
 
     def IdentifyCave(self, cavepath):
-        path = os.path.join(os.path.split(cavepath)[0], re.sub(r"\.svx$", "", cavepath))
-        path_match = self.rx_cave.search(path)
-        print('    - Attempting cave match for %s' % path)
+        """Return the Cave for a survex path (cached in self.svxcaves), or None if no cave matches."""
+        if cavepath in self.svxcaves:
+            print('    - Cave FAST matched for %s' % cavepath)
+            return self.svxcaves[cavepath]
+        # Not cached: build the lookup key "<area>-<cave>" from the two groups captured by rx_cave.
+        path_match = self.rx_cave.search(cavepath)
         if path_match:
-            pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
-            cave = models_caves.getCaveByReference(pos_cave)
+            sluggy = '{}-{}'.format(path_match.group(1), path_match.group(2))
+            cave = GetCaveLookup().get(sluggy)
+            # Below is how it has been done for years: very fuzzy & slow searches
+            # ..and wrong!
+            #cave = models_caves.getCaveByReference(sluggy)
             if cave:
-                survexfile.cave = cave
-            print('    - Cave matched for %s' % path)
-            return cave
+                self.currentcave = cave
+                self.svxcaves[cavepath] = cave
+                print('    - Cave matched for %s' % cavepath)
+                return cave
         else:
-            print('    ! No cave match for %s' % path)
+            print('    ! No cave match for %s' % cavepath)
             return None
 
     def LoadSurvexFileBlock(self, survexblock, includelabel):
@@ -320,29 +335,56 @@ class LoadingSurvex():
         with links to 'cave'
         Creates a new current survexblock with valid .survexfile and valid .survexdirectory
         """
-        cave = self.IdentifyCave(self, includelabel)
-        survexdirectory = SurvexDirectory(path=dirpath, cave=cave, primarysurvexfile=self)
-        survexdirectory.save()
+        depth = " " * self.depthbegin
+        print("{:2}{}   - NEW survexfile:'{}'".format(self.depthbegin, depth, includelabel))
 
-        newsurvexfile = models_survex.SurvexFile(path=includelabel)
-        newsurvexfile.survexdirectory = survexdirectory
-        newsurvexfile.save()
+        headpath, tail = os.path.split(includelabel)
+        if headpath not in self.svxdirs:
+            self.svxdirs[headpath] = models_survex.SurvexDirectory(path=headpath, primarysurvexfile=survexblock.survexfile)
+        newsurvexdirectory = self.svxdirs[headpath]
         
-        name = includelabel
-        newsurvexblock = models_survex.SurvexBlock(name=name, parent=survexblock, 
-                            survexpath=survexblock.survexpath+"."+name, 
-                            cave=survexfile.cave, survexfile=newsurvexfile, 
-                            legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
-        newsurvexblock.save 
+        newsurvexfile = models_survex.SurvexFile(path=includelabel)
+        newsurvexfile.survexdirectory = newsurvexdirectory
+        
+        # Do not create a survexblock. Yes, there is a virtual block before the *begin statement but
+        # only the *title is usually in that, so just inherit the *title into the blocks.
+        # name = includelabel
+        # newsurvexblock = models_survex.SurvexBlock(name=name, parent=survexblock, 
+                            # survexpath=survexblock.survexpath+"."+name, 
+                            # survexfile=newsurvexfile, 
+                            # legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
+
+        cave = self.IdentifyCave(headpath)
+        if cave:
+            newsurvexdirectory.cave = cave
+            newsurvexfile.cave   = cave
+            #newsurvexblock.cave  = cave
+        newsurvexdirectory.save()
+        newsurvexfile.save()
+        #newsurvexblock.save 
 
         self.currentsurvexfile  = newsurvexfile
-        self.currentsurvexblock = newsurvexblock
+        #self.currentsurvexblock = newsurvexblock
+
+    def ProcessIncludeLine(self, survexblock, included):
+        """Handle a ';*include' marker: load the file named in the match's first group. TODO: push include state."""
+        svxid = included.groups()[0]
+        #depth = " " * self.depthbegin
+        #print("{:2}{}   - Include survexfile:'{}'".format(self.depthbegin, depth,  svxid))
+        self.LoadSurvexFileBlock(survexblock, svxid)
+
+    def ProcessEdulcniLine(self, survexblock, edulcni):
+        """Handle a ';*edulcni' marker: make the parent block and its survexfile current. TODO: pop include state."""
+        svxid = edulcni.groups()[0]
+        depth = " " * self.depthbegin
+        print("{:2}{}   - Edulcni  survexfile:'{}'".format(self.depthbegin, depth, svxid))
+        self.currentsurvexblock = survexblock.parent
+        self.currentsurvexfile = survexblock.parent.survexfile
 
     def LoadSurvexComment(self, survexblock, comment):
         # ignore all comments except ;ref and ;QM and ;*include (for collated survex file)
         refline = self.rx_ref.match(comment)
         if refline:
-            #comment = comment.replace("ref","").strip()
             comment = re.sub('(?i)\s*ref[.;]?',"",comment.strip())
             self.LoadSurvexRef(survexblock, comment)
 
@@ -353,13 +395,12 @@ class LoadingSurvex():
         included = self.rx_comminc.match(comment)
         # ;*include means we have been included; not 'proceed to include' which *include means
         if included:
-            self.LoadSurvexFileBlock(survexblock, included)
+            self.ProcessIncludeLine(survexblock,included)
 
         edulcni = self.rx_commcni.match(comment)
-        # ;*include means we have been included; not 'proceed to include' which *include means
+        # ;*edulcni means we are returning from an included file
         if edulcni:
-            currentsurvexblock = currentsurvexblock.parent
-            currentsurvexfile = currentsurvexblock.parent.survexfile
+            self.ProcessEdulcniLine(survexblock,edulcni)
 
     def LoadSurvexSetup(self,survexblock, survexfile):
         self.depthbegin = 0
@@ -503,22 +544,18 @@ class LoadingSurvex():
                 else:
                     pass # ignore all other sorts of data
 
-    def LinearRecursiveLoad(self, survexblock, path, fin, skipto):
+    def LinearRecursiveLoad(self, survexblock, path, svxlines):
         """Loads a single survex file. Usually used to import all the survex files which have been collated
         into a single file. Loads the begin/end blocks recursively.
         """
         self.relativefilename = path
         cave = self.IdentifyCave(path) # this will produce null for survex files which are geographic collections
 
-        svxlines = fin.read().splitlines()
+        blockcount = 0
         for svxline in svxlines:
-            self.lineno += 1
-            if self.lineno < skipto:
-                continue # skip through file to the place we got up to
-                
             sline, comment = self.rx_comment.match(svxline.strip()).groups()
             if comment:
-                self.LoadSurvexComment(survexblock, comment)
+                self.LoadSurvexComment(survexblock, comment) # this catches the ;*include and ;*edulcni lines too
             if not sline:
                 continue # skip blank lines
 
@@ -527,57 +564,61 @@ class LoadingSurvex():
             if mstar: # yes we are reading a *cmd
                 cmd, args = mstar.groups()
                 cmd = cmd.lower()
+
+                # ------------------------BEGIN
                 if re.match("begin$(?i)", cmd):
                     self.depthbegin += 1
-                    if args:
-                        depth = " " * self.depthbegin
-                        self.stackbegin.append(args.lower())
+                    depth = " " * self.depthbegin
+                    self.stackbegin.append(args.lower())
 
-                        previousnlegs = self.survexlegsnumber
-                        name = args.lower()
-                        print('   - Begin found for:{}, creating new SurvexBlock '.format(name))
+                    previousnlegs = self.survexlegsnumber
+                    name = args.lower()
+                    print("{:2}{}   - Begin for :'{}'".format(self.depthbegin,depth, name))
+                    survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, 
+                            survexpath=survexblock.survexpath+"."+name, 
+                            cave=self.currentcave, survexfile=self.currentsurvexfile, 
+                            legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
+                    survexblockdown.save()
+                    survexblock.save()
+                    survexblock = survexblockdown
 
-                        survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, 
-                                survexpath=survexblock.survexpath+"."+name, 
-                                cave=self.currentcave, survexfile=self.currentsurvexfile, 
-                                legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
-                        survexblockdown.save()
-                        survexblock.save()
-                        survexblock = survexblockdown
-                    else:
-                        self.depthbegin += 1
+                    blockcount +=1
+                    if blockcount % 10 ==0 :
+                        print(".", file=sys.stderr,end='')
+                    if blockcount % 500 ==0 :
+                        print("\n", file=sys.stderr,end='')
+                    sys.stderr.flush();
 
+                # ---------------------------END
                 elif re.match("end$(?i)", cmd):
-                    # haven#t really thought this through..
-                    if survexblock:
-                        self.currentsurvexblock = survexblock.parent
-                        self.currentsurvexfile = survexblock.parent.survexfile
+                    depth = " " * self.depthbegin
+                    self.currentsurvexblock = survexblock.parent
+                    self.currentsurvexfile  = survexblock.parent.survexfile
 
-                    if self.depthbegin:
-                        print("   - End    -return from nested *begin/*end block: '{}'".format(args))
-                        self.depthbegin -= 1
-                    else:
-                        legsinblock = self.survexlegsnumber - previousnlegs
-                        print("  - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber))
-                        survexblock.legsall = legsinblock
-                        survexblock.save()
-                        return
+                    print("{:2}{}   - End   from:'{}'".format(self.depthbegin,depth,args))
+                    legsinblock = self.survexlegsnumber - previousnlegs
+                    print("{:2}{}   - LEGS: {} (previous: {}, now:{})".format(self.depthbegin,
+                        depth,legsinblock,previousnlegs,self.survexlegsnumber))
+                    survexblock.legsall = legsinblock
+                    survexblock.save()
+                    self.depthbegin -= 1
 
-                elif re.match("title$(?i)", cmd):
+                # -----------------------------
+                elif re.match("(?i)title$", cmd):
                     self.currenttitle = args
-                elif cmd == "ref":
+                elif re.match("(?i)ref$", cmd):
                     self.LoadSurvexRef(survexblock, args)
-                elif cmd == "flags":
+                elif re.match("(?i)flags$", cmd):
                     self.LoadSurvexFlags(args, cmd)
-                elif cmd == "data":
+                elif re.match("(?i)data$", cmd):
                     self.LoadSurvexDataCmd(survexblock, args)
-                elif re.match("date$(?i)", cmd):
+                elif re.match("(?i)date$", cmd):
                     self.LoadSurvexDate(survexblock, args)
-                elif re.match("team$(?i)", cmd):
+                elif re.match("(?i)team$", cmd):
                     self.LoadSurvexTeam(survexblock, args)
-                elif cmd == "set" and re.match("names(?i)", args):
+                elif re.match("(?i)set$", cmd) and re.match("(?i)names", args):
                     pass
-                elif re.match("include$(?i)", cmd):
+                elif re.match("(?i)include$", cmd):
                     message = " ! -ERROR *include command not expected here {}. Re-run a full Survex import.".format(path)
                     print(message)
                     print(message,file=sys.stderr)
@@ -718,7 +759,9 @@ def FindAndLoadSurvex(survexblockroot):
     finroot = survexfileroot.OpenFile()
     fcollate.write(";*include {}\n".format(survexfileroot.path))
     flinear.write("{:2} {} *include {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path))
+    #----------------------------------------------------------------
     svx_scan.RecursiveScan(survexblockroot, survexfileroot, finroot, flinear, fcollate)
+    #----------------------------------------------------------------
     flinear.write("{:2} {} *edulcni {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path))
     fcollate.write(";*edulcni {}\n".format(survexfileroot.path))
     mem1 = models.get_process_memory()
@@ -739,13 +782,14 @@ def FindAndLoadSurvex(survexblockroot):
     # Before doing this, it would be good to identify the *equate and *entrance we need that are relevant to the
     # entrance locations currently loaded after this by LoadPos(), but could better be done before ?
     # look in MapLocations() for how we find the entrances
-    print('\n - Loading All Survex Blocks...',file=sys.stderr)
-    
    
+    print('\n - Loading All Survex Blocks (LinearRecursive)',file=sys.stderr)
     svx_load = LoadingSurvex()
     with open(collatefilename, "r") as fcollate:
-        #svx_load.LinearRecursiveLoad(survexblockroot,survexfileroot.path,fcollate, 0)
-        pass
+            svxlines = fcollate.read().splitlines()
+    #----------------------------------------------------------------
+    svx_load.LinearRecursiveLoad(survexblockroot,survexfileroot.path, svxlines)
+    #----------------------------------------------------------------
 
     print(" - MEM:{:7.2f} MB STOP".format(mem1),file=sys.stderr)
     print(" - MEM:{:7.3f} MB USED".format(mem1-mem0),file=sys.stderr)
@@ -755,15 +799,15 @@ def FindAndLoadSurvex(survexblockroot):
     mem1 = models.get_process_memory()
     svx_load = None
 
-    print('\n - Loading All Survex Blocks...',file=sys.stderr)
-    svxlrl = LoadingSurvex()
+    print('\n - Loading All Survex Blocks (RecursiveRecursive)',file=sys.stderr)
+    # svxlrl = LoadingSurvex()
 
-    finroot = survexfileroot.OpenFile()
-    svxlrl.RecursiveRecursiveLoad(survexblockroot, survexfileroot, finroot)
-    finroot.close()
-    survexlegsnumber = svxlrl.survexlegsnumber
-    survexlegsalllength = svxlrl.survexlegsalllength
-    svxlrl = None
+    # finroot = survexfileroot.OpenFile()
+    # svxlrl.RecursiveRecursiveLoad(survexblockroot, survexfileroot, finroot)
+    # finroot.close()
+    # survexlegsnumber = svxlrl.survexlegsnumber
+    # survexlegsalllength = svxlrl.survexlegsalllength
+    # svxlrl = None
     
     # Close the logging file, Restore sys.stdout to our old saved file handle
     sys.stdout.close()
@@ -792,7 +836,7 @@ def LoadSurvexBlocks():
     # this is the first so id=1
     survexblockroot.save()
 
-    print(' - Loading All Survex Blocks...')
+    print(' - Loading Survex Blocks...')
     memstart = models.get_process_memory()
     survexlegsnumber, survexlegsalllength = FindAndLoadSurvex(survexblockroot)
     memend = models.get_process_memory()
@@ -802,7 +846,7 @@ def LoadSurvexBlocks():
     survexblockroot.legsall = survexlegsnumber
     survexblockroot.save()
     
-    print(" - total number of survex legs: {}m".format(survexlegsnumber))
+    print(" - total number of survex legs: {}".format(survexlegsnumber))
     print(" - total leg lengths loaded: {}m".format(survexlegsalllength))
     print(' - Loaded All Survex Blocks.')
 
diff --git a/templates/base.html b/templates/base.html
index 9e0d55f..c38710d 100644
--- a/templates/base.html
+++ b/templates/base.html
@@ -33,15 +33,15 @@
 <div class="toolbarlinks">
     <a href="{% url "survexcaveslist" %}">All Survex</a> |
     <a href="{% url "surveyscansfolders" %}">Scans</a> |
-    <a href="{% url "tunneldata" %}">Tunneldata</a> |
+    <a href="{% url "tunneldata" %}">Drawing files</a> |
     <a href="{% url "survexcavessingle" "caves-1623/290/290.svx" %}">290</a> |
     <a href="{% url "survexcavessingle" "caves-1623/291/291.svx" %}">291</a> |
     <a href="{% url "survexcavessingle" "caves-1626/359/359.svx" %}">359</a> |
     <a href="{% url "survexcavessingle" "caves-1623/258/258.svx" %}">258</a> |
     <a href="{% url "survexcavessingle" "caves-1623/264/264.svx" %}">264</a> |
+    <a href="{% url "survexcavessingle" "264" %}">Surveys-264</a> |
     <a href="{% url "expedition" 2018 %}">Expo2018</a> |
     <a href="{% url "expedition" 2019 %}">Expo2019</a> |
-    <a href="{% url "expedition" 2020 %}">Expo2020</a> |
  
     <a href="/admin/">Django admin</a>
     <br>
@@ -53,7 +53,7 @@
     
     <a href="{% url "frontpage" %}">tasks to do </a>  |
     <a id="cavesLink" href="{% url "caveindex" %}">caves</a>  |
-    <a id="caversLink" href="{% url "personindex" %}">cavers</a>  |
+    <a id="caversLink" href="{% url "personindex" %}">people</a>  |
     <a id="expeditionsLink" href="{% url "expeditions" %}">all expeditions</a> |
     <a href="{% url "stats" %}">statistics</a> |
     <a id="cuccLink" href="{% url "controlpanel" %}">import/export data</a>