From b461b87df646a705fd37afa41afff231d3e0d170 Mon Sep 17 00:00:00 2001
From: Philip Sargent <philip.sargent@gmail.com>
Date: Fri, 5 Nov 2021 22:59:54 +0200
Subject: [PATCH] fix unneeded runs of survex on survex import

---
 .gitignore        |  1 +
 parsers/survex.py | 73 ++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 60 insertions(+), 14 deletions(-)

diff --git a/.gitignore b/.gitignore
index c3ec8d4..742d605 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ syntax: glob
 *.pyc
 *.sql
 *.sqlite
+*.prof
 *~
 .idea/*
 .swp
diff --git a/parsers/survex.py b/parsers/survex.py
index 569d091..8b379ac 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -152,12 +152,14 @@ class LoadingSurvex():
     includestack = []
     stacksvxfiles = []
     svxfileslist = []
-    svxdirs = {}
+    svxdirs = {} 
+    uniquename = {}
     expos = {}
     survexdict = {} # each key is a directory, and its value is a list of files
     lineno = 0
     insp = ""
     callcount = 0
+    caverncount = 0
     ignoreprefix = ["surface", "kataster", "fixedpts", "gpx"]
     ignorenoncave = ["caves-1623", "caves-1623/2007-neu"]
     includedfilename =""
@@ -255,12 +257,15 @@ class LoadingSurvex():
 
     def LoadSurvexDate(self, survexblock, line):
         # we should make this a date RANGE for everything
+        def findexpedition(year):
+            return Expedition.objects.filter(year=year)
+            
         def setdate(year):
             # cacheing to save DB query on every block and to prepare for django-less troggle in future
             if year in self.expos:
                 expo = self.expos[year]
             else:
-                expeditions = Expedition.objects.filter(year=year)
+                expeditions = findexpedition(year)
                 if len(expeditions) != 1 :
                     message = f"! More than one expedition in year {year} '{line}' ({survexblock}) {survexblock.survexfile.path}"
                     print((self.insp+message))
@@ -1077,6 +1082,7 @@ class LoadingSurvex():
 
                     fullpath = os.path.join(settings.SURVEX_DATA, includepath + ".svx")
                     self.RunSurvexIfNeeded(os.path.join(settings.SURVEX_DATA, includepath))
+                    self.checkUniqueness(os.path.join(settings.SURVEX_DATA, includepath))
                     if os.path.isfile(fullpath):
                         #--------------------------------------------------------
                         self.depthinclude += 1
@@ -1135,6 +1141,18 @@ class LoadingSurvex():
                     flinear.write("                    {:2} {} *title {}\n".format(self.depthbegin, depth, args))
                     pass
 
+
+    def checkUniqueness(self,fullpath):
+        fn = Path(fullpath).name
+        if fn not in self.uniquename:
+            self.uniquename[fn] = 1
+        else:
+            self.uniquename[fn] += 1
+            message = f" ! NON-UNIQUE survex filename, overwriting .3d file in expowebcache '{fn}' - '{fullpath}' #{self.uniquename[fn]}"
+            print(message)
+            DataIssue.objects.create(parser='survex', message=message)
+
+    
     def RunSurvexIfNeeded(self,fullpath):
         now = time.time()
         cav_t = now - 365*24*3600
@@ -1142,12 +1160,20 @@ class LoadingSurvex():
         svx_t = now - 365*24*3600
 
         def runcavern():
-            # print(" -  Regenerating stale (or chaos-monkeyed) cavern .log and .3d for '{}'\n    days svx old: {:.1f}  cav:{:.1f}   log old: {:.1f}".
-            #   format(fullpath, (svx_t - log_t)/(24*3600), (cav_t - log_t)/(24*3600), (now - log_t)/(24*3600)))
+            '''This assumes all survex files have unique names and they are taken from many folders but the output is all put 
+            into the same folder. A serious potential bug. We should check uniqueness
+            '''
+            print(" -  Regenerating stale (or chaos-monkeyed) cavern .log and .3d for '{}'\n     at '{}'\n     days svx old: {:.1f}  cav:{:.1f}   log old: {:.1f}".format(fullpath, logpath, (svx_t - log_t)/(24*3600), (cav_t - log_t)/(24*3600), (now - log_t)/(24*3600)))
+            #print(f'  -  cav_t: {cav_t/(24*3600)} -  log_t: {log_t/(24*3600)} -   svx_t: {svx_t/(24*3600)} -   now: {now}')
             subprocess.call([settings.CAVERN, "--log", "--output={}".format(settings.THREEDCACHEDIR), "{}.svx".format(fullpath)])
+            self.caverncount += 1
+            
+            # should also collect all the .err files and create a DataIssue for each one which
+            # - is nonzero in size
+            # - has Error greater than 5% anywhere, or some other more serious error
 
         svxpath = fullpath + ".svx"
-        logpath = fullpath + ".log"
+        logpath = Path(settings.THREEDCACHEDIR) / str(Path(fullpath).name + ".log")
 
         if not os.path.isfile(logpath):
             runcavern()
@@ -1162,7 +1188,7 @@ class LoadingSurvex():
         svx_t = os.path.getmtime(svxpath)
         now = time.time()
 
-        if svx_t - log_t > 0:          # stale, older than svx file
+        if svx_t - log_t > 0:          # stale, svx file is newer than log
             runcavern()
             return
         if now - log_t > 60 *24*60*60: # >60 days, re-run anyway
@@ -1171,7 +1197,7 @@ class LoadingSurvex():
         if cav_t - log_t > 0:          # new version of cavern
             runcavern()
             return
-        if chaosmonkey(200):
+        if chaosmonkey(400):           # one in every 400 runs
             runcavern()
 
 def FindAndLoadSurvex(survexblockroot):
@@ -1190,8 +1216,13 @@ def FindAndLoadSurvex(survexblockroot):
     svx_scan.callcount = 0
     svx_scan.depthinclude = 0
     fullpathtotop = os.path.join(survexfileroot.survexdirectory.path, survexfileroot.path)
+    
+    # Rather than do this check for the presence of the .log and .3d files synchronously here,
+    # we should instead run this in a separate thread asynchronously.
     print("  - RunSurvexIfNeeded cavern on '{}'".format(fullpathtotop), file=sys.stderr)
     svx_scan.RunSurvexIfNeeded(fullpathtotop)
+    svx_scan.checkUniqueness(fullpathtotop)
+    
     indent=""
     fcollate = open(collatefilename, 'w')
 
@@ -1204,9 +1235,20 @@ def FindAndLoadSurvex(survexblockroot):
     finroot = survexfileroot.OpenFile()
     fcollate.write(";*include {}\n".format(survexfileroot.path))
     flinear.write("{:2} {} *include {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path))
+
+    import cProfile, pstats
+    from pstats import SortKey
+    pr = cProfile.Profile()
+    pr.enable()
     #----------------------------------------------------------------
     svx_scan.PushdownStackScan(survexblockroot, survexfileroot.path, finroot, flinear, fcollate)
     #----------------------------------------------------------------
+    pr.disable()
+    with open('PushdownStackScan.prof', 'w') as f:
+        ps = pstats.Stats(pr, stream=f)
+        ps.sort_stats(SortKey.CUMULATIVE)
+        ps.print_stats()
+        
     flinear.write("{:2} {} *edulcni {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path))
     fcollate.write(";*edulcni {}\n".format(survexfileroot.path))
     mem1 = get_process_memory()
@@ -1216,9 +1258,11 @@ def FindAndLoadSurvex(survexblockroot):
     flinear.write("    - {:,} survex files in linear include list \n".format(len(svxfileslist)))
     flinear.close()
     fcollate.close()
-    svx_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.?
+    print("\n -  {:,} runs of survex 'cavern' refreshing .3d files \n".format(svx_scan.caverncount),file=sys.stderr)
+    svx_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.? 
     print("\n -  {:,} survex files in linear include list \n".format(len(svxfileslist)),file=sys.stderr)
 
+
     mem1 = get_process_memory()
     print(" - MEM:{:7.2f} MB END ".format(mem0),file=sys.stderr)
     print(" - MEM:{:7.3f} MB USED".format(mem1-mem0),file=sys.stderr)
@@ -1246,6 +1290,12 @@ def FindAndLoadSurvex(survexblockroot):
     print("\n - MEM:{:7.2f} MB STOP".format(mem1),file=sys.stderr)
     print(" - MEM:{:7.3f} MB USED".format(mem1-mem0),file=sys.stderr)
 
+    # Close the logging file, Restore sys.stdout to our old saved file handle
+    sys.stdout.close()
+    print("+", file=sys.stderr)
+    sys.stderr.flush();
+    sys.stdout = stdout_orig
+
     legsnumber = svx_load.legsnumber
     mem1 = get_process_memory()
 
@@ -1256,11 +1306,6 @@ def FindAndLoadSurvex(survexblockroot):
     print("  - Number of SurvexFiles: {}".format(tf))
     svx_load = None
 
-    # Close the logging file, Restore sys.stdout to our old saved file handle
-    sys.stdout.close()
-    print("+", file=sys.stderr)
-    sys.stderr.flush();
-    sys.stdout = stdout_orig
     return legsnumber
 
 def MakeSurvexFileRoot():
@@ -1300,7 +1345,7 @@ def LoadSurvexBlocks():
     #----------------------------------------------------------------
     memend = get_process_memory()
     print(" - MEMORY start:{:.3f} MB end:{:.3f} MB increase={:.3f} MB".format(memstart,memend, memend-memstart))
-
+    
     survexblockroot.save()
 
     print(" - total number of survex legs: {}".format(legsnumber))