
Understanding and speeding up LoadPos

Philip Sargent 2020-04-28 18:26:08 +01:00
parent 4be8c81291
commit b4c0c4d219
3 changed files with 80 additions and 25 deletions

View File

@@ -168,8 +168,8 @@ class JobQueue():
         self.results = {}
         self.results_order=[
             "date","runlabel","reinit", "caves", "people",
-            "logbooks", "scans", "QMs", "survexblks",
-            "tunnel", "surveyimgs", "test", "dirsredirect", "syncuser", "survexpos" ]
+            "logbooks", "QMs", "survexblks", "survexpos",
+            "tunnel", "scans", "surveyimgs", "test", "dirsredirect", "syncuser" ]
         for k in self.results_order:
             self.results[k]=[]
         self.tfile = "import_profile.json"
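The reordering above puts the columns in the order the jobs actually run, so the profile table reads chronologically. For illustration only (values invented, not data from the repo), the self.results structure that gets serialised to import_profile.json holds one list per job name, with one entry per recorded run:

    # Hypothetical contents after two runs; None marks a job not run that time.
    results = {
        "date":       [1588080000.0, 1588096000.0],   # per-run start timestamps
        "runlabel":   ["reset", "svx only"],
        "caves":      [33.2, None],
        "survexblks": [420.5, 381.7],
        "survexpos":  [88.9, 90.1],
    }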
@@ -197,10 +197,15 @@ class JobQueue():
                 print "FAILURE parsing JSON file %s" % (self.tfile)
                 # Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12
             f.close()
+        for j in self.results_order:
+            self.results[j].append(None) # append a placeholder
         print "** Running job ", self.runlabel
         jobstart = time.time()
+        self.results["date"].pop()
         self.results["date"].append(jobstart)
+        self.results["runlabel"].pop()
         self.results["runlabel"].append(self.runlabel)
         for i in self.queue:
@@ -208,6 +213,7 @@ class JobQueue():
             i[1]() # looks ugly but invokes function passed in the second item in the tuple
             duration = time.time()-start
             print "\n*- Ended \"", i[0], "\" %.1f seconds" % duration
+            self.results[i[0]].pop() # the null item
             self.results[i[0]].append(duration)
         with open(self.tfile, 'w') as f:
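Together, the two hunks above implement a simple placeholder discipline: at the start of a run every column gets a None appended, and each job that actually executes pops its placeholder and appends its real duration, so all the lists stay the same length whichever subset of jobs runs. A minimal standalone sketch of the idea (names invented for illustration):

    import time

    results_order = ["date", "runlabel", "caves", "survexblks", "survexpos"]
    results = {k: [] for k in results_order}

    def start_run(runlabel):
        for k in results_order:
            results[k].append(None)          # placeholder in every column
        results["date"][-1] = time.time()
        results["runlabel"][-1] = runlabel

    def record(job, seconds):
        results[job].pop()                   # discard the null item
        results[job].append(seconds)         # store the real duration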
@@ -241,9 +247,9 @@ class JobQueue():
             elif k =="test":
                 break
             elif k =="date":
-                print " days ago ",
+                print "  days ago ",
             else:
-                print '%9s (s)' % k,
+                print '%10s (s)' % k,
             percen=0
             r = self.results[k]
             #print "min=",min
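The width bump from %9s to %10s is presumably needed because the new column name "survexblks" is ten characters, one more than the old field width, which would have pushed that header out of alignment:

    >>> '%9s' % 'caves', '%10s' % 'caves'
    ('    caves', '     caves')
    >>> '%9s' % 'survexblks'    # 10 chars: overflows a 9-wide field
    'survexblks'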
@@ -286,7 +292,7 @@ def usage():
             QMs        - read in the QM csv files (older caves only)
             reinit     - clear database (delete everything) and make empty tables. Import nothing.
             scans      - the survey scans in all the wallets
-            survex     - read in the survex files - all the survex blocks and the x/y/z positions
+            survex     - read in the survex files - all the survex blocks but not the x/y/z positions
             survexpos  - just the x/y/z Pos out of the survex files
             tunnel     - read in the Tunnel files - which scans the survey scans too
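With the survex work split, a typical workflow (command names as listed above; the invocation is assumed to be the usual `python databaseReset.py <command>`) can defer the slow position pass:

    python databaseReset.py survex       # survex blocks only, no x/y/z
    python databaseReset.py survexpos    # just the x/y/z station positions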
@@ -326,9 +332,8 @@ if __name__ == "__main__":
         jq.enq("reinit",reinit_db)
         jq.enq("dirsredirect",dirsredirect)
         jq.enq("caves",import_caves)
-        jq.enq("people",import_people)
-        jq.enq("survex",import_survexblks)
-        #jq.enq("logbooks",import_logbooks)
+        jq.enq("survexblks",import_survexblks)
+        jq.enq("survexpos",import_survexpos)
     elif "caves" in sys.argv:
         jq.enq("caves",import_caves)
     elif "logbooks" in sys.argv:

View File

@@ -77,7 +77,7 @@ def parseCaveQMs(cave,inputFile):
             except QM.DoesNotExist: #if there is no pre-existing QM, save the new one
                 newQM.save()
-                print("QM "+str(newQM) + ' added to database\r')
+                # print("QM "+str(newQM) + ' added to database\r')
         except KeyError: #check on this one
             continue

View File

@@ -232,7 +232,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
             if cave:
                 survexfile.cave = cave
             else:
-                print('No match for %s' % includepath)
+                print(' - No match (i) for %s' % includepath)
             includesurvexfile = models.SurvexFile(path=includepath)
             includesurvexfile.save()
             includesurvexfile.SetDirectory()
@@ -253,7 +253,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
             if cave:
                 survexfile.cave = cave
             else:
-                print('No match for %s' % newsvxpath)
+                print(' - No match (b) for %s' % newsvxpath)
         name = line.lower()
         print(' - Begin found for: ' + name)
@@ -391,21 +391,71 @@ def LoadPos():
     and if we do, then save the x/y/z coordinates.
     If we don't have it in the database, print an error message and discard it.
     """
-    print(' - Generating a list of Pos and then loading them....')
-    call([settings.CAVERN, "--output=%s%s.3d" % (settings.SURVEX_DATA, settings.SURVEX_TOPNAME), "%s%s.svx" % (settings.SURVEX_DATA, settings.SURVEX_TOPNAME)])
-    call([settings.THREEDTOPOS, '%s%s.3d' % (settings.SURVEX_DATA, settings.SURVEX_TOPNAME)], cwd = settings.SURVEX_DATA)
-    posfile = open("%s%s.pos" % (settings.SURVEX_DATA, settings.SURVEX_TOPNAME))
+    topdata = settings.SURVEX_DATA + settings.SURVEX_TOPNAME
+    print(' - Generating a list of Pos from %s.svx and then loading...' % (topdata))
+    # Be careful with the cache file.
+    # If LoadPos has been run before,
+    # but without cave import being run before,
+    # then *everything* may be in the fresh 'not found' cache file.
+    cachefile = settings.SURVEX_DATA + "posnotfound"
+    notfoundbefore = {}
+    if os.path.isfile(cachefile):
+        updtsvx = os.path.getmtime(topdata + ".svx")
+        updtcache = os.path.getmtime(cachefile)
+        age = updtcache - updtsvx
+        print('   svx: %s  cache: %s  cache age: %s' % (updtsvx, updtcache, age))
+        if age < 0 :
+            print "   cache is stale."
+            os.remove(cachefile)
+        else:
+            print "   cache is fresh."
+            try:
+                f = open(cachefile, "r")
+                for line in f:
+                    name = line.strip()   # drop the newline so later membership tests match
+                    notfoundbefore[name] = notfoundbefore.get(name, 0) + 1 # should not be duplicates
+            except:
+                print " FAILURE READ opening cache file %s" % (cachefile)
+            f.close()
+    notfoundnow =[]
+    found = 0
+    skip = {}
+    print "\n" # extra line because cavern overwrites the text buffer somehow
+    # cavern defaults to using same cwd as supplied input file
+    call([settings.CAVERN, "--output=%s.3d" % (topdata), "%s.svx" % (topdata)])
+    call([settings.THREEDTOPOS, '%s.3d' % (topdata)], cwd = settings.SURVEX_DATA)
+    posfile = open("%s.pos" % (topdata))
     posfile.readline() #Drop header
     for line in posfile.readlines():
         r = poslineregex.match(line)
         if r:
-            x, y, z, name = r.groups()
-            try:
-                ss = models.SurvexStation.objects.lookup(name)
-                ss.x = float(x)
-                ss.y = float(y)
-                ss.z = float(z)
-                ss.save()
-            except:
-                print "%s in %s.pos not found in lookup of SurvexStation.objects" % (name, settings.SURVEX_TOPNAME)
+            x, y, z, name = r.groups() # easting, northing, altitude
+            if name in notfoundbefore:
+                skip[name] = skip.get(name, 0) + 1   # known missing: skip the expensive lookup
+            else:
+                try:
+                    ss = models.SurvexStation.objects.lookup(name)
+                    ss.x = float(x)
+                    ss.y = float(y)
+                    ss.z = float(z)
+                    ss.save()
+                    found += 1
+                except:
+                    #print "%s in %s.pos not found in lookup of SurvexStation.objects" % (name, settings.SURVEX_TOPNAME)
+                    notfoundnow.append(name)
+    print " - %s stations NOT found in lookup of SurvexStation.objects. %s found. %s skipped." % (len(notfoundnow), found, len(skip))
+    if found > 10: # i.e. a previous cave import has been done
+        try:
+            with open(cachefile, "w") as f:
+                print "   cache file opened"
+                for i in notfoundnow:
+                    f.write("%s\n" % i)
+                for j in skip:
+                    f.write("%s\n" % j) # NB skip, not notfoundbefore
+        except:
+            print " FAILURE WRITE opening cache file %s" % (cachefile)