2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-01-19 01:12:32 +00:00

Understanding and speeding up LoadPos

This commit is contained in:
Philip Sargent 2020-04-28 18:26:08 +01:00
parent 4be8c81291
commit b4c0c4d219
3 changed files with 80 additions and 25 deletions

View File

@ -168,8 +168,8 @@ class JobQueue():
self.results = {}
self.results_order=[
"date","runlabel","reinit", "caves", "people",
"logbooks", "scans", "QMs", "survexblks",
"tunnel", "surveyimgs", "test", "dirsredirect", "syncuser", "survexpos" ]
"logbooks", "QMs", "survexblks", "survexpos",
"tunnel", "scans", "surveyimgs", "test", "dirsredirect", "syncuser" ]
for k in self.results_order:
self.results[k]=[]
self.tfile = "import_profile.json"
@ -197,10 +197,15 @@ class JobQueue():
print "FAILURE parsing JSON file %s" % (self.tfile)
# Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12
f.close()
for j in self.results_order:
self.results[j].append(None) # append a placeholder
print "** Running job ", self.runlabel
jobstart = time.time()
self.results["date"].pop()
self.results["date"].append(jobstart)
self.results["runlabel"].pop()
self.results["runlabel"].append(self.runlabel)
for i in self.queue:
@ -208,6 +213,7 @@ class JobQueue():
i[1]() # looks ugly but invokes function passed in the second item in the tuple
duration = time.time()-start
print "\n*- Ended \"", i[0], "\" %.1f seconds" % duration
self.results[i[0]].pop() # the null item
self.results[i[0]].append(duration)
with open(self.tfile, 'w') as f:
@ -241,9 +247,9 @@ class JobQueue():
elif k =="test":
break
elif k =="date":
print " days ago ",
print " days ago ",
else:
print '%9s (s)' % k,
print '%10s (s)' % k,
percen=0
r = self.results[k]
#print "min=",min
@ -286,7 +292,7 @@ def usage():
QMs - read in the QM csv files (older caves only)
reinit - clear database (delete everything) and make empty tables. Import nothing.
scans - the survey scans in all the wallets
survex - read in the survex files - all the survex blocks and the x/y/z positions
survex - read in the survex files - all the survex blocks but not the x/y/z positions
survexpos - just the x/y/z Pos out of the survex files
tunnel - read in the Tunnel files - which scans the survey scans too
@ -326,9 +332,8 @@ if __name__ == "__main__":
jq.enq("reinit",reinit_db)
jq.enq("dirsredirect",dirsredirect)
jq.enq("caves",import_caves)
jq.enq("people",import_people)
jq.enq("survex",import_survexblks)
#jq.enq("logbooks",import_logbooks)
jq.enq("survexblks",import_survexblks)
jq.enq("survexpos",import_survexpos)
elif "caves" in sys.argv:
jq.enq("caves",import_caves)
elif "logbooks" in sys.argv:

View File

@ -77,7 +77,7 @@ def parseCaveQMs(cave,inputFile):
except QM.DoesNotExist: #if there is no pre-existing QM, save the new one
newQM.save()
print("QM "+str(newQM) + ' added to database\r')
# print("QM "+str(newQM) + ' added to database\r')
except KeyError: #check on this one
continue

View File

@ -232,7 +232,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
if cave:
survexfile.cave = cave
else:
print('No match for %s' % includepath)
print(' - No match (i) for %s' % includepath)
includesurvexfile = models.SurvexFile(path=includepath)
includesurvexfile.save()
includesurvexfile.SetDirectory()
@ -253,7 +253,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
if cave:
survexfile.cave = cave
else:
print('No match for %s' % newsvxpath)
print(' - No match (b) for %s' % newsvxpath)
name = line.lower()
print(' - Begin found for: ' + name)
@ -391,21 +391,71 @@ def LoadPos():
and if we do, then save the x/y/z coordinates.
If we don't have it in the database, print an error message and discard it.
"""
print(' - Generating a list of Pos and then loading them....')
topdata = settings.SURVEX_DATA + settings.SURVEX_TOPNAME
print(' - Generating a list of Pos from %s.svx and then loading...' % (topdata))
# Be careful with the cache file.
# If LoadPos has been run before,
# but without the cave import having been run first,
# then *everything* may be listed in a 'not found' cache file that still looks fresh.
cachefile = settings.SURVEX_DATA + "posnotfound"
notfoundbefore = {}
if os.path.isfile(cachefile):
updtsvx = os.path.getmtime(topdata + ".svx")
updtcache = os.path.getmtime(cachefile)
age = updtcache - updtsvx
print(' svx: %s cache: %s cache age: %s' % (updtsvx, updtcache, age ))
if age < 0 :
print " cache is stale."
os.remove(cachefile)
else:
print " cache is fresh."
try:
f = open(cachefile, "r")
for line in f:
notfoundbefore[line] +=1 # should not be duplicates
except:
print " FAILURE READ opening cache file %s" % (cachefile)
f.close()
call([settings.CAVERN, "--output=%s%s.3d" % (settings.SURVEX_DATA, settings.SURVEX_TOPNAME), "%s%s.svx" % (settings.SURVEX_DATA, settings.SURVEX_TOPNAME)])
call([settings.THREEDTOPOS, '%s%s.3d' % (settings.SURVEX_DATA, settings.SURVEX_TOPNAME)], cwd = settings.SURVEX_DATA)
posfile = open("%s%s.pos" % (settings.SURVEX_DATA, settings.SURVEX_TOPNAME))
notfoundnow =[]
found = 0
skip = {}
print "\n" # extra line because cavern overwrites the text buffer somehow
# cavern defaults to using same cwd as supplied input file
call([settings.CAVERN, "--output=%s.3d" % (topdata), "%s.svx" % (topdata)])
call([settings.THREEDTOPOS, '%s.3d' % (topdata)], cwd = settings.SURVEX_DATA)
posfile = open("%s.pos" % (topdata))
posfile.readline() #Drop header
for line in posfile.readlines():
r = poslineregex.match(line)
if r:
x, y, z, name = r.groups()
try:
ss = models.SurvexStation.objects.lookup(name)
ss.x = float(x)
ss.y = float(y)
ss.z = float(z)
ss.save()
except:
print "%s in %s.pos not found in lookup of SurvexStation.objects" % (name, settings.SURVEX_TOPNAME)
x, y, z, name = r.groups() # easting, northing, altitude
if name in notfoundbefore:
skip[name] += 1
else:
try:
ss = models.SurvexStation.objects.lookup(name)
ss.x = float(x)
ss.y = float(y)
ss.z = float(z)
ss.save()
found += 1
except:
#print "%s in %s.pos not found in lookup of SurvexStation.objects" % (name, settings.SURVEX_TOPNAME)
notfoundnow.append(name)
print " - %s stations NOT found in lookup of SurvexStation.objects. %s found. %s skipper." % (len(notfoundnow),found, skip)
if found > 10: # i.e. a previous cave import has been done
try:
with open(cachefile, "w") as f:
print " cache file opened"
for i in notfoundnow:
f.write("%s\n" % i)
for j in skip:
f.write("%s\n" % j) # NB skip not notfoundbefore
except:
print " FAILURE WRITE opening cache file %s" % (cachefile)