From b461b87df646a705fd37afa41afff231d3e0d170 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Fri, 5 Nov 2021 22:59:54 +0200 Subject: [PATCH] fix unneeded runs of survex on survex mport --- .gitignore | 1 + parsers/survex.py | 73 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 60 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index c3ec8d4..742d605 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ syntax: glob *.pyc *.sql *.sqlite +*.prof *~ .idea/* .swp diff --git a/parsers/survex.py b/parsers/survex.py index 569d091..8b379ac 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -152,12 +152,14 @@ class LoadingSurvex(): includestack = [] stacksvxfiles = [] svxfileslist = [] - svxdirs = {} + svxdirs = {} + uniquename = {} expos = {} survexdict = {} # each key is a directory, and its value is a list of files lineno = 0 insp = "" callcount = 0 + caverncount = 0 ignoreprefix = ["surface", "kataster", "fixedpts", "gpx"] ignorenoncave = ["caves-1623", "caves-1623/2007-neu"] includedfilename ="" @@ -255,12 +257,15 @@ class LoadingSurvex(): def LoadSurvexDate(self, survexblock, line): # we should make this a date RANGE for everything + def findexpedition(year): + return Expedition.objects.filter(year=year) + def setdate(year): # cacheing to save DB query on every block and to prepare for django-less troggle in future if year in self.expos: expo = self.expos[year] else: - expeditions = Expedition.objects.filter(year=year) + expeditions = findexpedition(year) if len(expeditions) != 1 : message = f"! 
More than one expedition in year {year} '{line}' ({survexblock}) {survexblock.survexfile.path}" print((self.insp+message)) @@ -1077,6 +1082,7 @@ class LoadingSurvex(): fullpath = os.path.join(settings.SURVEX_DATA, includepath + ".svx") self.RunSurvexIfNeeded(os.path.join(settings.SURVEX_DATA, includepath)) + self.checkUniqueness(os.path.join(settings.SURVEX_DATA, includepath)) if os.path.isfile(fullpath): #-------------------------------------------------------- self.depthinclude += 1 @@ -1135,6 +1141,18 @@ class LoadingSurvex(): flinear.write(" {:2} {} *title {}\n".format(self.depthbegin, depth, args)) pass + + def checkUniqueness(self,fullpath): + fn = Path(fullpath).name + if fn not in self.uniquename: + self.uniquename[fn] = 1 + else: + self.uniquename[fn] += 1 + message = f" ! NON-UNIQUE survex filename, overwriting .3d file in expowebcache '{fn}' - '{fullpath}' #{self.uniquename[fn]}" + print(message) + DataIssue.objects.create(parser='survex', message=message) + + def RunSurvexIfNeeded(self,fullpath): now = time.time() cav_t = now - 365*24*3600 @@ -1142,12 +1160,20 @@ class LoadingSurvex(): svx_t = now - 365*24*3600 def runcavern(): - # print(" - Regenerating stale (or chaos-monkeyed) cavern .log and .3d for '{}'\n days svx old: {:.1f} cav:{:.1f} log old: {:.1f}". - # format(fullpath, (svx_t - log_t)/(24*3600), (cav_t - log_t)/(24*3600), (now - log_t)/(24*3600))) + '''This assumes all survex files have unique names and they are taken from many folders but the output is all put + into the same folder. A serious potential bug. 
We should check uniqueness + ''' + print(" - Regenerating stale (or chaos-monkeyed) cavern .log and .3d for '{}'\n at '{}'\n days svx old: {:.1f} cav:{:.1f} log old: {:.1f}".format(fullpath, logpath, (svx_t - log_t)/(24*3600), (cav_t - log_t)/(24*3600), (now - log_t)/(24*3600))) + #print(f' - cav_t: {cav_t/(24*3600)} - log_t: {log_t/(24*3600)} - svx_t: {svx_t/(24*3600)} - now: {now}') subprocess.call([settings.CAVERN, "--log", "--output={}".format(settings.THREEDCACHEDIR), "{}.svx".format(fullpath)]) + self.caverncount += 1 + + # should also collect all the .err files too and create a DataIssue for each one which + # - is nonzero in size + # - has Error greater than 5% anywhere, or some other more serious error svxpath = fullpath + ".svx" - logpath = fullpath + ".log" + logpath = Path(settings.THREEDCACHEDIR) / str(Path(fullpath).name + ".log") if not os.path.isfile(logpath): runcavern() @@ -1162,7 +1188,7 @@ class LoadingSurvex(): svx_t = os.path.getmtime(svxpath) now = time.time() - if svx_t - log_t > 0: # stale, older than svx file + if svx_t - log_t > 0: # stale, svx file is newer than log runcavern() return if now - log_t > 60 *24*60*60: # >60 days, re-run anyway @@ -1171,7 +1197,7 @@ class LoadingSurvex(): if cav_t - log_t > 0: # new version of cavern runcavern() return - if chaosmonkey(200): + if chaosmonkey(400): # one in every 400 runs runcavern() def FindAndLoadSurvex(survexblockroot): @@ -1190,8 +1216,13 @@ def FindAndLoadSurvex(survexblockroot): svx_scan.callcount = 0 svx_scan.depthinclude = 0 fullpathtotop = os.path.join(survexfileroot.survexdirectory.path, survexfileroot.path) + + # Rather than do this check for the presence of the .log and .3d files synchronously here, + # we should instead run this in a separate thread asynchronously. 
print(" - RunSurvexIfNeeded cavern on '{}'".format(fullpathtotop), file=sys.stderr) svx_scan.RunSurvexIfNeeded(fullpathtotop) + svx_scan.checkUniqueness(fullpathtotop) + indent="" fcollate = open(collatefilename, 'w') @@ -1204,9 +1235,20 @@ def FindAndLoadSurvex(survexblockroot): finroot = survexfileroot.OpenFile() fcollate.write(";*include {}\n".format(survexfileroot.path)) flinear.write("{:2} {} *include {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path)) + + import cProfile, pstats + from pstats import SortKey + pr = cProfile.Profile() + pr.enable() #---------------------------------------------------------------- svx_scan.PushdownStackScan(survexblockroot, survexfileroot.path, finroot, flinear, fcollate) #---------------------------------------------------------------- + pr.disable() + with open('PushdownStackScan.prof', 'w') as f: + ps = pstats.Stats(pr, stream=f) + ps.sort_stats(SortKey.CUMULATIVE) + ps.print_stats() + flinear.write("{:2} {} *edulcni {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path)) fcollate.write(";*edulcni {}\n".format(survexfileroot.path)) mem1 = get_process_memory() @@ -1216,9 +1258,11 @@ def FindAndLoadSurvex(survexblockroot): flinear.write(" - {:,} survex files in linear include list \n".format(len(svxfileslist))) flinear.close() fcollate.close() - svx_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.? + print("\n - {:,} runs of survex 'cavern' refreshing .3d files \n".format(svx_scan.caverncount),file=sys.stderr) + svx_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.? 
print("\n - {:,} survex files in linear include list \n".format(len(svxfileslist)),file=sys.stderr) + mem1 = get_process_memory() print(" - MEM:{:7.2f} MB END ".format(mem0),file=sys.stderr) print(" - MEM:{:7.3f} MB USED".format(mem1-mem0),file=sys.stderr) @@ -1246,6 +1290,12 @@ def FindAndLoadSurvex(survexblockroot): print("\n - MEM:{:7.2f} MB STOP".format(mem1),file=sys.stderr) print(" - MEM:{:7.3f} MB USED".format(mem1-mem0),file=sys.stderr) + # Close the logging file, Restore sys.stdout to our old saved file handle + sys.stdout.close() + print("+", file=sys.stderr) + sys.stderr.flush(); + sys.stdout = stdout_orig + legsnumber = svx_load.legsnumber mem1 = get_process_memory() @@ -1256,11 +1306,6 @@ def FindAndLoadSurvex(survexblockroot): print(" - Number of SurvexFiles: {}".format(tf)) svx_load = None - # Close the logging file, Restore sys.stdout to our old saved file handle - sys.stdout.close() - print("+", file=sys.stderr) - sys.stderr.flush(); - sys.stdout = stdout_orig return legsnumber def MakeSurvexFileRoot(): @@ -1300,7 +1345,7 @@ def LoadSurvexBlocks(): #---------------------------------------------------------------- memend = get_process_memory() print(" - MEMORY start:{:.3f} MB end:{:.3f} MB increase={:.3f} MB".format(memstart,memend, memend-memstart)) - + survexblockroot.save() print(" - total number of survex legs: {}".format(legsnumber))