diff --git a/databaseReset.py b/databaseReset.py index 2400048..43d5e04 100644 --- a/databaseReset.py +++ b/databaseReset.py @@ -108,6 +108,7 @@ def import_tunnelfiles(): parsers.surveys.LoadTunnelFiles() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +# These functions moved to a different file - not used currently. #import logbooksdump #def import_auto_logbooks(): #def dumplogbooks(): @@ -172,7 +173,7 @@ class JobQueue(): return True def runqonce(self): - """Run all the jobs in the queue provided once + """Run all the jobs in the queue provided - once """ print "** Running job ", self.runlabel @@ -199,6 +200,12 @@ class JobQueue(): def run(self): + """First runs all the jobs in the queue against a scratch in-memory db + then re-runs the import against the db specified in settings.py + Default behaviour is to skip the in-memory phase. + When MySQL is the db the in-memory phase crashes as MySQL does not properly + relinquish some kind of db connection (not fixed yet) + """ self.loadprofiles() # save db settings for later dbengine = settings.DATABASES['default']['ENGINE'] @@ -214,15 +221,15 @@ class JobQueue(): else: skipmem = True + print "-- ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE'] + #print "-- DATABASES.default", settings.DATABASES['default'] + if dbname ==":memory:": # just run, and save the sql file - print "-- ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE'] - print "-- DATABASES.default", settings.DATABASES['default'] self.runqonce() - self.memdumpsql() + self.memdumpsql() # saved contents of scratch db, could be imported later.. 
self.saveprofiles() elif skipmem: - print "-- DATABASES.default", settings.DATABASES['default'] self.runqonce() self.saveprofiles() else: @@ -248,9 +255,9 @@ class JobQueue(): print "-- DATABASES.default", settings.DATABASES['default'] # but because the user may be expecting to add this to a db with lots of tables already there, - # the jobque may not start from scratch so we need to initialise the db properly first + # the jobqueue may not start from scratch so we need to initialise the db properly first # because we are using an empty :memory: database - # But initiating twice crashes, so be sure to do it once only. + # But initiating twice crashes it; so be sure to do it once only. # Damn. syncdb() is still calling MySQL somehow **conn_params not sqlite3. So crashes on expo server. @@ -259,9 +266,9 @@ class JobQueue(): if ("dirsredirect",dirsredirect) not in self.queue: dirsredirect() if ("caves",import_caves) not in self.queue: - import_caves() # sometime extract the initialising code from this and put in reinit + import_caves() # sometime extract the initialising code from this and put in reinit... if ("people",import_people) not in self.queue: - import_people() # sometime extract the initialising code from this and put in reinit + import_people() # sometime extract the initialising code from this and put in reinit... django.db.close_old_connections() # maybe not needed here @@ -284,7 +291,6 @@ class JobQueue(): django.db.close_old_connections() # magic rune. works. found by looking in django.db__init__.py #django.setup() # should this be needed? - self.runqonce() # crashes because it thinks it has no migrations to apply, when it does. 
self.saveprofiles() @@ -292,7 +298,8 @@ class JobQueue(): return True def showprofile(self): - """Prints out the time it took to run the jobqueue""" + """Prints out the time it took to run the jobqueue + """ for k in self.results_order: if k =="dirsredirect": break @@ -306,7 +313,6 @@ class JobQueue(): print '%10s (s)' % k, percen=0 r = self.results[k] - #print "min=",min for i in range(len(r)): if k == "runlabel": @@ -432,9 +438,9 @@ if __name__ == "__main__": #parse_descriptions() # no longer present # elif "writeCaves" in sys.argv: # writeCaves() # no longer present - elif "autologbooks" in sys.argv: + elif "autologbooks" in sys.argv: # untested in 2020 import_auto_logbooks() - elif "dumplogbooks" in sys.argv: + elif "dumplogbooks" in sys.argv: # untested in 2020 dumplogbooks() else: usage() diff --git a/parsers/survex.py b/parsers/survex.py index 42a8a00..e9421c5 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -14,6 +14,10 @@ import time from datetime import datetime, timedelta import sys +"""A 'survex block' is a *begin...*end set of cave data. +A 'survexscansfolder' is what we today call a "survey scans folder" or a "wallet". 
+""" + line_leg_regex = re.compile(r"[\d\-+.]+$") def LoadSurvexLineLeg(survexblock, stardata, sline, comment, cave): @@ -99,19 +103,37 @@ regex_comment = re.compile(r"([^;]*?)\s*(?:;\s*(.*))?\n?$") regex_ref = re.compile(r'.*?ref.*?(\d+)\s*#\s*(X)?\s*(\d+)') regex_star = re.compile(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$') # years from 1960 to 2039 -regex_starref = re.compile(r'^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$(?i)') +regex_starref = re.compile(r'^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$(?i)') +# regex_starref = re.compile("""?x # VERBOSE mode - can't get this to work +# ^\s*\*ref # look for *ref at start of line +# [\s.:]* # some spaces, stops or colons +# ((?:19[6789]\d)|(?:20[0123]\d)) # a date from 1960 to 2039 - captured as one field +# \s*# # spaces then hash separator +# ?\s*(X) # optional X - captured +# ?\s*(.*?\d+.*?) # maybe a space, then at least one digit in the string - captured +# $(?i)""", re.X) # the end (do the whole thing case insensitively) + + regex_team = re.compile(r"(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$(?i)") regex_team_member = re.compile(r" and | / |, | & | \+ |^both$|^none$(?i)") regex_qm = re.compile(r'^\s*QM(\d)\s+?([a-dA-DxX])\s+([\w\-]+)\.(\d+)\s+(([\w\-]+)\.(\d+)|\-)\s+(.+)$') +insp = "" + def RecursiveLoad(survexblock, survexfile, fin, textlines): + """Follows the *include links in all the survex files from the root file 1623.svx + and reads in the survex blocks, other data and the wallet references (survexscansfolder) as it + goes. This part of the data import process is where the maximum memory is used and where it + crashes on memory-constrained machines. 
+ """ iblankbegins = 0 text = [ ] stardata = stardatadefault teammembers = [ ] + global insp # uncomment to print out all files during parsing - print(" - Reading file: " + survexblock.survexfile.path) + print(insp+" - Reading file: " + survexblock.survexfile.path + " <> " + survexfile.path) stamp = datetime.now() lineno = 0 @@ -119,28 +141,28 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path) if path_match: pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) - # print('Match') - # print(pos_cave) + # print(insp+'Match') + # print(insp+os_cave) cave = models.getCaveByReference(pos_cave) if cave: survexfile.cave = cave svxlines = '' svxlines = fin.read().splitlines() - # print('Cave - preloop ' + str(survexfile.cave)) - # print(survexblock) + # print(insp+'Cave - preloop ' + str(survexfile.cave)) + # print(insp+survexblock) for svxline in svxlines: - # print(survexblock) + # print(insp+survexblock) - # print(svxline) + # print(insp+svxline) # if not svxline: - # print(' - Not survex') + # print(insp+' - Not survex') # return # textlines.append(svxline) lineno += 1 - # print(' - Line: %d' % lineno) + # print(insp+' - Line: %d' % lineno) # break the line at the comment sline, comment = regex_comment.match(svxline.strip()).groups() @@ -155,15 +177,15 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): if len(wallet)<2: wallet = "0" + wallet refscan = "%s#%s%s" % (yr, letterx, wallet ) - #print(' - Wallet ;ref - %s - looking for survexscansfolder' % refscan) + #print(insp+' - Wallet ;ref - %s - looking for survexscansfolder' % refscan) survexscansfolders = models.SurvexScansFolder.objects.filter(walletname=refscan) if survexscansfolders: survexblock.survexscansfolder = survexscansfolders[0] #survexblock.refscandir = "%s/%s%%23%s" % (mref.group(1), mref.group(1), mref.group(2)) survexblock.save() - # print(' - Wallet ; ref - %s - found in 
survexscansfolders' % refscan) + # print(insp+' - Wallet ; ref - %s - found in survexscansfolders' % refscan) else: - print(' - Wallet ; ref - %s - NOT found in survexscansfolders %s-%s-%s' % (refscan,yr,letterx,wallet)) + print(insp+' - Wallet ; ref - %s - NOT found in survexscansfolders %s-%s-%s' % (refscan,yr,letterx,wallet)) # This whole section should be moved if we can have *QM become a proper survex command # Spec of QM in SVX files, currently commented out need to add to survex @@ -173,7 +195,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): # ;QM1 a hobnob_hallway_2.42 - junction of keyhole passage qmline = comment and regex_qm.match(comment) if qmline: - # print(qmline.groups()) + # print(insp+qmline.groups()) #(u'1', u'B', u'miraclemaze', u'1.17', u'-', None, u'\tcontinuation of rift') qm_no = qmline.group(1) qm_grade = qmline.group(2) @@ -183,34 +205,34 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): qm_resolve_station = qmline.group(7) qm_notes = qmline.group(8) - # print('Cave - %s' % survexfile.cave) - # print('QM no %d' % int(qm_no)) - # print('QM grade %s' % qm_grade) - # print('QM section %s' % qm_from_section) - # print('QM station %s' % qm_from_station) - # print('QM res section %s' % qm_resolve_section) - # print('QM res station %s' % qm_resolve_station) - # print('QM notes %s' % qm_notes) + # print(insp+'Cave - %s' % survexfile.cave) + # print(insp+'QM no %d' % int(qm_no)) + # print(insp+'QM grade %s' % qm_grade) + # print(insp+'QM section %s' % qm_from_section) + # print(insp+'QM station %s' % qm_from_station) + # print(insp+'QM res section %s' % qm_resolve_section) + # print(insp+'QM res station %s' % qm_resolve_station) + # print(insp+'QM notes %s' % qm_notes) # If the QM isn't resolved (has a resolving station) then load it if not qm_resolve_section or qm_resolve_section is not '-' or qm_resolve_section is not 'None': from_section = models.SurvexBlock.objects.filter(name=qm_from_section) # If we can find a 
section (survex note chunck, named) if len(from_section) > 0: - # print(from_section[0]) + # print(insp+from_section[0]) from_station = models.SurvexStation.objects.filter(block=from_section[0], name=qm_from_station) # If we can find a from station then we have the nearest station and can import it if len(from_station) > 0: - # print(from_station[0]) + # print(insp+from_station[0]) qm = models.QM.objects.create(number=qm_no, nearest_station=from_station[0], grade=qm_grade.upper(), location_description=qm_notes) else: - # print(' - QM found but resolved') + # print(insp+' - QM found but resolved') pass - #print('Cave -sline ' + str(cave)) + #print(insp+'Cave -sline ' + str(cave)) if not sline: continue @@ -231,24 +253,24 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): if survexscansfolders: survexblock.survexscansfolder = survexscansfolders[0] survexblock.save() - # print(' - Wallet *REF - %s - found in survexscansfolders' % refscan) + # print(insp+' - Wallet *REF - %s - found in survexscansfolders' % refscan) else: - print(' - Wallet *REF - %s - NOT found in survexscansfolders %s-%s-%s' % (refscan,yr,letterx,wallet)) + print(insp+' - Wallet *REF - %s - NOT found in survexscansfolders %s-%s-%s' % (refscan,yr,letterx,wallet)) continue # detect the star command mstar = regex_star.match(sline) if not mstar: if "from" in stardata: - # print('Cave ' + str(survexfile.cave)) - # print(survexblock) + # print(insp+'Cave ' + str(survexfile.cave)) + # print(insp+survexblock) LoadSurvexLineLeg(survexblock, stardata, sline, comment, survexfile.cave) - # print(' - From: ') - # print(stardata) + # print(insp+' - From: ') + # print(insp+stardata) pass elif stardata["type"] == "passage": LoadSurvexLinePassage(survexblock, stardata, sline, comment) - # print(' - Passage: ') + # print(insp+' - Passage: ') #Missing "station" in stardata. 
continue @@ -257,24 +279,26 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): cmd = cmd.lower() if re.match("include$(?i)", cmd): includepath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line)) - print(' - Include path found including - ' + includepath) + print(insp+' - Include path found including - ' + includepath) # Try to find the cave in the DB if not use the string as before path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath) if path_match: pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) - # print(pos_cave) + # print(insp+pos_cave) cave = models.getCaveByReference(pos_cave) if cave: survexfile.cave = cave else: - print(' - No match in DB (i) for %s, so loading..' % includepath) + print(insp+' - No match in DB (i) for %s, so loading..' % includepath) includesurvexfile = models.SurvexFile(path=includepath) includesurvexfile.save() includesurvexfile.SetDirectory() if includesurvexfile.exists(): survexblock.save() fininclude = includesurvexfile.OpenFile() + insp += "> " RecursiveLoad(survexblock, includesurvexfile, fininclude, textlines) + insp = insp[2:] elif re.match("begin$(?i)", cmd): if line: @@ -283,23 +307,25 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath) if path_match: pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) - # print(pos_cave) + # print(insp+pos_cave) cave = models.getCaveByReference(pos_cave) if cave: survexfile.cave = cave else: - print(' - No match (b) for %s' % newsvxpath) + print(insp+' - No match (b) for %s' % newsvxpath) name = line.lower() - print(' - Begin found for: ' + name) - # print('Block cave: ' + str(survexfile.cave)) + print(insp+' - Begin found for: ' + name) + # print(insp+'Block cave: ' + str(survexfile.cave)) survexblockdown = models.SurvexBlock(name=name, begin_char=fin.tell(), parent=survexblock, 
survexpath=survexblock.survexpath+"."+name, cave=survexfile.cave, survexfile=survexfile, totalleglength=0.0) survexblockdown.save() survexblock.save() survexblock = survexblockdown - # print(survexblockdown) + # print(insp+survexblockdown) textlinesdown = [ ] + insp += "> " RecursiveLoad(survexblockdown, survexfile, fin, textlinesdown) + insp = insp[2:] else: iblankbegins += 1 @@ -309,15 +335,15 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): else: survexblock.text = "".join(textlines) survexblock.save() - # print(' - End found: ') + # print(insp+' - End found: ') endstamp = datetime.now() timetaken = endstamp - stamp - # print(' - Time to process: ' + str(timetaken)) + # print(insp+' - Time to process: ' + str(timetaken)) return elif re.match("date$(?i)", cmd): if len(line) == 10: - #print(' - Date found: ' + line) + #print(insp+' - Date found: ' + line) survexblock.date = make_aware(datetime.strptime(re.sub(r"\.", "-", line), '%Y-%m-%d'), get_current_timezone()) expeditions = models.Expedition.objects.filter(year=line[:4]) if expeditions: @@ -328,7 +354,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): elif re.match("team$(?i)", cmd): pass - # print(' - Team found: ') + # print(insp+' - Team found: ') mteammember = regex_team.match(line) if mteammember: for tm in regex_team_member.split(mteammember.group(2)): @@ -343,7 +369,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): personrole.save() elif cmd == "title": - #print(' - Title found: ') + #print(insp+' - Title found: ') survextitle = models.SurvexTitle(survexblock=survexblock, title=line.strip('"'), cave=survexfile.cave) survextitle.save() pass @@ -353,11 +379,11 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): pass elif cmd == "data": - #print(' - Data found: ') + #print(insp+' - Data found: ') ls = line.lower().split() stardata = { "type":ls[0] } - #print(' - Star data: ', stardata) - #print(ls) + #print(insp+' - Star data: ', stardata) + 
#print(insp+ls) for i in range(0, len(ls)): stardata[stardataparamconvert.get(ls[i], ls[i])] = i - 1 if ls[0] in ["normal", "cartesian", "nosurvey"]: @@ -368,21 +394,21 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): assert ls[0] == "passage", line elif cmd == "equate": - #print(' - Equate found: ') + #print(insp+' - Equate found: ') LoadSurvexEquate(survexblock, line) elif cmd == "fix": - #print(' - Fix found: ') + #print(insp+' - Fix found: ') survexblock.MakeSurvexStation(line.split()[0]) else: - #print(' - Stuff') + #print(insp+' - Stuff') if cmd not in ["sd", "include", "units", "entrance", "data", "flags", "title", "export", "instrument", "calibrate", "set", "infer", "alias", "cs", "declination", "case"]: - print("Unrecognised command in line:", cmd, line, survexblock, survexblock.survexfile.path) + print(insp+"Unrecognised command in line:", cmd, line, survexblock, survexblock.survexfile.path) endstamp = datetime.now() timetaken = endstamp - stamp - # print(' - Time to process: ' + str(timetaken)) + # print(insp+' - Time to process: ' + str(timetaken)) def LoadAllSurvexBlocks(): @@ -448,7 +474,7 @@ def LoadPos(): notfoundbefore = {} if os.path.isfile(cachefile): # this is not a good test. 1623.svx may never change but *included files may have done. - # When the *include is unrolled, we will have a proper timestamp to use + # When the *include is unrolled, we will be able to get a proper timestamp to use # and can increase the timeout from 3 days to 30 days. updtsvx = os.path.getmtime(topdata + ".svx") updtcache = os.path.getmtime(cachefile) diff --git a/templates/survexscansfolders.html b/templates/survexscansfolders.html index 6250897..05c65ba 100644 --- a/templates/survexscansfolders.html +++ b/templates/survexscansfolders.html @@ -2,11 +2,15 @@ {% load wiki_markup %} {% load survex_markup %} -{% block title %}All Survey scans folders{% endblock %} +{% block title %}All Survey scans folders (wallets){% endblock %} {% block content %} -
Each wallet contains the scanned original in-cave survey notes and sketches of +plans and elevations. It also contains scans of centre-line survex output on which +hand-drawn passage sections are drawn. These hand-drawn passages will eventually be +traced to produce Tunnel or Therion drawings and, ultimately, the final complete cave survey.
Scans folder | Files | Survex blocks |
---|