From bb69cc073a7cc26a3da6840841b6ec6eca665a51 Mon Sep 17 00:00:00 2001 From: Philip Sargent <philip.sargent@klebos.com> Date: Wed, 24 Jun 2020 14:10:13 +0100 Subject: [PATCH] start refactor survex import --- core/models.py | 6 + core/models_survex.py | 7 - core/views_statistics.py | 3 +- parsers/survex.py | 818 +++++++++++++++++++-------------------- 4 files changed, 415 insertions(+), 419 deletions(-) diff --git a/core/models.py b/core/models.py index 8e75005..f6ab815 100644 --- a/core/models.py +++ b/core/models.py @@ -3,6 +3,7 @@ import os import datetime import logging import re +import resource from subprocess import call from urllib.parse import urljoin @@ -22,6 +23,11 @@ from django.template import Context, loader import troggle.core.models_survex +def get_process_memory(): + usage=resource.getrusage(resource.RUSAGE_SELF) + return usage[2]/1024.0 + + def get_related_by_wikilinks(wiki_text): found=re.findall(settings.QM_PATTERN,wiki_text) res=[] diff --git a/core/models_survex.py b/core/models_survex.py index 9578e78..b71c27a 100644 --- a/core/models_survex.py +++ b/core/models_survex.py @@ -81,13 +81,6 @@ class SurvexStation(models.Model): else: return r -class SurvexLeg(): - """No longer a models.Model subclass, so no longer a database table - """ - tape = 0.0 - compass = 0.0 - clino = 0.0 - # # Single SurvexBlock # diff --git a/core/views_statistics.py b/core/views_statistics.py index 889eb30..5b85ef1 100644 --- a/core/views_statistics.py +++ b/core/views_statistics.py @@ -12,7 +12,7 @@ from django.views.generic.list import ListView from troggle.core.models import Expedition, Person, PersonExpedition from troggle.core.models_caves import Cave, LogbookEntry -from troggle.core.models_survex import SurvexLeg, SurvexBlock +from troggle.core.models_survex import SurvexBlock import troggle.settings as settings @@ -106,7 +106,6 @@ def stats(request): legsbyexpo.append((expedition, {"nsurvexlegs": "{:,}".format(legsyear), "survexleglength":"{:,.0f}".format(survexleglength)})) legsbyexpo.reverse() - #survexlegs = SurvexLeg.objects.all() renderDict = {**statsDict, **{ "nsurvexlegs": "{:,}".format(nimportlegs), "totalsurvexlength":totalsurvexlength/1000, "addupsurvexlength":addupsurvexlength/1000, "legsbyexpo":legsbyexpo }} # new syntax return render(request,'statistics.html', renderDict) diff --git a/parsers/survex.py b/parsers/survex.py index ebf1cb0..c4d9609 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -2,7 +2,6 @@ import sys import os import re import time -import resource from datetime import datetime, timedelta from subprocess import call, Popen, PIPE @@ -17,471 +16,467 @@ import troggle.core.models_survex as models_survex from troggle.parsers.people import GetPersonExpeditionNameLookup from troggle.core.views_caves import MapLocations - -"""A 'survex block' is a *begin...*end set of cave data. -A 'scansfolder' is what we today call a "survey scans folder" or a "wallet". -""" - -rx_braskets= re.compile(r"[()]") -rx_line_length = re.compile(r"[\d\-+.]+$") -survexlegsalllength = 0.0 -survexlegsnumber = 0 survexblockroot = None ROOTBLOCK = "rootblock" - -def LoadSurvexLineLeg(survexblock, stardata, sline, comment, cave): - """This reads compass, clino and tape data but only keeps the tape lengths, - the rest is discarded after error-checking. +class SurvexLeg(): + """No longer a models.Model subclass, so no longer a database table """ - global survexlegsalllength - global survexlegsnumber - ls = sline.lower().split() - survexleg = models_survex.SurvexLeg() - # this next fails for two surface survey svx files which use / for decimal point - # e.g. '29/09' in the tape measurement, or use decimals but in brackets, e.g. (06.05) - if stardata["type"] == "normal": - tape = rx_braskets.sub("",ls[stardata["tape"]]) - tape = tape.replace("/",".") - try: - survexleg.tape = float(tape) - survexlegsnumber += 1 - except ValueError: - print(("! Tape misread in", survexblock.survexfile.path)) - print((" Stardata:", stardata)) - print((" Line:", ls)) - message = ' ! Value Error: Tape misread in line %s in %s' % (ls, survexblock.survexfile.path) - models.DataIssue.objects.create(parser='survex', message=message) - survexleg.tape = 0 - try: - lclino = ls[stardata["clino"]] - except: - print(("! Clino misread in", survexblock.survexfile.path)) - print((" Stardata:", stardata)) - print((" Line:", ls)) - message = ' ! Value Error: Clino misread in line %s in %s' % (ls, survexblock.survexfile.path) - models.DataIssue.objects.create(parser='survex', message=message) - lclino = error - try: - lcompass = ls[stardata["compass"]] - except: - print(("! Compass misread in", survexblock.survexfile.path)) - print((" Stardata:", stardata)) - print((" Line:", ls)) - message = ' ! Value Error: Compass misread in line %s in %s' % (ls, survexblock.survexfile.path) - models.DataIssue.objects.create(parser='survex', message=message) - lcompass = error - if lclino == "up": - survexleg.compass = 0.0 - survexleg.clino = 90.0 - elif lclino == "down": - survexleg.compass = 0.0 - survexleg.clino = -90.0 - elif lclino == "-" or lclino == "level": + tape = 0.0 + compass = 0.0 + clino = 0.0 + +class LoadSurvex(): + """A 'survex block' is a *begin...*end set of cave data. + A survex file can contain many begin-end blocks, which can be nested, and which can *include + other survex files. + A 'scansfolder' is what we today call a "survey scans folder" or a "wallet". + """ + + # This interprets the survex "*data normal" command which sets out the order of the fields in the data, e.g. + # *DATA normal from to length gradient bearing ignore ignore ignore ignore + stardatadefault = {"type":"normal", "t":"leg", "from":0, "to":1, "tape":2, "compass":3, "clino":4} + stardataparamconvert = {"length":"tape", "bearing":"compass", "gradient":"clino"} + + rx_braskets= re.compile(r"[()]") + rx_linelen = re.compile(r"[\d\-+.]+$") + rx_team = re.compile(r"(?i)(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$") + rx_person = re.compile(r"(?i) and | / |, | & | \+ |^both$|^none$") + rx_qm = re.compile(r'^\s*QM(\d)\s+?([a-dA-DxX])\s+([\w\-]+)\.(\d+)\s+(([\w\-]+)\.(\d+)|\-)\s+(.+)$') +# remember there is also QM_PATTERN used in views_other and set in settings.py + + rx_comment = re.compile(r"([^;]*?)\s*(?:;\s*(.*))?\n?$") + rx_ref = re.compile(r'.*?ref.*?(\d+)\s*#\s*(X)?\s*(\d+)') + rx_star = re.compile(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$') + # years from 1960 to 2039 + rx_starref = re.compile(r'(?i)^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$') + # rx_starref = re.compile("""?x # VERBOSE mode - can't get this to work + # ^\s*\*ref # look for *ref at start of line + # [\s.:]* # some spaces, stops or colons + # ((?:19[6789]\d)|(?:20[0123]\d)) # a date from 1960 to 2039 - captured as one field + # \s*# # spaces then hash separator + # ?\s*(X) # optional X - captured + # ?\s*(.*?\d+.*?) # maybe a space, then at least one digit in the string - captured + # $(?i)""", re.X) # the end (do the whole thing case insensitively) + + survexlegsalllength = 0.0 + survexlegsnumber = 0 + insp = "" + callcount = 0 + + def __init__(self): + pass + + def LoadSurvexLineLeg(self,survexblock, stardata, sline, comment): + """This reads compass, clino and tape data but only keeps the tape lengths, + the rest is discarded after error-checking. + """ + ls = sline.lower().split() + survexleg = SurvexLeg() + # this next fails for two surface survey svx files which use / for decimal point + # e.g. '29/09' in the tape measurement, or use decimals but in brackets, e.g. (06.05) + if stardata["type"] == "normal": + tape = self.rx_braskets.sub("",ls[stardata["tape"]]) + tape = tape.replace("/",".") try: - survexleg.compass = float(lcompass) + survexleg.tape = float(tape) + self.survexlegsnumber += 1 except ValueError: + print(("! Tape misread in", survexblock.survexfile.path)) + print((" Stardata:", stardata)) + print((" Line:", ls)) + message = ' ! Value Error: Tape misread in line %s in %s' % (ls, survexblock.survexfile.path) + models.DataIssue.objects.create(parser='survex', message=message) + survexleg.tape = 0 + try: + lclino = ls[stardata["clino"]] + except: + print(("! Clino misread in", survexblock.survexfile.path)) + print((" Stardata:", stardata)) + print((" Line:", ls)) + message = ' ! Value Error: Clino misread in line %s in %s' % (ls, survexblock.survexfile.path) + models.DataIssue.objects.create(parser='survex', message=message) + lclino = error + try: + lcompass = ls[stardata["compass"]] + except: print(("! Compass misread in", survexblock.survexfile.path)) print((" Stardata:", stardata)) print((" Line:", ls)) - message = ' ! Value Error: line %s in %s' % (ls, survexblock.survexfile.path) + message = ' ! Value Error: Compass misread in line %s in %s' % (ls, survexblock.survexfile.path) models.DataIssue.objects.create(parser='survex', message=message) - survexleg.compass = 1000 - survexleg.clino = -90.0 - else: - assert rx_line_length.match(lcompass), ls - assert rx_line_length.match(lclino) and lclino != "-", ls - survexleg.compass = float(lcompass) - survexleg.clino = float(lclino) - - if cave: - survexleg.cave = cave - - # No need to save as we are measuring lengths only on parsing now. - # delete the object so that django autosaving doesn't save it. - survexleg = None - - itape = stardata.get("tape") - if itape: - try: - survexblock.totalleglength += float(ls[itape]) - survexlegsalllength += float(ls[itape]) - except ValueError: - print("! Length not added") - # No need to save as we are measuring lengths only on parsing now. - - -# def LoadSurvexEquate(survexblock, sline): - # #print sline # - # stations = sline.split() - # assert len(stations) > 1 - # for station in stations: - # survexblock.MakeSurvexStation(station) - - -def LoadSurvexLinePassage(survexblock, stardata, sline, comment): - # do not import this: *data passage.. data which is LRUD not tape/compass/clino - pass - -# This interprets the survex "*data normal" command which sets out the order of the fields in the data, e.g. -# *DATA normal from to length gradient bearing ignore ignore ignore ignore -stardatadefault = {"type":"normal", "t":"leg", "from":0, "to":1, "tape":2, "compass":3, "clino":4} -stardataparamconvert = {"length":"tape", "bearing":"compass", "gradient":"clino"} - -rx_comment = re.compile(r"([^;]*?)\s*(?:;\s*(.*))?\n?$") -rx_ref = re.compile(r'.*?ref.*?(\d+)\s*#\s*(X)?\s*(\d+)') -rx_star = re.compile(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$') -# years from 1960 to 2039 -rx_starref = re.compile(r'(?i)^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$') -# rx_starref = re.compile("""?x # VERBOSE mode - can't get this to work -# ^\s*\*ref # look for *ref at start of line -# [\s.:]* # some spaces, stops or colons -# ((?:19[6789]\d)|(?:20[0123]\d)) # a date from 1960 to 2039 - captured as one field -# \s*# # spaces then hash separator -# ?\s*(X) # optional X - captured -# ?\s*(.*?\d+.*?) # maybe a space, then at least one digit in the string - captured -# $(?i)""", re.X) # the end (do the whole thing case insensitively) - -rx_team = re.compile(r"(?i)(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$") -rx_person = re.compile(r"(?i) and | / |, | & | \+ |^both$|^none$") -rx_qm = re.compile(r'^\s*QM(\d)\s+?([a-dA-DxX])\s+([\w\-]+)\.(\d+)\s+(([\w\-]+)\.(\d+)|\-)\s+(.+)$') - -insp = "" -callcount = 0 -def RecursiveLoad(survexblock, survexfile, fin): - """Follows the *include links in all the survex files from the root file 1623.svx - and reads in the survex blocks, other data and the wallet references (scansfolder) as it - goes. This part of the data import process is where the maximum memory is used and where it - crashes on memory-constrained machines. - """ - iblankbegins = 0 - text = [ ] - stardata = stardatadefault - teammembers = [ ] - global insp - global callcount - global survexlegsnumber - - print(insp+" - MEM:{:.3f} Reading. parent:{} <> {} ".format(get_process_memory(),survexblock.survexfile.path,survexfile.path)) - stamp = datetime.now() - lineno = 0 - - sys.stderr.flush(); - callcount +=1 - if callcount >=10: - callcount=0 - print(".", file=sys.stderr,end='') - - # Try to find the cave in the DB if not use the string as before - path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path) - if path_match: - pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) - cave = models_caves.getCaveByReference(pos_cave) - if cave: - survexfile.cave = cave - svxlines = '' - svxlines = fin.read().splitlines() - # cannot close file now as it may be recursively called with the same file id fin if nested *begin - # occurs. - for svxline in svxlines: - lineno += 1 - # break the line at the comment - sline, comment = rx_comment.match(svxline.strip()).groups() - # detect ref line pointing to the scans directory - mref = comment and rx_ref.match(comment) - if mref: - yr, letterx, wallet = mref.groups() - if not letterx: - letterx = "" + lcompass = error + if lclino == "up": + survexleg.compass = 0.0 + survexleg.clino = 90.0 + elif lclino == "down": + survexleg.compass = 0.0 + survexleg.clino = -90.0 + elif lclino == "-" or lclino == "level": + try: + survexleg.compass = float(lcompass) + except ValueError: + print(("! Compass misread in", survexblock.survexfile.path)) + print((" Stardata:", stardata)) + print((" Line:", ls)) + message = ' ! Value Error: line %s in %s' % (ls, survexblock.survexfile.path) + models.DataIssue.objects.create(parser='survex', message=message) + survexleg.compass = 1000 + survexleg.clino = -90.0 else: - letterx = "X" - if len(wallet)<2: - wallet = "0" + wallet - refscan = "%s#%s%s" % (yr, letterx, wallet ) - manyscansfolders = models_survex.ScansFolder.objects.filter(walletname=refscan) - if manyscansfolders: - survexblock.scansfolder = manyscansfolders[0] - survexblock.save() - else: - message = ' ! Wallet ; ref {} - NOT found in manyscansfolders {}'.format(refscan, survexblock.survexfile.path) - print((insp+message)) - models.DataIssue.objects.create(parser='survex', message=message) + assert self.rx_linelen.match(lcompass), ls + assert self.rx_linelen.match(lclino) and lclino != "-", ls + survexleg.compass = float(lcompass) + survexleg.clino = float(lclino) - # This whole section should be moved if we can have *QM become a proper survex command - # Spec of QM in SVX files, currently commented out need to add to survex - # needs to match rx_qm - # ;Serial number grade(A/B/C/D/X) nearest-station resolution-station description - # ;QM1 a hobnob_hallway_2.42 hobnob-hallway_3.42 junction of keyhole passage - # ;QM1 a hobnob_hallway_2.42 - junction of keyhole passage - qmline = comment and rx_qm.match(comment) - if qmline: - qm_no = qmline.group(1) - qm_grade = qmline.group(2) - qm_from_section = qmline.group(3) - qm_from_station = qmline.group(4) - qm_resolve_section = qmline.group(6) - qm_resolve_station = qmline.group(7) - qm_notes = qmline.group(8) + # No need to save as we are measuring lengths only on parsing now. + # delete the object so that django autosaving doesn't save it. + survexleg = None - # print(insp+'Cave - %s' % survexfile.cave) - # print(insp+'QM no %d' % int(qm_no)) - # print(insp+'QM grade %s' % qm_grade) - # print(insp+'QM section %s' % qm_from_section) - # print(insp+'QM station %s' % qm_from_station) - # print(insp+'QM res section %s' % qm_resolve_section) - # print(insp+'QM res station %s' % qm_resolve_station) - # print(insp+'QM notes %s' % qm_notes) + itape = stardata.get("tape") + if itape: + try: + survexblock.totalleglength += float(ls[itape]) + self.survexlegsalllength += float(ls[itape]) + except ValueError: + print("! Length not added") + # No need to save as we are measuring lengths only on parsing now. - # If the QM isn't resolved (has a resolving station) then load it - if not qm_resolve_section or qm_resolve_section != '-' or qm_resolve_section != 'None': - from_section = models_survex.SurvexBlock.objects.filter(name=qm_from_section) - # If we can find a section (survex note chunck, named) - if len(from_section) > 0: - from_station = models_survex.SurvexStation.objects.filter(block=from_section[0], name=qm_from_station) - # If we can find a from station then we have the nearest station and can import it - if len(from_station) > 0: - qm = models_caves.QM.objects.create(number=qm_no, - nearest_station=from_station[0], - grade=qm_grade.upper(), - location_description=qm_notes) - else: - # print(insp+' - QM found but resolved') - pass - if not sline: - continue + def LoadSurvexLinePassage(self,survexblock, stardata, sline, comment): + # do not import this: *data passage.. data which is LRUD not tape/compass/clino + pass - # detect the star ref command - mstar = rx_starref.match(sline) - if mstar: - yr,letterx,wallet = mstar.groups() - if not letterx: - letterx = "" - else: - letterx = "X" - if len(wallet)<2: - wallet = "0" + wallet - assert (int(yr)>1960 and int(yr)<2039), "Wallet year out of bounds: %s" % yr - assert (int(wallet)<100), "Wallet number more than 100: %s" % wallet - refscan = "%s#%s%s" % (yr, letterx, wallet) - manyscansfolders = models_survex.ScansFolder.objects.filter(walletname=refscan) - if manyscansfolders: - survexblock.scansfolder = manyscansfolders[0] - survexblock.save() - else: - message = ' ! Wallet *REF {} - NOT found in manyscansfolders {}'.format(refscan, survexblock.survexfile.path) - print((insp+message)) - models.DataIssue.objects.create(parser='survex', message=message) - continue + def RecursiveLoad(self,survexblock, survexfile, fin): + """Follows the *include links in all the survex files from the root file 1623.svx + and reads in the survex blocks, other data and the wallet references (scansfolder) as it + goes. This part of the data import process is where the maximum memory is used and where it + crashes on memory-constrained machines. + """ + iblankbegins = 0 + text = [ ] + stardata = self.stardatadefault + teammembers = [ ] + insp =self.insp + blocklegs = self.survexlegsnumber - # detect the star command - mstar = rx_star.match(sline) - if not mstar: - if "from" in stardata: - LoadSurvexLineLeg(survexblock, stardata, sline, comment, survexfile.cave) - pass - elif stardata["type"] == "passage": - LoadSurvexLinePassage(survexblock, stardata, sline, comment) - #Missing "station" in stardata. - continue + print(insp+" - MEM:{:.3f} Reading. parent:{} <> {} ".format(models.get_process_memory(),survexblock.survexfile.path,survexfile.path)) + stamp = datetime.now() + lineno = 0 + + sys.stderr.flush(); + self.callcount +=1 + if self.callcount >=10: + self.callcount=0 + print(".", file=sys.stderr,end='') - # detect the star command - cmd, line = mstar.groups() - cmd = cmd.lower() - if re.match("include$(?i)", cmd): - includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line))) - print((insp+' - Include path found, including - ' + includepath)) - # Try to find the cave in the DB. if not, use the string as before - path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath) - if path_match: - pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) - print((insp+' - Match in DB (i) for cave {}.'.format(pos_cave))) - cave = models_caves.getCaveByReference(pos_cave) - if cave: - survexfile.cave = cave - else: - print((insp+' - NO Match in DB (i) for a cave for {}'.format(includepath))) + # Try to find the cave in the DB if not use the string as before + path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path) + if path_match: + pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) + cave = models_caves.getCaveByReference(pos_cave) + if cave: + survexfile.cave = cave + svxlines = '' + svxlines = fin.read().splitlines() + # cannot close file now as it may be recursively called with the same file id fin if nested *begin + # occurs. + for svxline in svxlines: + lineno += 1 + # break the line at the comment + sline, comment = self.rx_comment.match(svxline.strip()).groups() + # detect ref line pointing to the scans directory + mref = comment and self.rx_ref.match(comment) + if mref: + yr, letterx, wallet = mref.groups() + if not letterx: + letterx = "" + else: + letterx = "X" + if len(wallet)<2: + wallet = "0" + wallet + refscan = "%s#%s%s" % (yr, letterx, wallet ) + manyscansfolders = models_survex.ScansFolder.objects.filter(walletname=refscan) + if manyscansfolders: + survexblock.scansfolder = manyscansfolders[0] + survexblock.save() + else: + message = ' ! Wallet ; ref {} - NOT found in manyscansfolders {}'.format(refscan, survexblock.survexfile.path) + print((insp+message)) + models.DataIssue.objects.create(parser='survex', message=message) - includesurvexfile = models_survex.SurvexFile(path=includepath) - includesurvexfile.save() - includesurvexfile.SetDirectory() - if includesurvexfile.exists(): - survexblock.save() - fininclude = includesurvexfile.OpenFile() - insp += "> " - RecursiveLoad(survexblock, includesurvexfile, fininclude) - #-------------------------------------------------------- - fininclude.close() - insp = insp[2:] - else: - print((insp+' ! ERROR *include file not found for %s' % includesurvexfile)) + # This whole section should be moved if we can have *QM become a proper survex command + # Spec of QM in SVX files, currently commented out need to add to survex + # needs to match self.rx_qm + # ;Serial number grade(A/B/C/D/X) nearest-station resolution-station description + # ;QM1 a hobnob_hallway_2.42 hobnob-hallway_3.42 junction of keyhole passage + # ;QM1 a hobnob_hallway_2.42 - junction of keyhole passage + qmline = comment and self.rx_qm.match(comment) + if qmline: + qm_no = qmline.group(1) + qm_grade = qmline.group(2) + qm_from_section = qmline.group(3) + qm_from_station = qmline.group(4) + qm_resolve_section = qmline.group(6) + qm_resolve_station = qmline.group(7) + qm_notes = qmline.group(8) - elif re.match("begin$(?i)", cmd): - # On a *begin statement we start a new survexblock. - # There should not be any *include inside a begin-end block, so this is a simple - # load not a recursive fileload. But there may be many blocks nested to any depth in one file. - if line: - newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line)) - # Try to find the cave in the DB if not use the string as before - path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath) + # print(insp+'Cave - %s' % survexfile.cave) + # print(insp+'QM no %d' % int(qm_no)) + # print(insp+'QM grade %s' % qm_grade) + # print(insp+'QM section %s' % qm_from_section) + # print(insp+'QM station %s' % qm_from_station) + # print(insp+'QM res section %s' % qm_resolve_section) + # print(insp+'QM res station %s' % qm_resolve_station) + # print(insp+'QM notes %s' % qm_notes) + + # If the QM isn't resolved (has a resolving station) then load it + if not qm_resolve_section or qm_resolve_section != '-' or qm_resolve_section != 'None': + from_section = models_survex.SurvexBlock.objects.filter(name=qm_from_section) + # If we can find a section (survex note chunck, named) + if len(from_section) > 0: + from_station = models_survex.SurvexStation.objects.filter(block=from_section[0], name=qm_from_station) + # If we can find a from station then we have the nearest station and can import it + if len(from_station) > 0: + qm = models_caves.QM.objects.create(number=qm_no, + nearest_station=from_station[0], + grade=qm_grade.upper(), + location_description=qm_notes) + else: + # print(insp+' - QM found but resolved') + pass + + if not sline: + continue + + # detect the star ref command + mstar = self.rx_starref.match(sline) + if mstar: + yr,letterx,wallet = mstar.groups() + if not letterx: + letterx = "" + else: + letterx = "X" + if len(wallet)<2: + wallet = "0" + wallet + assert (int(yr)>1960 and int(yr)<2039), "Wallet year out of bounds: %s" % yr + assert (int(wallet)<100), "Wallet number more than 100: %s" % wallet + refscan = "%s#%s%s" % (yr, letterx, wallet) + manyscansfolders = models_survex.ScansFolder.objects.filter(walletname=refscan) + if manyscansfolders: + survexblock.scansfolder = manyscansfolders[0] + survexblock.save() + else: + message = ' ! Wallet *REF {} - NOT found in manyscansfolders {}'.format(refscan, survexblock.survexfile.path) + print((insp+message)) + models.DataIssue.objects.create(parser='survex', message=message) + continue + + # detect the star command + mstar = self.rx_star.match(sline) + if not mstar: + if "from" in stardata: + self.LoadSurvexLineLeg(survexblock, stardata, sline, comment) + pass + elif stardata["type"] == "passage": + pass + #self.LoadSurvexLinePassage(survexblock, stardata, sline, comment) + #Missing "station" in stardata. + continue + + # detect the star command + cmd, line = mstar.groups() + cmd = cmd.lower() + if re.match("include$(?i)", cmd): + includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line))) + print((insp+' - Include path found, including - ' + includepath)) + # Try to find the cave in the DB. if not, use the string as before + path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath) if path_match: pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) - # print(insp+pos_cave) + print((insp+' - Match in DB (i) for cave {}.'.format(pos_cave))) cave = models_caves.getCaveByReference(pos_cave) if cave: survexfile.cave = cave else: - print((insp+' - No match (b) for %s' % newsvxpath)) + print((insp+' - NO Match in DB (i) for a cave for {}'.format(includepath))) - previousnlegs = survexlegsnumber - name = line.lower() - print(insp+' - Begin found for:{}, creating new SurvexBlock '.format(name)) - survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, - survexpath=survexblock.survexpath+"."+name, - cave=survexfile.cave, survexfile=survexfile, - legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0) - survexblockdown.save() - survexblock.save() - survexblock = survexblockdown - print(insp+" - ENTERING nested *begin/*end block: {}".format(name)) - insp += "> " - RecursiveLoad(survexblockdown, survexfile, fin) - #-------------------------------------------------------- - # do not close the file as there may be more blocks in this one - # and it is re-read afresh with every nested begin-end block. - insp = insp[2:] - else: - iblankbegins += 1 - - elif re.match("end$(?i)", cmd): - if iblankbegins: - print(insp+" - RETURNING from nested *begin/*end block: {}".format(line)) - iblankbegins -= 1 - else: - legsinblock = survexlegsnumber - previousnlegs - print(insp+" - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,survexlegsnumber)) - survexblock.legsall = legsinblock - survexblock.save() - endstamp = datetime.now() - timetaken = endstamp - stamp - return - - elif re.match("date$(?i)", cmd): - if len(line) == 10: - survexblock.date = make_aware(datetime.strptime(re.sub(r"\.", "-", line), '%Y-%m-%d'), get_current_timezone()) - expeditions = models.Expedition.objects.filter(year=line[:4]) - if expeditions: - assert len(expeditions) == 1 - survexblock.expedition = expeditions[0] - survexblock.expeditionday = survexblock.expedition.get_expedition_day(survexblock.date) + includesurvexfile = models_survex.SurvexFile(path=includepath) + includesurvexfile.save() + includesurvexfile.SetDirectory() + if includesurvexfile.exists(): survexblock.save() + fininclude = includesurvexfile.OpenFile() + self.survexlegsnumber = blocklegs + self.insp += "> " + self.RecursiveLoad(survexblock, includesurvexfile, fininclude) + #-------------------------------------------------------- + fininclude.close() + self.insp = self.insp[2:] + insp = self.insp + blocklegs = self.survexlegsnumber + else: + print((insp+' ! ERROR *include file not found for %s' % includesurvexfile)) - elif re.match("team$(?i)", cmd): - pass - # print(insp+' - Team found: ') - mteammember = rx_team.match(line) - if mteammember: - for tm in rx_person.split(mteammember.group(2)): - if tm: - personexpedition = survexblock.expedition and GetPersonExpeditionNameLookup(survexblock.expedition).get(tm.lower()) - if (personexpedition, tm) not in teammembers: - teammembers.append((personexpedition, tm)) - personrole = models_survex.SurvexPersonRole(survexblock=survexblock, nrole=mteammember.group(1).lower(), personexpedition=personexpedition, personname=tm) - personrole.expeditionday = survexblock.expeditionday - if personexpedition: - personrole.person=personexpedition.person - personrole.save() + elif re.match("begin$(?i)", cmd): + # On a *begin statement we start a new survexblock. + # There should not be any *include inside a begin-end block, so this is a simple + # load not a recursive fileload. But there may be many blocks nested to any depth in one file. + if line: + newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line)) + # Try to find the cave in the DB if not use the string as before + path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath) + if path_match: + pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) + # print(insp+pos_cave) + cave = models_caves.getCaveByReference(pos_cave) + if cave: + survexfile.cave = cave + else: + print((insp+' - No match (b) for %s' % newsvxpath)) - elif cmd == "title": - # unused in troggle. - #survextitle = models_survex.SurvexTitle(survexblock=survexblock, title=line.strip('"'), cave=survexfile.cave) - #survextitle.save() - pass + previousnlegs = blocklegs + name = line.lower() + print(insp+' - Begin found for:{}, creating new SurvexBlock '.format(name)) + survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, + survexpath=survexblock.survexpath+"."+name, + cave=survexfile.cave, survexfile=survexfile, + legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0) + survexblockdown.save() + survexblock.save() + survexblock = survexblockdown + print(insp+" - ENTERING nested *begin/*end block: {}".format(name)) + self.survexlegsnumber = blocklegs + self.insp += "> " + self.RecursiveLoad(survexblockdown, survexfile, fin) + #-------------------------------------------------------- + # do not close the file as there may be more blocks in this one + # and it is re-read afresh with every nested begin-end block. + self.insp = self.insp[2:] + insp = self.insp + blocklegs = self.survexlegsnumber + else: + iblankbegins += 1 - elif cmd == "require": - # should we check survex version available for processing? - pass + elif re.match("end$(?i)", cmd): + if iblankbegins: + print(insp+" - RETURNING from nested *begin/*end block: {}".format(line)) + iblankbegins -= 1 + else: + legsinblock = self.survexlegsnumber - previousnlegs + print(insp+" - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber)) + survexblock.legsall = legsinblock + survexblock.save() + endstamp = datetime.now() + timetaken = endstamp - stamp + return - elif cmd == "data": - ls = line.lower().split() - stardata = { "type":ls[0] } - for i in range(0, len(ls)): - stardata[stardataparamconvert.get(ls[i], ls[i])] = i - 1 - if ls[0] in ["normal", "cartesian", "nosurvey"]: - assert (("from" in stardata and "to" in stardata) or "station" in stardata), line - elif ls[0] == "default": - stardata = stardatadefault + elif re.match("date$(?i)", cmd): + if len(line) == 10: + survexblock.date = make_aware(datetime.strptime(re.sub(r"\.", "-", line), '%Y-%m-%d'), get_current_timezone()) + expeditions = models.Expedition.objects.filter(year=line[:4]) + if expeditions: + assert len(expeditions) == 1 + survexblock.expedition = expeditions[0] + survexblock.expeditionday = survexblock.expedition.get_expedition_day(survexblock.date) + survexblock.save() + + elif re.match("team$(?i)", cmd): + pass + # print(insp+' - Team found: ') + mteammember = self.rx_team.match(line) + if mteammember: + for tm in self.rx_person.split(mteammember.group(2)): + if tm: + personexpedition = survexblock.expedition and GetPersonExpeditionNameLookup(survexblock.expedition).get(tm.lower()) + if (personexpedition, tm) not in teammembers: + teammembers.append((personexpedition, tm)) + personrole = models_survex.SurvexPersonRole(survexblock=survexblock, nrole=mteammember.group(1).lower(), personexpedition=personexpedition, personname=tm) + personrole.expeditionday = survexblock.expeditionday + if personexpedition: + personrole.person=personexpedition.person + personrole.save() + + elif cmd == "title": + # unused in troggle today - but will become text list on SurvexBlock + pass + + elif cmd == "require": + # should we check survex version available for processing? + pass + + elif cmd == "data": + ls = line.lower().split() + stardata = { "type":ls[0] } + for i in range(0, len(ls)): + stardata[self.stardataparamconvert.get(ls[i], ls[i])] = i - 1 + if ls[0] in ["normal", "cartesian", "nosurvey"]: + assert (("from" in stardata and "to" in stardata) or "station" in stardata), line + elif ls[0] == "default": + stardata = self.stardatadefault + else: + assert ls[0] == "passage", line + + elif cmd == "equate": + #LoadSurvexEquate(survexblock, line) + pass + + elif cmd == "set" and re.match("names(?i)", line): + pass + elif cmd == "flags": + # Here we could set on/off 'splay', 'not splay', 'surface', 'not surface', or 'duplicate' + # but this data is only used for sense-checking not to actually calculate anything important + pass + elif cmd == "fix": + # troggle does not use survex stations except for entrances which are loaded elsewhere + pass + elif cmd in ["alias", "calibrate", "cs","entrance", "export", "case", + "declination", "infer","instrument", "sd", "units"]: + # we ignore all these, which is fine. + pass else: - assert ls[0] == "passage", line - - elif cmd == "equate": - #LoadSurvexEquate(survexblock, line) - pass - - elif cmd == "set" and re.match("names(?i)", line): - pass - elif cmd == "flags": - # Here we could set on/off 'splay', 'not splay', 'surface', 'not surface', or 'duplicate' - # but this data is only used for sense-checking not to actually calculate anything important - pass - elif cmd == "fix": - # troggle does not use survex stations - #survexblock.MakeSurvexStation(line.split()[0]) - pass - elif cmd in ["alias", "calibrate", "cs","entrance", "export", "case", - "declination", "infer","instrument", "sd", "units"]: - # we ignore all these, which is fine. - pass - else: - if cmd not in ["include", "data", "flags", "title", "set", "ref"]: - message = "! Bad svx command: [*{}] {} ({}) {}".format(cmd, line, survexblock, survexblock.survexfile.path) - print((insp+message)) - models.DataIssue.objects.create(parser='survex', message=message) - else: - message = "! Unparsed [*{}]: '{}' {}".format(cmd, line, survexblock.survexfile.path) - print((insp+message)) - models.DataIssue.objects.create(parser='survex', message=message) - - endstamp = datetime.now() - timetaken = endstamp - stamp - # print(insp+' - Time to process: ' + str(timetaken)) - -def get_process_memory(): - usage=resource.getrusage(resource.RUSAGE_SELF) - return usage[2]/1024.0 + if cmd not in ["include", "data", "flags", "title", "set", "ref"]: + message = "! Bad svx command: [*{}] {} ({}) {}".format(cmd, line, survexblock, survexblock.survexfile.path) + print((insp+message)) + models.DataIssue.objects.create(parser='survex', message=message) + else: + message = "! Unparsed [*{}]: '{}' {}".format(cmd, line, survexblock.survexfile.path) + print((insp+message)) + models.DataIssue.objects.create(parser='survex', message=message) def FindAndLoadAllSurvex(survexblockroot, survexfileroot): """Follows the *include links recursively to find files """ - print(' - redirecting stdout to loadsurvexblks.log...') + print(' - redirecting stdout to svxblks.log...') stdout_orig = sys.stdout # Redirect sys.stdout to the file - sys.stdout = open('loadsurvexblks.log', 'w') + sys.stdout = open('svxblks.log', 'w') + + svxl = LoadSurvex() finroot = survexfileroot.OpenFile() - RecursiveLoad(survexblockroot, survexfileroot, finroot) + svxl.RecursiveLoad(survexblockroot, survexfileroot, finroot) finroot.close() - # Close the logging file + survexlegsnumber = svxl.survexlegsnumber + survexlegsalllength = svxl.survexlegsalllength + + # Close the logging file, Restore sys.stdout to our old saved file handle sys.stdout.close() print("+", file=sys.stderr) sys.stderr.flush(); - # Restore sys.stdout to our old saved file handler sys.stdout = stdout_orig + return (survexlegsnumber, survexlegsalllength) def LoadAllSurvexBlocks(): - global survexlegsalllength - global survexlegsnumber print(' - Flushing All Survex Blocks...') models_survex.SurvexBlock.objects.all().delete() models_survex.SurvexFile.objects.all().delete() models_survex.SurvexDirectory.objects.all().delete() -# models_survex.SurvexEquate.objects.all().delete() -# models_survex.SurvexTitle.objects.all().delete() models_survex.SurvexPersonRole.objects.all().delete() models_survex.SurvexStation.objects.all().delete() @@ -497,7 +492,10 @@ def LoadAllSurvexBlocks(): survexblockroot.save() print(' - Loading All Survex Blocks...') - FindAndLoadAllSurvex(survexblockroot, survexfileroot) + memstart = models.get_process_memory() + survexlegsnumber, survexlegsalllength = FindAndLoadAllSurvex(survexblockroot, survexfileroot) + memend = models.get_process_memory() + print(" - MEMORY start:{:.3f} MB end:{:.3f} MB increase={:.3f} MB",format(memstart,memend,memend-memstart)) survexblockroot.totalleglength = survexlegsalllength survexblockroot.legsall = survexlegsnumber