From 122cdd7fc8620b2348d75b1bb786ae4202db9a55 Mon Sep 17 00:00:00 2001 From: Philip Sargent <philip.sargent@klebos.com> Date: Sun, 28 Jun 2020 01:50:34 +0100 Subject: [PATCH] replace GetCaveByReference --- core/models_caves.py | 19 ++-- core/models_survex.py | 4 + parsers/logbooks.py | 8 +- parsers/survex.py | 220 +++++++++++++++++++++++++----------------- templates/base.html | 6 +- 5 files changed, 155 insertions(+), 102 deletions(-) diff --git a/core/models_caves.py b/core/models_caves.py index 0338a62..e20b17f 100644 --- a/core/models_caves.py +++ b/core/models_caves.py @@ -201,15 +201,16 @@ class Cave(TroggleModel): pass return lowestareas[0] -def getCaveByReference(reference): - areaname, code = reference.split("-", 1) - area = Area.objects.get(short_name = areaname) - foundCaves = list(Cave.objects.filter(area = area, kataster_number = code).all()) + list(Cave.objects.filter(area = area, unofficial_number = code).all()) - #print((list(foundCaves))) - if len(foundCaves) == 1: - return foundCaves[0] - else: - return False +# This seems to be peculiarly broken, and is now replaced for logbooks. 
+# def getCaveByReference(reference): + # areaname, code = reference.split("-", 1) + # area = Area.objects.get(short_name = areaname) + # foundCaves = list(Cave.objects.filter(area = area, kataster_number = code).all()) + list(Cave.objects.filter(area = area, unofficial_number = code).all()) + # #print((list(foundCaves))) + # if len(foundCaves) == 1: + # return foundCaves[0] + # else: + # return False class OtherCaveName(TroggleModel): name = models.CharField(max_length=160) diff --git a/core/models_survex.py b/core/models_survex.py index e46baae..4cbf611 100644 --- a/core/models_survex.py +++ b/core/models_survex.py @@ -16,6 +16,10 @@ class SurvexDirectory(models.Model): class Meta: ordering = ('id',) + def __str__(self): + return str(self.path) + "-" + str(self.primarysurvexfile.path) + + class SurvexFile(models.Model): path = models.CharField(max_length=200) diff --git a/parsers/logbooks.py b/parsers/logbooks.py index 6a19dad..a724394 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -12,7 +12,7 @@ from django.template.defaultfilters import slugify from django.utils.timezone import get_current_timezone, make_aware from troggle.core.models import DataIssue, Expedition -from troggle.core.models_caves import Cave, OtherCaveName, getCaveByReference, LogbookEntry, PersonTrip +from troggle.core.models_caves import Cave, OtherCaveName, LogbookEntry, PersonTrip from parsers.people import GetPersonExpeditionNameLookup from utils import save_carefully @@ -91,7 +91,10 @@ def GetCaveLookup(): if cave.kataster_number: Gcavelookup[cave.kataster_number] = cave if cave.unofficial_number: - Gcavelookup[cave.unofficial_number] = cave + Gcavelookup[cave.unofficial_number.lower()] = cave + if cave.filename: + # this is the slug - usually.. + Gcavelookup[cave.filename.replace(".html","").lower()] = cave # These are exact matches! edit to check for prefix only! 
Gcavelookup["tunnocks"] = Gcavelookup["258"] Gcavelookup["hauchhole"] = Gcavelookup["234"] @@ -586,6 +589,7 @@ def parseAutoLogBookEntry(filename): if caveMatch: caveRef, = caveMatch.groups() try: + # this is a slow and uncertain function: cave = getCaveByReference(caveRef) except AssertionError: cave = None diff --git a/parsers/survex.py b/parsers/survex.py index 7ac8a5e..31dff03 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -14,6 +14,7 @@ import troggle.core.models as models import troggle.core.models_caves as models_caves import troggle.core.models_survex as models_survex from troggle.parsers.people import GetPersonExpeditionNameLookup +from troggle.parsers.logbooks import GetCaveLookup from troggle.core.views_caves import MapLocations survexblockroot = None @@ -46,8 +47,8 @@ class LoadingSurvex(): rx_cave = re.compile(r'caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/') rx_comment = re.compile(r'([^;]*?)\s*(?:;\s*(.*))?\n?$') - rx_comminc = re.compile(r'(?i)^\s*;\*include[\s](.*)$') # inserted by linear collate ;*include - rx_commcni = re.compile(r'(?i)^\s*;\*edulcni[\s](.*)$') # inserted by linear collate ;*edulcni + rx_comminc = re.compile(r'(?i)^\*include[\s]*([-\w/]*).*$') # inserted by linear collate ;*include + rx_commcni = re.compile(r'(?i)^\*edulcni[\s]*([-\w/]*).*$') # inserted by linear collate ;*edulcni rx_include = re.compile(r'(?i)^\s*(\*include[\s].*)$') rx_ref = re.compile(r'(?i)^\s*ref[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)') rx_star = re.compile(r'(?i)\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$') @@ -60,7 +61,10 @@ class LoadingSurvex(): depthinclude = 0 stackbegin =[] stackinclude = [] - svxfileslist =[] + svxfileslist = [] + svxdirs = {} + svxcaves = {} + svxfiletitle = {} lineno = 0 insp = "" callcount = 0 @@ -122,6 +126,9 @@ class LoadingSurvex(): """This reads compass, clino and tape data but only keeps the tape lengths, the rest is discarded after error-checking. """ + # Check first to see if we are in a splay and abort if so. 
+ # TO DO splay abort + stardata = self.stardata survexleg = SurvexLeg() @@ -184,7 +191,8 @@ class LoadingSurvex(): print(("! Compass misread in", survexblock.survexfile.path)) print((" Stardata:", stardata)) print((" Line:", ls)) - message = ' ! Value Error: line %s in %s' % (ls, survexblock.survexfile.path) + message = " ! Value Error: lcompass:'{}' line {} in '{}'".format(lcompass, + ls, survexblock.survexfile.path) models.DataIssue.objects.create(parser='survex', message=message) survexleg.compass = 1000 survexleg.clino = -90.0 @@ -301,18 +309,25 @@ class LoadingSurvex(): pass def IdentifyCave(self, cavepath): - path = os.path.join(os.path.split(cavepath)[0], re.sub(r"\.svx$", "", cavepath)) - path_match = self.rx_cave.search(path) - print(' - Attempting cave match for %s' % path) + if cavepath in self.svxcaves: + print(' - Cave FAST matched for %s' % cavepath) + return self.svxcaves[cavepath] + + path_match = self.rx_cave.search(cavepath) + #print(' - Attempting cave match for %s' % cavepath) if path_match: - pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) - cave = models_caves.getCaveByReference(pos_cave) + sluggy = '{}-{}'.format(path_match.group(1), path_match.group(2)) # {} fields: str.format() ignores %s + cave = GetCaveLookup().get(sluggy) + # Below is how it has been done for years: very fuzzy & slow searches + # ..and wrong! + #cave = models_caves.getCaveByReference(sluggy) if cave: - survexfile.cave = cave - print(' - Cave matched for %s' % path) - return cave + self.currentcave = cave + self.svxcaves[cavepath] = cave + print(' - Cave matched for %s' % cavepath) + return cave else: - print(' ! 
No cave match for %s' % cavepath) return None def LoadSurvexFileBlock(self, survexblock, includelabel): @@ -320,29 +335,56 @@ class LoadingSurvex(): with links to 'cave' Creates a new current survexblock with valid .survexfile and valid .survexdirectory """ - cave = self.IdentifyCave(self, includelabel) - survexdirectory = SurvexDirectory(path=dirpath, cave=cave, primarysurvexfile=self) - survexdirectory.save() + depth = " " * self.depthbegin + print("{:2}{} - NEW survexfile:'{}'".format(self.depthbegin, depth, includelabel)) - newsurvexfile = models_survex.SurvexFile(path=includelabel) - newsurvexfile.survexdirectory = survexdirectory - newsurvexfile.save() + headpath, tail = os.path.split(includelabel) + if headpath not in self.svxdirs: + self.svxdirs[headpath] = models_survex.SurvexDirectory(path=headpath, primarysurvexfile=survexblock.survexfile) + newsurvexdirectory = self.svxdirs[headpath] - name = includelabel - newsurvexblock = models_survex.SurvexBlock(name=name, parent=survexblock, - survexpath=survexblock.survexpath+"."+name, - cave=survexfile.cave, survexfile=newsurvexfile, - legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0) - newsurvexblock.save + newsurvexfile = models_survex.SurvexFile(path=includelabel) + newsurvexfile.survexdirectory = newsurvexdirectory + + # Do not create a survexblock. Yes, there is a virtual block before the *begin statement but + # only the *title is usually in that, so just inherit the *title into the blocks. 
+ # name = includelabel + # newsurvexblock = models_survex.SurvexBlock(name=name, parent=survexblock, + # survexpath=survexblock.survexpath+"."+name, + # survexfile=newsurvexfile, + # legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0) + + cave = self.IdentifyCave(headpath) + if cave: + newsurvexdirectory.cave = cave + newsurvexfile.cave = cave + #newsurvexblock.cave = cave + newsurvexdirectory.save() + newsurvexfile.save() + #newsurvexblock.save self.currentsurvexfile = newsurvexfile - self.currentsurvexblock = newsurvexblock + #self.currentsurvexblock = newsurvexblock + + def ProcessIncludeLine(self, survexblock, included): + # should do some push stuff here + svxid = included.groups()[0] + #depth = " " * self.depthbegin + #print("{:2}{} - Include survexfile:'{}'".format(self.depthbegin, depth, svxid)) + self.LoadSurvexFileBlock(survexblock, svxid) + + def ProcessEdulcniLine(self, survexblock, edulcni): + # should do some pop stuff here + svxid = edulcni.groups()[0] + depth = " " * self.depthbegin + print("{:2}{} - Edulcni survexfile:'{}'".format(self.depthbegin, depth, svxid)) + self.currentsurvexblock = survexblock.parent + self.currentsurvexfile = survexblock.parent.survexfile def LoadSurvexComment(self, survexblock, comment): # ignore all comments except ;ref and ;QM and ;*include (for collated survex file) refline = self.rx_ref.match(comment) if refline: - #comment = comment.replace("ref","").strip() comment = re.sub('(?i)\s*ref[.;]?',"",comment.strip()) self.LoadSurvexRef(survexblock, comment) @@ -353,13 +395,12 @@ class LoadingSurvex(): included = self.rx_comminc.match(comment) # ;*include means we have been included; not 'proceed to include' which *include means if included: - self.LoadSurvexFileBlock(survexblock, included) + self.ProcessIncludeLine(survexblock,included) edulcni = self.rx_commcni.match(comment) - # ;*include means we have been included; not 'proceed to include' which *include means + # ;*edulcni means we are returning from an 
included file if edulcni: - currentsurvexblock = currentsurvexblock.parent - currentsurvexfile = currentsurvexblock.parent.survexfile + self.ProcessEdulcniLine(survexblock,edulcni) def LoadSurvexSetup(self,survexblock, survexfile): self.depthbegin = 0 @@ -503,22 +544,18 @@ class LoadingSurvex(): else: pass # ignore all other sorts of data - def LinearRecursiveLoad(self, survexblock, path, fin, skipto): + def LinearRecursiveLoad(self, survexblock, path, svxlines): """Loads a single survex file. Usually used to import all the survex files which have been collated into a single file. Loads the begin/end blocks recursively. """ self.relativefilename = path cave = self.IdentifyCave(path) # this will produce null for survex files which are geographic collections - svxlines = fin.read().splitlines() + blockcount = 0 for svxline in svxlines: - self.lineno += 1 - if self.lineno < skipto: - continue # skip through file to the place we got up to - sline, comment = self.rx_comment.match(svxline.strip()).groups() if comment: - self.LoadSurvexComment(survexblock, comment) + self.LoadSurvexComment(survexblock, comment) # this catches the ;*include and ;*edulcni lines too if not sline: continue # skip blank lines @@ -527,57 +564,61 @@ class LoadingSurvex(): if mstar: # yes we are reading a *cmd cmd, args = mstar.groups() cmd = cmd.lower() + + # ------------------------BEGIN if re.match("begin$(?i)", cmd): self.depthbegin += 1 - if args: - depth = " " * self.depthbegin - self.stackbegin.append(args.lower()) + depth = " " * self.depthbegin + self.stackbegin.append(args.lower()) - previousnlegs = self.survexlegsnumber - name = args.lower() - print(' - Begin found for:{}, creating new SurvexBlock '.format(name)) + previousnlegs = self.survexlegsnumber + name = args.lower() + print("{:2}{} - Begin for :'{}'".format(self.depthbegin,depth, name)) + survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, + survexpath=survexblock.survexpath+"."+name, + 
cave=self.currentcave, survexfile=self.currentsurvexfile, + legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0) + survexblockdown.save() + survexblock.save() + survexblock = survexblockdown - survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, - survexpath=survexblock.survexpath+"."+name, - cave=self.currentcave, survexfile=self.currentsurvexfile, - legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0) - survexblockdown.save() - survexblock.save() - survexblock = survexblockdown - else: - self.depthbegin += 1 + blockcount +=1 + if blockcount % 10 ==0 : + print(".", file=sys.stderr,end='') + if blockcount % 500 ==0 : + print("\n", file=sys.stderr,end='') + sys.stderr.flush(); + # ---------------------------END elif re.match("end$(?i)", cmd): - # haven#t really thought this through.. - if survexblock: - self.currentsurvexblock = survexblock.parent - self.currentsurvexfile = survexblock.parent.survexfile + depth = " " * self.depthbegin + self.currentsurvexblock = survexblock.parent + self.currentsurvexfile = survexblock.parent.survexfile - if self.depthbegin: - print(" - End -return from nested *begin/*end block: '{}'".format(args)) - self.depthbegin -= 1 - else: - legsinblock = self.survexlegsnumber - previousnlegs - print(" - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber)) - survexblock.legsall = legsinblock - survexblock.save() - return + print("{:2}{} - End from:'{}'".format(self.depthbegin,depth,args)) + legsinblock = self.survexlegsnumber - previousnlegs + print("{:2}{} - LEGS: {} (previous: {}, now:{})".format(self.depthbegin, + depth,legsinblock,previousnlegs,self.survexlegsnumber)) + survexblock.legsall = legsinblock + survexblock.save() + self.depthbegin -= 1 - elif re.match("title$(?i)", cmd): + # ----------------------------- + elif re.match("(?i)title$", cmd): self.currenttitle = args - elif cmd == "ref": + elif re.match("(?i)ref$", cmd): self.LoadSurvexRef(survexblock, args) - elif 
cmd == "flags": + elif re.match("(?i)flags$", cmd): self.LoadSurvexFlags(args, cmd) - elif cmd == "data": + elif re.match("(?i)data$", cmd): self.LoadSurvexDataCmd(survexblock, args) - elif re.match("date$(?i)", cmd): + elif re.match("(?i)date$", cmd): self.LoadSurvexDate(survexblock, args) - elif re.match("team$(?i)", cmd): + elif re.match("(?i)team$", cmd): self.LoadSurvexTeam(survexblock, args) - elif cmd == "set" and re.match("names(?i)", args): + elif re.match("(?i)set$", cmd) and re.match("(?i)names", args): pass - elif re.match("include$(?i)", cmd): + elif re.match("(?i)include$", cmd): message = " ! -ERROR *include command not expected here {}. Re-run a full Survex import.".format(path) print(message) print(message,file=sys.stderr) @@ -718,7 +759,9 @@ def FindAndLoadSurvex(survexblockroot): finroot = survexfileroot.OpenFile() fcollate.write(";*include {}\n".format(survexfileroot.path)) flinear.write("{:2} {} *include {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path)) + #---------------------------------------------------------------- svx_scan.RecursiveScan(survexblockroot, survexfileroot, finroot, flinear, fcollate) + #---------------------------------------------------------------- flinear.write("{:2} {} *edulcni {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path)) fcollate.write(";*edulcni {}\n".format(survexfileroot.path)) mem1 = models.get_process_memory() @@ -739,13 +782,14 @@ def FindAndLoadSurvex(survexblockroot): # Before doing this, it would be good to identify the *equate and *entrance we need that are relevant to the # entrance locations currently loaded after this by LoadPos(), but could better be done before ? 
# look in MapLocations() for how we find the entrances - print('\n - Loading All Survex Blocks...',file=sys.stderr) - + print('\n - Loading All Survex Blocks (LinearRecursive)',file=sys.stderr) svx_load = LoadingSurvex() with open(collatefilename, "r") as fcollate: - #svx_load.LinearRecursiveLoad(survexblockroot,survexfileroot.path,fcollate, 0) - pass + svxlines = fcollate.read().splitlines() + #---------------------------------------------------------------- + svx_load.LinearRecursiveLoad(survexblockroot,survexfileroot.path, svxlines) + #---------------------------------------------------------------- print(" - MEM:{:7.2f} MB STOP".format(mem1),file=sys.stderr) print(" - MEM:{:7.3f} MB USED".format(mem1-mem0),file=sys.stderr) @@ -755,15 +799,15 @@ def FindAndLoadSurvex(survexblockroot): mem1 = models.get_process_memory() svx_load = None - print('\n - Loading All Survex Blocks...',file=sys.stderr) - svxlrl = LoadingSurvex() + print('\n - Loading All Survex Blocks (RecursiveRecursive)',file=sys.stderr) + # svxlrl = LoadingSurvex() - finroot = survexfileroot.OpenFile() - svxlrl.RecursiveRecursiveLoad(survexblockroot, survexfileroot, finroot) - finroot.close() - survexlegsnumber = svxlrl.survexlegsnumber - survexlegsalllength = svxlrl.survexlegsalllength - svxlrl = None + # finroot = survexfileroot.OpenFile() + # svxlrl.RecursiveRecursiveLoad(survexblockroot, survexfileroot, finroot) + # finroot.close() + # survexlegsnumber = svxlrl.survexlegsnumber + # survexlegsalllength = svxlrl.survexlegsalllength + # svxlrl = None # Close the logging file, Restore sys.stdout to our old saved file handle sys.stdout.close() @@ -792,7 +836,7 @@ def LoadSurvexBlocks(): # this is the first so id=1 survexblockroot.save() - print(' - Loading All Survex Blocks...') + print(' - Loading Survex Blocks...') memstart = models.get_process_memory() survexlegsnumber, survexlegsalllength = FindAndLoadSurvex(survexblockroot) memend = models.get_process_memory() @@ -802,7 +846,7 @@ def 
LoadSurvexBlocks(): survexblockroot.legsall = survexlegsnumber survexblockroot.save() - print(" - total number of survex legs: {}m".format(survexlegsnumber)) + print(" - total number of survex legs: {}".format(survexlegsnumber)) print(" - total leg lengths loaded: {}m".format(survexlegsalllength)) print(' - Loaded All Survex Blocks.') diff --git a/templates/base.html b/templates/base.html index 9e0d55f..c38710d 100644 --- a/templates/base.html +++ b/templates/base.html @@ -33,15 +33,15 @@ <div class="toolbarlinks"> <a href="{% url "survexcaveslist" %}">All Survex</a> | <a href="{% url "surveyscansfolders" %}">Scans</a> | - <a href="{% url "tunneldata" %}">Tunneldata</a> | + <a href="{% url "tunneldata" %}">Drawing files</a> | <a href="{% url "survexcavessingle" "caves-1623/290/290.svx" %}">290</a> | <a href="{% url "survexcavessingle" "caves-1623/291/291.svx" %}">291</a> | <a href="{% url "survexcavessingle" "caves-1626/359/359.svx" %}">359</a> | <a href="{% url "survexcavessingle" "caves-1623/258/258.svx" %}">258</a> | <a href="{% url "survexcavessingle" "caves-1623/264/264.svx" %}">264</a> | + <a href="{% url "survexcavessingle" "264" %}">Surveys-264</a> | <a href="{% url "expedition" 2018 %}">Expo2018</a> | <a href="{% url "expedition" 2019 %}">Expo2019</a> | - <a href="{% url "expedition" 2020 %}">Expo2020</a> | <a href="/admin/">Django admin</a> <br> @@ -53,7 +53,7 @@ <a href="{% url "frontpage" %}">tasks to do </a> | <a id="cavesLink" href="{% url "caveindex" %}">caves</a> | - <a id="caversLink" href="{% url "personindex" %}">cavers</a> | + <a id="caversLink" href="{% url "personindex" %}">people</a> | <a id="expeditionsLink" href="{% url "expeditions" %}">all expeditions</a> | <a href="{% url "stats" %}">statistics</a> | <a id="cuccLink" href="{% url "controlpanel" %}">import/export data</a>