diff --git a/parsers/caves.py b/parsers/caves.py index 3c5d98e..ff87bcd 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -1,18 +1,27 @@ -# -*- coding: utf-8 -*- import os import re from django.conf import settings -import troggle.core.models as models +from troggle.core.models import DataIssue, get_process_memory import troggle.core.models_caves as models_caves def readcaves(): - # Clear the cave data issues as we are reloading - models.DataIssue.objects.filter(parser='caves').delete() + print(" - Deleting Caves and Entrances") + models_caves.Cave.objects.all().delete() + models_caves.Entrance.objects.all().delete() + # Clear the cave data issues and the caves as we are reloading + DataIssue.objects.filter(parser='caves').delete() + DataIssue.objects.filter(parser='entrances').delete() + # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. + area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None) + area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None) + + print (" - Setting pending caves") # Do this first, so that these empty entries are overwritten as they get properly created. # For those caves which do not have XML files even though they exist and have surveys + # also needs to be done *before* entrances so that the entrance-cave links work properly. forgotten = ["2007-04", "2007-05", "2007-06", "2007-07", "2007-12", "2009-01", "2009-02", "2010-06", "2010-07", "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06", "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888", @@ -21,32 +30,30 @@ def readcaves(): try: cave = models_caves.Cave( unofficial_number = k, - official_name = "Mislaid cave - created as empty object. No XML available at this time.", + official_name = "Pending cave write-up - creating as empty object. No XML file available yet.", notes="_Survex file found in loser repo but no description in expoweb") if cave: - print("{} {}".format(cave.unofficial_number, cave.official_name)) + cave.save() # must save to have id before foreign keys work + cave.area = area_1623 cave.save() + message = " ! {} {}".format(cave.unofficial_number, cave.official_name) + DataIssue.objects.create(parser='caves', message=message) + print(message) else: print("Failed to create cave {} ".format(k)) except: - message = " ! Forgotten cave error, slug: %s forgotten-id: %s" % (slug, k) - models.DataIssue.objects.create(parser='caves', message=message) + message = " ! Error. Cannot create pending cave, pending-id:{}".format(k) + DataIssue.objects.create(parser='caves', message=message) print(message) - - # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. - area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None) - area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None) - print(" - Reading Entrances") + raise + print(" - Reading Entrances from entrance descriptions xml files") for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files if filename.endswith('.html'): readentrance(filename) - print (" - Reading Caves") + print(" - Reading Caves from cave descriptions xml files") for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files if filename.endswith('.html'): readcave(filename) - - - def readentrance(filename): # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. @@ -120,7 +127,7 @@ def readentrance(filename): primary = primary) for k in kents: message = " ! - DUPLICATE in db. entrance:"+ str(k.entrance) + ", slug:" + str(k.slug()) - models.DataIssue.objects.create(parser='caves', message=message) + DataIssue.objects.create(parser='caves', message=message) print(message) for k in kents: if k.slug() != None: @@ -189,7 +196,7 @@ def readcave(filename): kaves = models_caves.Cave.objects.all().filter(kataster_number=kataster_number[0]) for k in kaves: message = " ! - DUPLICATES in db. kataster:"+ str(k.kataster_number) + ", slug:" + str(k.slug()) - models.DataIssue.objects.create(parser='caves', message=message) + DataIssue.objects.create(parser='caves', message=message) print(message) for k in kaves: if k.slug() != None: @@ -213,7 +220,7 @@ def readcave(filename): primary = primary) except: message = " ! Cave update/create failure: %s, skipping file %s" % (slug, context) - models.DataIssue.objects.create(parser='caves', message=message) + DataIssue.objects.create(parser='caves', message=message) print(message) primary = False @@ -225,7 +232,7 @@ def readcave(filename): ce = models_caves.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance) except: message = " ! Entrance setting failure, slug: %s letter: %s" % (slug, letter) - models.DataIssue.objects.create(parser='caves', message=message) + DataIssue.objects.create(parser='caves', message=message) print(message) @@ -235,13 +242,13 @@ def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items), "itemname": itemname, "min": minItems} + context - models.DataIssue.objects.create(parser='caves', message=message) + DataIssue.objects.create(parser='caves', message=message) print(message) if maxItems is not None and len(items) > maxItems and printwarnings: message = " ! %(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items), "itemname": itemname, "max": maxItems} + context - models.DataIssue.objects.create(parser='caves', message=message) + DataIssue.objects.create(parser='caves', message=message) print(message) return items \ No newline at end of file diff --git a/parsers/survex.py b/parsers/survex.py index 8bcbea2..6d266a3 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -68,6 +68,8 @@ class LoadingSurvex(): insp = "" callcount = 0 stardata ={} + ignoreprefix = ["surface", "kataster", "fixedpts", "gpx"] + ignorenoncave = ["caves-1623", "caves-1623/2007-neu"] includedfilename ="" currentsurvexblock = None currentsurvexfile = None @@ -308,7 +310,8 @@ class LoadingSurvex(): def IdentifyCave(self, cavepath): if cavepath.lower() in self.caveslist: return self.caveslist[cavepath.lower()] - + # TO DO - some of this is already done in generating self.caveslist so simplify this + # esp. as it is in a loop. path_match = self.rx_cave.search(cavepath) if path_match: sluggy = '{}-{}'.format(path_match.group(1), path_match.group(2)) @@ -329,6 +332,19 @@ class LoadingSurvex(): self.svxdirs[headpath.lower()] = models_survex.SurvexDirectory(path=headpath, primarysurvexfile=self.currentsurvexfile) return self.svxdirs[headpath.lower()] + def ReportNonCaveIncludes(self, headpath, includelabel): + """Ignore surface, kataser and gps *include survex files + """ + if headpath in self.ignorenoncave: + return + for i in self.ignoreprefix: + if headpath.startswith(i): + return + message = " ! {} is not a cave. (while creating {} sfile & sdirectory)".format(headpath, includelabel) + print(message) + print(message,file=sys.stderr) + models.DataIssue.objects.create(parser='survex', message=message) + def LoadSurvexFile(self, includelabel): """Creates SurvexFile in the database, and SurvexDirectory if needed with links to 'cave' @@ -338,28 +354,37 @@ class LoadingSurvex(): depth = " " * self.depthbegin print("{:2}{} - NEW survexfile:'{}'".format(self.depthbegin, depth, includelabel)) - newsurvexfile = models_survex.SurvexFile(path=includelabel) + newfile = models_survex.SurvexFile(path=includelabel) headpath, tail = os.path.split(includelabel) - newsurvexdirectory = self.GetSurvexDirectory(headpath) - newsurvexfile.survexdirectory = newsurvexdirectory + newdirectory = self.GetSurvexDirectory(headpath) + if not newdirectory: + message = " ! 'None' SurvexDirectory returned from GetSurvexDirectory({})".format(headpath) + print(message) + print(message,file=sys.stderr) + models.DataIssue.objects.create(parser='survex', message=message) + newfile.survexdirectory = newdirectory cave = self.IdentifyCave(headpath) if cave: - newsurvexdirectory.cave = cave - newsurvexfile.cave = cave - # else: - # message = " ! Cannot identify cave from {} when creating sfile & sdirectory".format(headpath) - # print(message) - # print(message,file=sys.stderr) - # models.DataIssue.objects.create(parser='survex', message=message) + newdirectory.cave = cave + newfile.cave = cave + else: + self.ReportNonCaveIncludes(headpath, includelabel) + + + if not newfile.survexdirectory: + message = " ! SurvexDirectory NOT SET in new SurvexFile {} ".format(includelabel) + print(message) + print(message,file=sys.stderr) + models.DataIssue.objects.create(parser='survex', message=message) self.currentsurvexfile.save() # django insists on this although it is already saved !? try: - newsurvexdirectory.save() + newdirectory.save() except: - print(newsurvexdirectory, file=sys.stderr) - print(newsurvexdirectory.primarysurvexfile, file=sys.stderr) + print(newdirectory, file=sys.stderr) + print(newdirectory.primarysurvexfile, file=sys.stderr) raise - self.currentsurvexfile = newsurvexfile + self.currentsurvexfile = newfile def ProcessIncludeLine(self, included): svxid = included.groups()[0] @@ -418,129 +443,6 @@ class LoadingSurvex(): if cave: survexfile.cave = cave - def RecursiveRecursiveLoad(self, survexblock, survexfile, fin): - """Follows the *include links in all the survex files from the root file 1623.svx - and reads in the survex blocks, other data and the wallet references (scansfolder) as it - goes. This part of the data include process is where the maximum memory is used and where it - crashes on memory-constrained machines. Begin-end blocks may also be nested. - """ - # self.LoadSurvexSetup(survexblock, survexfile) - # insp =self.insp - # previousnlegs = 0 - - # svxlines = fin.read().splitlines() - # # cannot close file now as may be recursively called with the same fin if nested *begin-end - - # for svxline in svxlines: - # self.lineno += 1 - # sline, comment = self.rx_comment.match(svxline.strip()).groups() - # if comment: - # self.LoadSurvexComment(survexblock, comment) - # if not sline: - # continue # skip blank lines - - # # detect the star command - # mstar = self.rx_star.match(sline) - # if mstar: # yes we are reading a *cmd - # cmd, args = mstar.groups() - # cmd = cmd.lower() - # if re.match("include$(?i)", cmd): - # cave = self.IdentifyCave(args) - # if cave: - # survexfile.cave = cave - - # includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args))) - # print((insp+' - INCLUDE-go path found, including - ' + args)) - - # includesurvexfile = models_survex.SurvexFile(path=includepath) - # includesurvexfile.save() - # includesurvexfile.SetDirectory() - # if includesurvexfile.exists(): - # survexblock.save() - # self.insp += "> " - # #-------------------------------------------------------- - # fininclude = includesurvexfile.OpenFile() - # self.RecursiveRecursiveLoad(survexblock, includesurvexfile, fininclude) - # fininclude.close() - # #-------------------------------------------------------- - # self.insp = self.insp[2:] - # insp = self.insp - # print((insp+' - INCLUDE-return from include - ' + includepath)) - # else: - # print((insp+' ! ERROR *include file not found for %s' % includesurvexfile)) - - # elif re.match("begin$(?i)", cmd): - # # On a *begin statement we start a new survexblock. - # # There should not be any *include inside a begin-end block, so this is a simple - # # load not a recursive fileload. But there may be many blocks nested to any depth in one file. - # if args: - # newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args)) - # # Try to find the cave in the DB if not use the string as before - # path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath) - # if path_match: - # pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) - # # print(insp+pos_cave) - # cave = models_caves.getCaveByReference(pos_cave) - # if cave: - # survexfile.cave = cave - # else: - # print((insp+' - No match (b) for %s' % newsvxpath)) - - # previousnlegs = self.survexlegsnumber - # name = args.lower() - # print(insp+' - Begin found for:{}, creating new SurvexBlock '.format(name)) - # # the recursive call re-reads the entire file. This is wasteful. We should pass in only - # # the un-parsed part of the file. - # survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, - # survexpath=survexblock.survexpath+"."+name, - # cave=survexfile.cave, survexfile=survexfile, - # legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0) - # survexblockdown.save() - # survexblock.save() - # survexblock = survexblockdown - # print(insp+" - BLOCK-enter nested *begin/*end block: '{}'".format(name)) - # self.insp += "> " - # #-------------------------------------------------------- - # self.RecursiveRecursiveLoad(survexblockdown, survexfile, fin) - # #-------------------------------------------------------- - # # do not close the file as there may be more blocks in this one - # # and it is re-read afresh with every nested begin-end block. - # self.insp = self.insp[2:] - # insp = self.insp - # else: - # self.depthbegin += 1 - - # elif re.match("end$(?i)", cmd): - # if self.depthbegin: - # print(insp+" - BLOCK-return from nested *begin/*end block: '{}'".format(args)) - # self.depthbegin -= 1 - # else: - # legsinblock = self.survexlegsnumber - previousnlegs - # print(insp+" - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber)) - # survexblock.legsall = legsinblock - # survexblock.save() - # return - # elif cmd == "ref": - # self.LoadSurvexRef(survexblock, args) - # elif cmd == "flags": - # self.LoadSurvexFlags(args, cmd) - # elif cmd == "data": - # self.LoadSurvexDataCmd(survexblock, args) - # elif cmd == "set" and re.match("names(?i)", args): - # pass - # elif re.match("date$(?i)", cmd): - # self.LoadSurvexDate(survexblock, args) - # elif re.match("team$(?i)", cmd): - # self.LoadSurvexTeam(survexblock, args) - # else: - # self.LoadSurvexIgnore(survexblock, args, cmd) - # else: # not a *cmd so we are reading data OR rx_comment failed - # if "from" in self.stardata: # only interested in survey legs - # self.LoadSurvexLineLeg(survexblock, svxline, sline, comment) - # else: - # pass # ignore all other sorts of data - pass - def LinearRecursiveLoad(self, survexblock, path, svxlines): """Loads a single survex file. Usually used to import all the survex files which have been collated into a single file. Loads the begin/end blocks recursively. @@ -702,7 +604,9 @@ class LoadingSurvex(): flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includesurvexfile.path)) push = includesurvexfile.path.lower() self.stackinclude.append(push) + #----------------- self.RecursiveScan(survexblock, includesurvexfile, fininclude, flinear, fcollate) + #----------------- pop = self.stackinclude.pop() if pop != push: message = "!!!!!!! ERROR pop != push {} != {} {}".format(pop, push, self.stackinclude) @@ -710,8 +614,9 @@ class LoadingSurvex(): print(message,file=flinear) print(message,file=sys.stderr) models.DataIssue.objects.create(parser='survex', message=message) - flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, includesurvexfile.path)) - fcollate.write(";*edulcni {}\n".format(includesurvexfile.path)) + includesurvexfile = None + flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, pop)) + fcollate.write(";*edulcni {}\n".format(pop)) fininclude.close() self.depthinclude -= 1 #-------------------------------------------------------- @@ -820,16 +725,6 @@ def FindAndLoadSurvex(survexblockroot): mem1 = models.get_process_memory() svx_load = None - # print('\n - Loading All Survex Blocks (RecursiveRecursive)',file=sys.stderr) - # svxlrl = LoadingSurvex() - - # finroot = survexfileroot.OpenFile() - # svxlrl.RecursiveRecursiveLoad(survexblockroot, survexfileroot, finroot) - # finroot.close() - # survexlegsnumber = svxlrl.survexlegsnumber - # survexlegsalllength = svxlrl.survexlegsalllength - # svxlrl = None - # Close the logging file, Restore sys.stdout to our old saved file handle sys.stdout.close() print("+", file=sys.stderr)