diff --git a/core/models/survex.py b/core/models/survex.py
index 469628a..cdc8a80 100644
--- a/core/models/survex.py
+++ b/core/models/survex.py
@@ -27,8 +27,11 @@ class SurvexDirectory(models.Model):
         ordering = ("id",)
         verbose_name_plural = "Survex directories"
 
+    def contents(self):
+        return "[SvxDir:" + str(self.path) + " | Primary svx:" + str(self.primarysurvexfile.path) + ".svx ]"
+
     def __str__(self):
-        return "[SurvexDirectory:" + str(self.path) + " | Primary svx:" + str(self.primarysurvexfile.path) + ".svx ]"
+        return "[SvxDir:" + str(self.path) + "]"
 
 
 class SurvexFile(models.Model):
@@ -44,13 +47,11 @@ class SurvexFile(models.Model):
     # return "[SurvexFile:"+str(self.path) + "-" + str(self.survexdirectory) + "-" + str(self.cave)+"]"
 
     def exists(self):
+        """This is only used within the Django templates
+        """
         fname = Path(settings.SURVEX_DATA, self.path + ".svx")
         return fname.is_file()
 
-    def OpenFile(self):
-        fname = os.path.join(settings.SURVEX_DATA, self.path + ".svx")
-        return open(fname)
-
     def SetDirectory(self):
         dirpath = os.path.split(self.path)[0]
         # pointless search every time we import a survex file if we know there are no duplicates..
@@ -64,6 +65,10 @@ class SurvexFile(models.Model):
         self.survexdirectory = survexdirectory
         self.save()
 
+    # Don't change from the default as that breaks troggle webpages and internal referencing!
+    # def __str__(self):
+    #     return "[SurvexFile:"+str(self.path) + "-" + str(self.survexdirectory) + "-" + str(self.cave)+"]"
+
     def __str__(self):
         return self.path
 
diff --git a/core/views/survex.py b/core/views/survex.py
index 57a8d1a..3ab7dd9 100644
--- a/core/views/survex.py
+++ b/core/views/survex.py
@@ -40,9 +40,14 @@ even though there are dozens of surveys.
 
 - Save and re-parse an edited survexfile which already exists in the db, and update all its
   dependencies (work in progress)
+
+- overlapping and cross-calling when things fail make this hard to understand, e.g. svx() and
+  survexcavesingle() can get called for a survex file depending on whether the URL ends in ".svx" or not,
+  but each tries to handle the other case too.
+
 """
 
-survexdatasetpath = Path(settings.SURVEX_DATA)
+SVXPATH = Path(settings.SURVEX_DATA)
 
 # NB this template text must be identical to that in :loser:/templates/template.svx
 survextemplatefile = """; *** THIS IS A TEMPLATE FILE NOT WHAT YOU MIGHT BE EXPECTING ***
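The note above about svx() and survexcavesingle() each tolerating a missing or doubled ".svx" suggests a single shared normaliser. A minimal illustrative sketch, not part of this patch (the function name is invented):

def normalise_svx_path(survex_file):
    """Strip any number of trailing '.svx' suffixes, so '.svx' and accidental
    '.svx.svx' URLs resolve to the same underlying survex file path."""
    while survex_file.lower().endswith(".svx"):
        survex_file = survex_file[:-4]
    return survex_file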
+ """ -survexdatasetpath = Path(settings.SURVEX_DATA) +SVXPATH = Path(settings.SURVEX_DATA) # NB this template text must be identical to that in :loser:/templates/template.svx survextemplatefile = """; *** THIS IS A TEMPLATE FILE NOT WHAT YOU MIGHT BE EXPECTING *** @@ -160,7 +165,7 @@ class SvxForm(forms.Form): template = False def GetDiscCode(self): - fname = survexdatasetpath / (self.data["filename"] + ".svx") + fname = SVXPATH / (self.data["filename"] + ".svx") if not fname.is_file(): print(">>> >>> WARNING - svx file not found, showing TEMPLATE SVX", fname, flush=True) self.template = True @@ -186,7 +191,7 @@ class SvxForm(forms.Form): return difflist def SaveCode(self, rcode): - fname = survexdatasetpath / (self.data["filename"] + ".svx") + fname = SVXPATH / (self.data["filename"] + ".svx") if not fname.is_file(): if re.search(r"\[|\]", rcode): errmsg = "Error: remove all []s from the text.\nEverything inside [] are only template guidance.\n\n" @@ -203,7 +208,7 @@ class SvxForm(forms.Form): fout = open(fname, "w", encoding="utf8", newline="\n") except FileNotFoundError: pth = os.path.dirname(self.data["filename"]) - newpath = survexdatasetpath / pth + newpath = SVXPATH / pth if not os.path.exists(newpath): os.makedirs(newpath) fout = open(fname, "w", encoding="utf8", newline="\n") @@ -232,8 +237,8 @@ class SvxForm(forms.Form): def Process(self): print(">>>>....\n....Processing\n") - froox = os.fspath(survexdatasetpath / (self.data["filename"] + ".svx")) - froog = os.fspath(survexdatasetpath / (self.data["filename"] + ".log")) + froox = os.fspath(SVXPATH / (self.data["filename"] + ".svx")) + froog = os.fspath(SVXPATH / (self.data["filename"] + ".log")) cwd = os.getcwd() os.chdir(os.path.split(froox)[0]) os.system(settings.CAVERN + " --log " + froox) @@ -248,7 +253,7 @@ class SvxForm(forms.Form): # print(message) # print(f'stderr:\n\n' + str(sp.stderr) + '\n\n' + str(sp.stdout) + '\n\nreturn code: ' + str(sp.returncode)) - filepatherr = Path(survexdatasetpath / str(self.data["filename"] + ".err")) + filepatherr = Path(SVXPATH / str(self.data["filename"] + ".err")) if filepatherr.is_file(): if filepatherr.stat().st_size == 0: filepatherr.unlink() # delete empty closure error file @@ -281,11 +286,14 @@ def svx(request, survex_file): also has no difflist. Needs refactoring. Too many piecemeal edits and odd state dependencies. + + On Get does the SAME THING as svxcavesingle but is called when the .svx suffix is MISSING """ warning = False print(survex_file) if survex_file.lower().endswith(".svx"): + #cope with ".svx.svx" bollox survex_file = survex_file[:-4] print(survex_file) @@ -361,7 +369,7 @@ def svx(request, survex_file): # collect all the survex blocks which actually have a valid date if svxfile: - has_3d = (Path(survexdatasetpath) / Path(survex_file + ".3d")).is_file() + has_3d = (Path(SVXPATH) / Path(survex_file + ".3d")).is_file() try: svxblocks = svxfile.survexblock_set.filter(date__isnull=False).order_by('date') except: @@ -438,9 +446,9 @@ def events_on_dates(svxblocks): # The cavern running function. This is NOT where it is run inside the form! 
@@ -438,9 +446,9 @@
 
 # The cavern running function. This is NOT where it is run inside the form! see SvxForm.Process() for that
 def process(survex_file):
     """This runs cavern only where a .3d, .log or .err file is requested."""
-    filepathsvx = survexdatasetpath / str(survex_file + ".svx")
+    filepathsvx = SVXPATH / str(survex_file + ".svx")
     cwd = os.getcwd()
-    os.chdir(os.path.split(os.fspath(survexdatasetpath / survex_file))[0])
+    os.chdir(os.path.split(os.fspath(SVXPATH / survex_file))[0])
     os.system(settings.CAVERN + " --log " + str(filepathsvx))
     os.chdir(cwd)
@@ -453,27 +461,27 @@
     # print(message)
     # print(f'stderr:\n\n' + str(sp.stderr) + '\n\n' + str(sp.stdout) + '\n\nreturn code: ' + str(sp.returncode))
 
-    filepatherr = Path(survexdatasetpath / str(survex_file + ".err"))
+    filepatherr = Path(SVXPATH / str(survex_file + ".err"))
     if filepatherr.is_file():
         if filepatherr.stat().st_size == 0:
             filepatherr.unlink()  # delete empty closure error file
 
 
 def threed(request, survex_file):
-    filepath3d = survexdatasetpath / str(survex_file + ".3d")
-    survexdatasetpath / str(survex_file + ".log")
+    filepath3d = SVXPATH / str(survex_file + ".3d")
+    SVXPATH / str(survex_file + ".log")
     if filepath3d.is_file():
         threed = open(filepath3d, "rb")
         return HttpResponse(threed, content_type="application/x-aven")
     else:
         process(survex_file)  # should not need to do this if it already exists, as it should.
-        log = open(survexdatasetpath / str(survex_file + ".log"), "r", encoding="utf-8")
+        log = open(SVXPATH / str(survex_file + ".log"), "r", encoding="utf-8")
         return HttpResponse(log, content_type="text")
 
 
 def svxlog(request, survex_file):
     """Used for rendering .log files from survex outputtype"""
-    filepathlog = survexdatasetpath / str(survex_file + ".log")
+    filepathlog = SVXPATH / str(survex_file + ".log")
     if not filepathlog.is_file():
         process(survex_file)
     log = open(filepathlog, "r")
@@ -481,7 +489,7 @@
 
 
 def err(request, survex_file):
-    filepatherr = survexdatasetpath / str(survex_file + ".err")
+    filepatherr = SVXPATH / str(survex_file + ".err")
     if not filepatherr.is_file():  # probably not there because it was empty, but re-run anyway
         process(survex_file)
     process(survex_file)
@@ -547,7 +555,7 @@ def identifycavedircontents(gcavedir):
 
 
 def get_survexareapath(area):
-    return survexdatasetpath / str("caves-" + area)
+    return SVXPATH / str("caves-" + area)
 
 
 # direct local non-database browsing through the svx file repositories
@@ -653,7 +661,7 @@ def survexcavesingle(request, survex_cave):
     # maybe - and _ mixed up, or CUCC-2017- instead of 2017-CUCC-, or CUCC2015DL01 . Let's not get carried away..
 
     # or it might be an exact search for a specific survefile but just missing the '.svx.
-    if (Path(survexdatasetpath) / Path(survex_cave + ".svx")).is_file():
+    if (SVXPATH / Path(survex_cave + ".svx")).is_file():
         return svx(request, survex_cave)
 
     for unoff in [sc, sc.replace("-", "_"), sc.replace("_", "-"), sc.replace("-", ""), sc.replace("_", "")]:
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 6211630..5a05e0f 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -24,8 +24,6 @@ Parses and imports logbooks in all their wonderful confusion
 todo = """
 - Most of the time is during the database writing (6s out of 8s).
 
-- this is a slow and uncertain function too: cave = getCaveByReference(caveRef)
-
 - profile the code to find bad repetitive things, of which there are many.
 
 - attach or link a DataIssue to an individual expo (logbook) so that it can be found and deleted
diff --git a/parsers/survex.py b/parsers/survex.py
index 0156140..e7340a4 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -8,13 +8,13 @@ from datetime import datetime, timezone
 from pathlib import Path
 
 import troggle.settings as settings
-from troggle.core.models.caves import Cave, Entrance
+from troggle.core.models.caves import Cave, Entrance, GetCaveLookup
 from troggle.core.models.logbooks import QM
 from troggle.core.models.survex import SurvexBlock, SurvexDirectory, SurvexFile, SurvexPersonRole, SurvexStation
 from troggle.core.models.wallets import Wallet
 from troggle.core.models.troggle import DataIssue, Expedition
 from troggle.core.utils import chaosmonkey, get_process_memory
-from troggle.parsers.logbooks import GetCaveLookup
+# from troggle.parsers.logbooks import GetCaveLookup
 from troggle.parsers.people import GetPersonExpeditionNameLookup, known_foreigner
 
 """Imports the tree of survex files following from a defined root .svx file
@@ -63,6 +63,28 @@ class SurvexLeg:
     tape = 0.0
     compass = 0.0
     clino = 0.0
+
+
+def IdentifyCave(cavepath):
+    """Given a file path for a survex file, or a survex-block path,
+    return the cave object
+    """
+    caveslist = GetCaveLookup()
+    if cavepath.lower() in caveslist:
+        return caveslist[cavepath.lower()]
+    # TO DO - this predates the big revision to Gcavelookup so look at this again carefully
+    path_match = LoadingSurvex.rx_cave.search(cavepath)  # use as Class method
+    if path_match:
+        sluggy = f"{path_match.group(1)}-{path_match.group(2)}"
+        guesses = [sluggy.lower(), path_match.group(2).lower()]
+        for g in guesses:
+            if g in caveslist:
+                caveslist[cavepath] = caveslist[g]
+                return caveslist[g]
+        print(f" ! Failed to find cave for {cavepath.lower()}")
+    else:
+        # not a cave, but that is fine.
+        # print(f' ! No regex(standard identifier) cave match for {cavepath.lower()}')
+        return None
 
 
 def datewallet(w, earliest):
     """Gets the date of the youngest survexblock associated with the wallet
@@ -1093,23 +1115,6 @@ class LoadingSurvex:
             f" $ flagslist:{flags}",
         )
 
-    def IdentifyCave(self, cavepath):
-        if cavepath.lower() in self.caveslist:
-            return self.caveslist[cavepath.lower()]
-        # TO DO - this predates the big revision to Gcavelookup so look at this again carefully
-        path_match = self.rx_cave.search(cavepath)
-        if path_match:
-            sluggy = f"{path_match.group(1)}-{path_match.group(2)}"
-            guesses = [sluggy.lower(), path_match.group(2).lower()]
-            for g in guesses:
-                if g in self.caveslist:
-                    self.caveslist[cavepath] = self.caveslist[g]
-                    return self.caveslist[g]
-            print(f" ! Failed to find cave for {cavepath.lower()}")
-        else:
-            # not a cave, but that is fine.
-            # print(f' ! No regex(standard identifier) cave match for {cavepath.lower()}')
-            return None
 
     def GetSurvexDirectory(self, headpath):
         """This creates a SurvexDirectory if it has not been seen before, and on creation
@@ -1214,7 +1219,7 @@ class LoadingSurvex:
             newdirectory.save()
             newfile.survexdirectory = newdirectory
             self.survexdict[newdirectory].append(newfile)
-        cave = self.IdentifyCave(headpath)  # cave already exists in db
+        cave = IdentifyCave(headpath)  # cave already exists in db
 
         if not newdirectory:
             message = f" ! 'None' SurvexDirectory returned from GetSurvexDirectory({headpath})"
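IdentifyCave() builds its lookup guesses from a regex over the file path. rx_cave itself is not shown in this diff, so the sketch below uses the similar pattern from get_cave() purely for illustration, and "caves-1623/204/gesamt" is a made-up example path:

import re

rx_cave_sketch = re.compile(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)")

def cave_key_guesses(cavepath):
    """Return candidate GetCaveLookup() keys, e.g. '1623-204' and '204' for 'caves-1623/204/gesamt'."""
    m = rx_cave_sketch.search(cavepath)
    if not m:
        return []
    return [f"{m.group(1)}-{m.group(2)}".lower(), m.group(2).lower()]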
@@ -1458,28 +1463,42 @@ class LoadingSurvex:
         # ;*edulcni means we are returning from an included file
         if edulcni:
             self.ProcessEdulcniLine(edulcni)
-
-    def LoadSurvexSetup(self, survexblock, survexfile):
-        self.depthbegin = 0
-        self.datastar = self.datastardefault
-        print(
-            self.insp
-            + f" - MEM:{get_process_memory():.3f} Reading. parent:{survexblock.survexfile.path} <> {survexfile.path} "
-        )
-        self.lineno = 0
-        sys.stderr.flush()
-        self.callcount += 1
-        if self.callcount % 10 == 0:
-            print(".", file=sys.stderr, end="")
-        if self.callcount % 500 == 0:
-            print("\n", file=sys.stderr, end="")
-        # Try to find the cave in the DB if not use the string as before
-        path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path)
+
+    def get_cave(self, path):
+        """Reads the file path to a survex file and guesses the cave
+        """
+        path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", path)
         if path_match:
             pos_cave = f"{path_match.group(1)}-{path_match.group(2)}"
             cave = getCaveByReference(pos_cave)
-            if cave:
-                survexfile.cave = cave
+            return cave
+        return None
+
+    # def LoadSurvexSetup(self, survexblock, survexfile):
+    #     """REFACTOR to use get_cave()
+
+    #     This does not seem to get run at all ?!
+    #     """
+    #     self.depthbegin = 0
+    #     self.datastar = self.datastardefault
+    #     print(
+    #         self.insp
+    #         + f" - MEM:{get_process_memory():.3f} Reading. parent:{survexblock.survexfile.path} <> {survexfile.path} "
+    #     )
+    #     self.lineno = 0
+    #     sys.stderr.flush()
+    #     self.callcount += 1
+    #     if self.callcount % 10 == 0:
+    #         print(".", file=sys.stderr, end="")
+    #     if self.callcount % 500 == 0:
+    #         print("\n", file=sys.stderr, end="")
+    #     # Try to find the cave in the DB if not use the string as before
+    #     path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path)
+    #     if path_match:
+    #         pos_cave = f"{path_match.group(1)}-{path_match.group(2)}"
+    #         cave = getCaveByReference(pos_cave)
+    #         if cave:
+    #             survexfile.cave = cave
 
     def LinearLoad(self, survexblock, path, collatefilename):
         """Loads a single survex file. Usually used to import all the survex files which have been collated
@@ -1497,7 +1516,7 @@ class LoadingSurvex:
         slengthtotal = 0.0
         nlegstotal = 0
         self.relativefilename = path
-        self.IdentifyCave(path)  # this will produce null for survex files which are geographic collections
+        IdentifyCave(path)  # this will produce null for survex files which are geographic collections
 
         self.currentsurvexfile = survexblock.survexfile
         self.currentsurvexfile.save()  # django insists on this although it is already saved !?
@@ -1637,6 +1656,7 @@ class LoadingSurvex:
                 legslength=0.0,
             )
             newsurvexblock.save()
+            print(f"SB: #{newsurvexblock.id} '{newsurvexblock}' parent:{newsurvexblock.parent} f:{newsurvexblock.survexfile}")
             newsurvexblock.title = (
                 "(" + survexblock.title + ")"
             )  # copy parent inititally, overwrite if it has its own
@@ -2306,43 +2326,48 @@ def parse_one_file(fpath):  # --------------------------------------in progress--
     In the initial file parsing in databaseReset, the *include expansion is done in an earlier stange
     than LinearLoad(). By the time LinearLoad() is called, all the *include expansion has happened.
-
-    WORK IN PROGRESS.
-    Works fine for completely new survex file.
-
-    For an edited, pre-existing survex file,
-    I am having great trouble getting the 'parent' block to work correctly.
-    It gets overwritten, and then nullified, on repeated SAVE & import.
-    I should learn how to step through with the debugger.
     """
 
-    def parse_new_svx(fpath, blockroot=None, svxfileroot=None):
+    def parse_new_svx(fpath, svx_load, cave, svxfileroot=None):
+        """We need a dummy survex block which has the survexfile being parsed
+        as its .survexfile field. But it is used in two ways: it is also
+        set as the parent block for the new blocks being created, and that has
+        to be fixed up later.
+        This all needs refactoring.
+        """
         if svxfileroot == None:
-            svxfileroot = MakeFileRoot(fpath)
+
+            svxfileroot = MakeFileRoot(fpath, cave)
             svxfileroot.save()
-        if blockroot == None:
-            newname = "adhoc_" + str(Path(str(svxfileroot)).name)
-            survexblockparent = SurvexBlock(
-                name=newname, survexpath="", survexfile=svxfileroot, legsall=0, legslength=0.0
-            )
-            survexblockparent.save()
-            blockroot = survexblockparent
+
+        # It is vital that the block has the survexfile object being parsed attached to it.
+        block_dummy = SurvexBlock(
+            name="dummy", survexpath="", survexfile=svxfileroot, legsall=0, legslength=0.0
+        )
+        svxfileroot.save()
+        block_dummy.save()
+        newname = f"#{block_dummy.id}_" + str(Path(str(svxfileroot)).name)
+        block_dummy.name = newname
+        block_dummy.save()
+        print(f" - block_dummy now '{block_dummy}' {type(block_dummy)} id={block_dummy.id} f:{block_dummy.survexfile}")
+
         svx_load.survexdict[svxfileroot.survexdirectory] = []
         svx_load.survexdict[svxfileroot.survexdirectory].append(svxfileroot)
         svx_load.svxdirs[""] = svxfileroot.survexdirectory
 
         # ----------------------------------------------------------------
-        svx_load.LinearLoad(blockroot, svxfileroot.path, fname)
+        svx_load.LinearLoad(block_dummy, svxfileroot.path, fname)
         # ----------------------------------------------------------------
 
+        # Now we don't need or want the dummy any more
+        block_dummy.delete()
+
     global svx_load
     print(f"\n - Loading One Survex file '{fpath}'", file=sys.stderr)
     svx_load = LoadingSurvex()
     svx_load.survexdict = {}
 
     fname = Path(settings.SURVEX_DATA, (fpath + ".svx"))
-    # print(f" - {fname=}")
 
     svxs = SurvexFile.objects.filter(path=fpath)
     if svxs:
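parse_new_svx() creates a dummy SurvexBlock, parses with it as the parent, then deletes it. A sketch of the same create-use-delete dance wrapped in a context manager, so the dummy cannot leak into the database even if parsing raises - an assumed alternative, not what this patch does:

from contextlib import contextmanager
from troggle.core.models.survex import SurvexBlock

@contextmanager
def temporary_parent_block(svxfileroot):
    """Yield a throwaway SurvexBlock attached to svxfileroot, deleting it afterwards."""
    block = SurvexBlock(name="dummy", survexpath="", survexfile=svxfileroot,
                        legsall=0, legslength=0.0)
    block.save()
    try:
        yield block
    finally:
        block.delete()  # the dummy must not survive the import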
@@ -2351,67 +2376,70 @@ def parse_one_file(fpath):  # --------------------------------------in progress--
             print(f" - Aborting file parsing & import into database.")
             return False
         print(f" - Pre-existing survexfile {svxs}.")
-        # reparse_existing_svx(svxs)
         existingsvx = SurvexFile.objects.get(path=fpath)
         existingcave = existingsvx.cave
-        print(f" - survexfile is {existingsvx} id={existingsvx.id} {existingcave}")
+        print(f" - survexfile id={existingsvx.id} {existingsvx} {existingcave}")
 
         sbs = existingsvx.survexblock_set.all()
         existingparent = None
         parents =set()
         if sbs:
             for sb in sbs:
-                print(f" - cleaning survex block {sb=}")
+                # print(f" - {sb.id} checking survex block {sb=}")
                 try:
                     if sb.parent:
                         parents.add(sb.parent)
+                        # print(f" - adding {sb.parent=}")
                 except:
-                    print(f" ! FAILURE to access sb.parent {sb=}")
-                    sb.delete()
+                    print(f" ! FAILURE to access sb.parent {sb=}\n ! {sb.parent_id=} ")  # \n{dir(sb)}
+                    # even though the parent_id exists.. hmm.
+            for sb in sbs:
+                # print(f" - {sb.id} {sb.pk} {sb}")
+                sb_keep = sb
+                if sb not in parents:
+                    # print(f" - {sb.id} Deleting survex block {sb=}")
+                    sb.delete()
+
         if parents:
-            print(f" - set of parent blocks {parents}")
+            # print(f" - parents get {parents}")
             if len(parents) > 1:
                 print(f" - WARNING more than one parent survex block!")
-            existingparent = parents.pop()
+            existingparent = parents.pop()  # removes it
+            parents.add(existingparent)  # restores it
 
-        # print(f" - deleting survex file {existingsvx=}")
-        # existingsvx.delete()
-        print(f" - Reloading and parsing this survexfile '{fpath}' Loading...")
-
-        parse_new_svx(fpath, blockroot=existingparent, svxfileroot=existingsvx)
-
-        svxs = SurvexFile.objects.filter(path=fpath)
-        if len(svxs)>1:
-            print(f" ! Mistake? More than one survex file object in database with the same file-path {svxs}")
-            print(f" - Aborting file parsing & import into database.")
-            return False
-        replacesvx = SurvexFile.objects.get(path=fpath)
-        replacesvx.cave = existingcave
-        print(f" - new/replacement survexfile {svxs}. id={replacesvx.id}")
-        replacesvx.save()
+        print(f" - Reloading and parsing this survexfile '{fpath}' Loading...")
+        # Logic is that the parser needs an SB which links to the survexfile we are parsing,
+        # but we delete all those before we start parsing. Urk.
+        #===========
+        parse_new_svx(fpath, svx_load, existingsvx, svxfileroot=existingsvx)
+        #===========
+        print(f" - survexfile id={existingsvx.id} update ")
+
         if parents:
-            sbs = replacesvx.survexblock_set.all()
+            print(f" - parents set {parents}")
+            sbs = existingsvx.survexblock_set.all()
+            if len(sbs) < 1:
+                print(f" ! No survex blocks found. Parser failure...")
             for sb in sbs:
-                print(f" - re-setting survex block parent{sb=}")
+                print(f" - {sb.id} re-setting survex block parent {sb=}")
                 sb.parent = existingparent  # should be all the same
                 sb.save()
     else:
-        print(f" - Not seen this survexfile before '{fpath}' Loading...")
-        parse_new_svx(fpath)
+        print(f" - Not seen this survexfile before '{fpath}' Loading...")
+        #===========
+        parse_new_svx(fpath, svx_load, IdentifyCave(fpath))
+        #===========
 
-    legsnumber = svx_load.legsnumber
-
-    print(f" - Number of SurvexDirectories: {len(svx_load.survexdict):,}")
-    print(f" - SurvexDirectories: {svx_load.survexdict}")
-
-    tf = 0
-    for d in svx_load.survexdict:
-        tf += len(svx_load.survexdict[d])
-    print(f" - Number of SurvexFiles: {tf:,}")
-    print(f" - Number of Survex legs: {legsnumber:,}")
-    print(f" - Length of Survex legs: {svx_load.slength:.2f} m")
+    # print(f" - Number of SurvexDirectories: {len(svx_load.survexdict):,}")
+    # tf = 0
+    # for d in svx_load.survexdict:
+    #     print(f" - SD: {d}")
+    #     tf += len(svx_load.survexdict[d])
+    # print(f" - Number of SurvexFiles: {tf:,}")
+    # print(f" - Number of Survex legs: {svx_load.legsnumber:,}")
+    # print(f" - Length of Survex legs: {svx_load.slength:.2f} m")
 
     svx_load = None
 
     return True
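The pop()-then-add() pair above is just a way of peeking at the single expected parent block. A non-mutating sketch of the same idea (illustrative only, not part of this patch):

def single_parent(parents):
    """Return the one parent block expected in the set, or None, without mutating it."""
    if len(parents) > 1:
        print(" - WARNING more than one parent survex block!")
    return next(iter(parents)) if parents else None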
@@ -2432,15 +2460,18 @@
     return fileroot
 
 
-def MakeFileRoot(fn):
+def MakeFileRoot(fn, cave):
     """Returns a file_object.path = _unseens.svx associated with directory_object.path = SURVEX_DATA
 
     CHANGE THIS to just use the same block root as for SURVEX_TOPNAME ?
     """
-    print(f" - making a new root survexfile for this import: {fn}")
-    fileroot = SurvexFile(path=fn, cave=None)
+    print(f" - Making a new root survexfile for this import: {fn}")
+    fileroot = SurvexFile(path=fn, cave=cave)
     fileroot.survexdirectory = SurvexDirectory.objects.get(id=1)  # just re-use the first thing we made
     fileroot.save()
+    cave = IdentifyCave(fn)
+    fileroot.cave = cave
+    print(f" - new fileroot {type(fileroot)} for {fn} with cave {cave}")
 
     return fileroot
""" - print(f" - making a new root survexfile for this import: {fn}") - fileroot = SurvexFile(path=fn, cave=None) + print(f" - Making a new root survexfile for this import: {fn}") + fileroot = SurvexFile(path=fn, cave=cave) fileroot.survexdirectory = SurvexDirectory.objects.get(id=1) # just re-use the first thing we made fileroot.save() + cave = IdentifyCave(fn) + fileroot.cave = cave + print(f" - new fileroot {type(fileroot)} for {fn} with cave {cave}") return fileroot @@ -2490,7 +2521,7 @@ def LoadSurvexBlocks(): # sudo service mariadb start survexblockroot.save() - omitsfileroot = MakeFileRoot(UNSEENS) + omitsfileroot = MakeFileRoot(UNSEENS, None) survexomitsroot = SurvexBlock( name=OMITBLOCK, survexpath="", survexfile=omitsfileroot, legsall=0, legslength=0.0 )