From 28d1092956bf53519c7aed29181c146c323338a7 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Fri, 8 Sep 2023 01:26:01 +0300 Subject: [PATCH] Removing unneeded svx from parse list --- .gitignore | 1 + core/models/survex.py | 61 +-------------- parsers/logbooks.py | 2 +- parsers/survex.py | 167 ++++++++++++++++++----------------------- settings.py | 11 +-- templates/svxfile.html | 2 +- 6 files changed, 81 insertions(+), 163 deletions(-) diff --git a/.gitignore b/.gitignore index e629ee9..68c1219 100644 --- a/.gitignore +++ b/.gitignore @@ -72,3 +72,4 @@ pydebianbullseye javascript +_troggle_import_root.svx diff --git a/core/models/survex.py b/core/models/survex.py index 84255d7..9a5145c 100644 --- a/core/models/survex.py +++ b/core/models/survex.py @@ -10,31 +10,6 @@ from django.urls import reverse # from troggle.core.models.troggle import DataIssue # circular import. Hmm - -# class SurvexDirectory(models.Model): - # """This relates a survexfile (identified by path) to the primary SurvexFile - # which is the 'head' of the survex tree for that cave. - # Surely this could just be a property of Cave ? No. Several subdirectories - # all relate to the same Cave. 
- - # But it *could* be a property of SurvexFile - # """ - # path = models.CharField(max_length=200) - # primarysurvexfile = models.ForeignKey( - # "SurvexFile", related_name="primarysurvexfile", blank=True, null=True, on_delete=models.SET_NULL - # ) - - # class Meta: - # ordering = ("id",) - # verbose_name_plural = "Survex directories" - - # def contents(self): - # return "[SvxDir:" + str(self.path) + " | Primary svx:" + str(self.primarysurvexfile.path) + ".svx ]" - - # def __str__(self): - # return "[SvxDir:" + str(self.path)+ "]" - - class SurvexFile(models.Model): path = models.CharField(max_length=200) #survexdirectory = models.ForeignKey("SurvexDirectory", blank=True, null=True, on_delete=models.SET_NULL) @@ -56,19 +31,6 @@ class SurvexFile(models.Model): fname = Path(settings.SURVEX_DATA, self.path + ".svx") return fname.is_file() - # def SetDirectory(self): - # dirpath = os.path.split(self.path)[0] - # # pointless search every time we import a survex file if we know there are no duplicates.. - # # don't use this for initial import. - # survexdirectorylist = SurvexDirectory.objects.filter(cave=self.cave, path=dirpath) - # if survexdirectorylist: - # self.survexdirectory = survexdirectorylist[0] - # else: - # survexdirectory = SurvexDirectory(path=dirpath, cave=self.cave, primarysurvexfile=self) - # survexdirectory.save() - # self.survexdirectory = survexdirectory - # self.save() - # Don't change from the default as that breaks troggle webpages and internal referencing! # def __str__(self): # return "[SurvexFile:"+str(self.path) + "-" + str(self.survexdirectory) + "-" + str(self.cave)+"]" @@ -97,17 +59,6 @@ class SurvexStation(models.Model): y = models.FloatField(blank=True, null=True) z = models.FloatField(blank=True, null=True) - # def path(self): - # r = self.name - # b = self.block - # while True: - # if b.name: - # r = b.name + "." 
+ r - # if b.parent: - # b = b.parent - # else: - # return r - class Meta: ordering = ("id",) @@ -119,7 +70,7 @@ class SurvexStation(models.Model): import math -def utmToLatLng(zone, easting, northing, northernHemisphere=True): +def utmToLatLng(zone, easting, northing, northernHemisphere=True): # move this to utils.py ? if not northernHemisphere: northing = 10000000 - northing @@ -206,10 +157,10 @@ class SurvexBlock(models.Model): date = models.DateField(blank=True, null=True) expedition = models.ForeignKey("Expedition", blank=True, null=True, on_delete=models.SET_NULL) - # if the survexfile object is deleted, then all teh suvex-blocks in it should be too, + # if the survexfile object is deleted, then all the suvex-blocks in it should be too, # though a block can span more than one file... survexfile = models.ForeignKey("SurvexFile", blank=True, null=True, on_delete=models.CASCADE) - survexpath = models.CharField(max_length=200) # the path for the survex stations + # survexpath = models.CharField(max_length=200, blank=True, null=True) No need for this anymore scanswallet = models.ForeignKey( "Wallet", null=True, on_delete=models.SET_NULL @@ -221,9 +172,6 @@ class SurvexBlock(models.Model): class Meta: ordering = ("id",) - # def __str__(self): - # return "[SurvexBlock:" + str(self.name) + "-path:" + str(self.survexpath) + "-cave:" + str(self.cave) + "]" - def __str__(self): return self.name and str(self.name) or "no_name-#" + str(self.id) @@ -244,7 +192,6 @@ class SurvexBlock(models.Model): if index not in range(0, mx): print(f"DayIndex: More than {mx-1} SurvexBlock items on one day '{index}' {self}, restarting colour sequence.") index = index % mx - # return list(self.survexblock_set.all()).index(self) return index @@ -253,7 +200,7 @@ class SurvexPersonRole(models.Model): is deleted too """ survexblock = models.ForeignKey("SurvexBlock", on_delete=models.CASCADE) - # increasing levels of precision, Surely we only need survexblock and person now that we have no link 
to a logbook entry? + # increasing levels of precision, Surely we only need survexblock and (either person or personexpedition)? personname = models.CharField(max_length=100) person = models.ForeignKey("Person", blank=True, null=True, on_delete=models.CASCADE) # not needed personexpedition = models.ForeignKey("PersonExpedition", blank=True, null=True, on_delete=models.SET_NULL) diff --git a/parsers/logbooks.py b/parsers/logbooks.py index 11a7ad2..50324a1 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -160,7 +160,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None): guests.append(nickname_used) if known_foreigner(nickname_used): message = f" ! - {expedition.year} Known foreigner: '{nickname_used}' in entry {tid=}" - print(message) + # print(message) else: message = f" ! - {expedition.year} No name match for: '{nickname_used}' in entry {tid=} for this year." print(message) diff --git a/parsers/survex.py b/parsers/survex.py index fb2bbbc..d712394 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -4,7 +4,7 @@ import re import subprocess import sys import time -from datetime import datetime, timezone +from datetime import date, datetime, timezone from pathlib import Path import troggle.settings as settings @@ -44,9 +44,7 @@ todo = """ Also it will be a tiny bit more accurate as these leg lengths are after loop closure fixup. """ survexblockroot = None -survexomitsroot = None ROOTBLOCK = "rootblock" -OMITBLOCK = "omitblock" METRESINFEET = 3.28084 UNSEENS = "_unseens.svx" @@ -211,7 +209,7 @@ def confirm_team_on_trip(survexblock): def check_team_cache(): global trip_team_cache - message = f"! *team CACHEFAIL check_team_cache() called " + message = f"! check_team_cache() called.. " print(message) for block in trip_team_cache: @@ -643,7 +641,7 @@ class LoadingSurvex: expo = self.expos[year] else: expeditions = Expedition.objects.filter(year=year) - if len(expeditions) != 1: + if len(expeditions) > 1: message = ( f"! 
More than one expedition in year {year} '{line}' ({survexblock}) {survexblock.survexfile.path}" ) @@ -708,11 +706,13 @@ class LoadingSurvex: perps = get_people_on_trip(survexblock) # What, you don't know Judge Dredd slang ? if len(line) > 10: - message = "! DATE Warning LONG DATE '{}' ({}) {}".format(oline, survexblock, survexblock.survexfile.path) - print(self.insp+message) - stash_data_issue(parser='svxdate', message=message, url=None, sb=(survexblock.survexfile.path)) if line[10] == "-": # ie a range, just look at first date line = line[0:10] + else: + message = f"! DATE Warning LONG DATE '{oline}' ({survexblock}) {survexblock.survexfile.path}" + print(self.insp+message) + stash_data_issue(parser='svxdate', message=message, url=None, sb=(survexblock.survexfile.path)) + if len(line) == 10: year = line[:4] @@ -1220,18 +1220,15 @@ class LoadingSurvex: # It is too late to add it to the pending caves list here, they were already # processed in parsers/caves.py So we have to do a bespoke creation. - cave = create_new_cave(includelabel) + svxpath= includelabel + cave = create_new_cave(svxpath) message = f" ! Warning: cave identifier '{caveid}'or {id} (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pendingcaves.txt ? In '{includelabel}.svx' at depth:[{len(depth)}]." print("\n" + message) print("\n" + message, file=sys.stderr) print(f"{self.pending}", end="", file=sys.stderr) stash_data_issue(parser="survex", message=message, url=None, sb=(includelabel)) - - # It is too late to add it to pending caves here, they were already processed in parsers/caves.py - # and something else is creating them... - # cave = create_new_cave(includelabel) - + def LoadSurvexFile(self, svxid): """Creates SurvexFile in the database, and SurvexDirectory if needed Creates a new current survexfile and valid .survexdirectory @@ -1408,7 +1405,7 @@ class LoadingSurvex: expoyear = str(survexblock.date.year) except: print(f">> why is survexblock not set ?! 
in LoadSurvexQM()/n {survexblock.survexfile.path}") - expoyear = "1970" + expoyear = settings.EPOCH.year # 1970 @@ -1667,7 +1664,7 @@ class LoadingSurvex: newsurvexblock = SurvexBlock( name=blkid, parent=survexblock, - survexpath=pathlist, + # survexpath=pathlist, # use the debug file, not this, for debugging survexfile=self.currentsurvexfile, legsall=0, legslength=0.0, @@ -1913,20 +1910,16 @@ class LoadingSurvex: if path in self.svxfileslist: # We have already used os.normpath() so this is OK. "/../" and "//" have been simplified already. - if stop_dup_warning: - # print("D",end="", file=sys.stderr) - pass - else: - message = f" * Warning. Duplicate detected. We have already seen this *include '{path}' from another survex file. Detected at callcount:{self.callcount} depth:{self.depthinclude}" - print(message) - print(message, file=flinear) - # print(message,file=sys.stderr) - stash_data_issue(parser="survex", message=message, url=None, sb=(path)) + message = f" * Warning. Duplicate detected. We have already seen this *include '{path}' from another survex file. Detected at callcount:{self.callcount} depth:{self.depthinclude}" + print(message) + print(message, file=flinear) + # print(message,file=sys.stderr) + stash_data_issue(parser="survex", message=message, url=None, sb=(path)) if self.svxfileslist.count(path) > 2: message = f" ! ERROR. Should have been caught before this. Survex file already *included 2x. Probably an infinite loop so fix your *include statements that include this. Aborting. {path}" print(message) print(message, file=flinear) - # print(message,file=sys.stderr) + print(message,file=sys.stderr) stash_data_issue(parser="survex", message=message, url=None, sb=(path)) return return @@ -2001,9 +1994,9 @@ class LoadingSurvex: text=True, ) if sp.returncode != 0: - message = f" ! Error running {settings.CAVERN}: {fullpath}" + message = f" ! 
Error when running {settings.CAVERN}: {fullpath}" url = f"/survexfile{fullpath}.svx".replace(str(settings.SURVEX_DATA), "") - stash_data_issue(parser="xEntrances", message=message, url=url) + stash_data_issue(parser="survex", message=message, url=url) print(message) print( "stderr:\n\n" + str(sp.stderr) + "\n\n" + str(sp.stdout) + "\n\nreturn code: " + str(sp.returncode) @@ -2069,21 +2062,37 @@ class LoadingSurvex: runcavern() -def FindAndLoadSurvex(survexblockroot): +def FindAndLoadSurvex(): """Follows the *include links successively to find survex files This proceeds in 3 phases: 1. The root survex file is read and all the *include files are found, using PushdownStackScan() 2. All the other survex files in the :loser: repo are found, and their *includes found, using another PushdownStackScan() [duplicates omitted] 3. The combined expanded file containing all the survex data is parsed as a single file, - using LinearLoad()""" - global stop_dup_warning + using LinearLoad() + """ + + def make_survexblockroot(): + survexfileroot = SurvexFile( + path=settings.SURVEX_TOPNAME, cave=None) + survexfileroot.save() + survexblockroot = SurvexBlock( + name=ROOTBLOCK, survexfile=survexfileroot, legsall=0, legslength=0.0) + # crashes here sometimes on MariaDB complaining that cave_id should not be null. But it should be. + # django.db.utils.IntegrityError: (1048, "Column 'cave_id' cannot be null") + # fix by restarting db on server + # sudo service mariadb stop + # sudo service mariadb start + survexblockroot.save() + return survexblockroot + print(" - redirecting stdout to svxblks.log...") stdout_orig = sys.stdout # Redirect sys.stdout to the file sys.stdout = open("svxblks.log", "w") print(f" - Scanning Survex Blocks tree from {settings.SURVEX_TOPNAME}.svx ...", file=sys.stderr) + survexblockroot = make_survexblockroot() survexfileroot = survexblockroot.survexfile # i.e. 
SURVEX_TOPNAME only collatefilename = "_" + survexfileroot.path + ".svx" @@ -2169,24 +2178,29 @@ def FindAndLoadSurvex(survexblockroot): file=sys.stderr, ) + # These exceptions WILL be parsed if they are *included by any file which is not excepted unseensroot = re.sub(r"\.svx$", "", UNSEENS) - excpts = ["surface/terrain", "kataster/kataster-boundaries", "template", "docs", "deprecated", "subsections", unseensroot] - removals = [] + excpts = ["surface/terrain", "kataster/kataster-boundaries", "gpx/gpx_publish/essentials", "template", "docs", "deprecated", "subsections", "1623-and-1626-no-schoenberg-hs", "1623-and-1624-and-1626-and-1627", "1623-and-1626",unseensroot] + removals = set() for x in unseens: for o in excpts: if str(x).strip().startswith(o): - removals.append(x) + removals.add(x) # special fix for .svx file not actually in survex format unseens.remove(Path("fixedpts/gps/gps00raw")) for x in removals: unseens.remove(x) + print(f" x NOT parsing {x}") print( f"\n - {len(unseens)} survex files found which were not included in main tree. 
({len(svx_scan.svxfileslist)} in main tree)", file=sys.stderr, ) + print(f" -- (but ignoring {len(removals)} of them)", file=sys.stderr) + check_team_cache() - print(f" -- Now loading the previously-omitted survex files.", file=sys.stderr) + s_date = date.today().isoformat().replace('-','.') + print(f" -- Now loading the previously-omitted survex files as {UNSEENS} *date {s_date}", file=sys.stderr) print(f" - (except: {excpts})", file=sys.stderr) with open(Path(settings.SURVEX_DATA, UNSEENS), "w") as u: @@ -2196,6 +2210,8 @@ def FindAndLoadSurvex(survexblockroot): u.write(f"; autogenerated by parser/survex.py from databasereset.py on '{datetime.now(timezone.utc)}'\n") u.write(f"; omitting any file beginning with {excpts}\n\n") u.write("*begin troggle_unseens\n") + u.write("*team something Nobody\n") + u.write(f"*date {s_date}\n") u.write("*title \"Collated unseen and unlinked survex files\"\n") for x in sorted(unseens): u.write(f" *include {x}\n") @@ -2227,11 +2243,9 @@ def FindAndLoadSurvex(survexblockroot): fcollate.write(f";*include {UNSEENS}\n") flinear.write(f"{omit_scan.depthinclude:2} {indent} *include {unseensroot}\n") omit_scan.svxpass = omit_scan.ODDS - # stop_dup_warning = True # ---------------------------------------------------------------- omit_scan.PushdownStackScan(survexblockroot, unseensroot, finrootname, flinear, fcollate) # ---------------------------------------------------------------- - # stop_dup_warning = False omit_scan.svxpass = "" flinear.write(f"{omit_scan.depthinclude:2} {indent} *edulcni {unseensroot}\n") @@ -2269,10 +2283,6 @@ def FindAndLoadSurvex(survexblockroot): print("\n - Loading All Survex Blocks (LinearLoad)", file=sys.stderr) svx_load = LoadingSurvex() - #svx_load.survexdict[survexfileroot.survexdirectory] = [] - #svx_load.survexdict[survexfileroot.survexdirectory].append(survexfileroot) - #svx_load.svxdirs[""] = survexfileroot.survexdirectory - # pr2 = cProfile.Profile() # pr2.enable() print(" ", file=sys.stderr, end="") 
@@ -2298,7 +2308,7 @@ def FindAndLoadSurvex(survexblockroot): mem1 = get_process_memory() print(f" - Number of SurvexDirectories: {len(svx_load.svxprim):,}") - tf = SurvexFile.objects.all().count() + tf = SurvexFile.objects.all().count() - len(removals) print(f" - Number of SurvexFiles: {tf:,}") print(f" - Number of Survex legs: {legsnumber:,}") svx_load = None @@ -2312,7 +2322,7 @@ def display_contents(blocks): for sf in sfs: print(f" SF {sf}") # print(f" SD {sf.survexdirectory} {sf.survexdirectory.cave}") - #print(f" SD {sf.survexdirectory} {sf.survexdirectory.path}") + # print(f" SD {sf.survexdirectory} {sf.survexdirectory.path}") ws = Wallet.objects.filter(survexblock=b) for w in ws: @@ -2328,6 +2338,9 @@ def parse_one_file(fpath): # --------------------------------------in progress-- """Parse just one file. Use when re-loading after editing. NOTE: *include lines are ignored. + But this is fine, they will already be in the system, UNLESS a new *include line is edited in + without also opening that file in the online editor. + In the initial file parsing in databaseReset, the *include expansion is done in an earlier stange than LinearLoad(). By the time LinearLoad() is called, all the *include expansion has happened. @@ -2348,7 +2361,7 @@ def parse_one_file(fpath): # --------------------------------------in progress-- # It is vital that the block has attached the survexfile object which is being parsed. 
block_dummy = SurvexBlock( - name="dummy", survexpath="", survexfile=svxfileroot, legsall=0, legslength=0.0 + name="dummy", survexfile=svxfileroot, legsall=0, legslength=0.0 ) svxfileroot.save() block_dummy.save() @@ -2356,12 +2369,7 @@ def parse_one_file(fpath): # --------------------------------------in progress-- block_dummy.name = newname block_dummy.save() print(f" - block_dummy now '{block_dummy}' {type(block_dummy)} id={block_dummy.id} f:{block_dummy.survexfile}") - - - # svx_load.survexdict[svxfileroot.survexdirectory] = [] - # svx_load.survexdict[svxfileroot.survexdirectory].append(svxfileroot) - #svx_load.svxdirs[""] = svxfileroot.survexdirectory - + # ---------------------------------------------------------------- svx_load.LinearLoad(block_dummy, svxfileroot.path, fname) # ---------------------------------------------------------------- @@ -2428,7 +2436,7 @@ def parse_one_file(fpath): # --------------------------------------in progress-- if len(sbs)<1: print(f" ! No survex blocks found. Parser failure...") for sb in sbs: - print(f" - {sb.id} re-setting survex block parent{sb=}") + print(f" - {sb.id} re-setting survex block parent {sb=}") sb.parent = existingparent # should be all the same sb.save() @@ -2441,32 +2449,22 @@ def parse_one_file(fpath): # --------------------------------------in progress-- svx_load = None return True -def MakeSurvexFileRoot(): - """Returns a file_object.path = SURVEX_TOPNAME associated with directory_object.path = SURVEX_DATA""" - # find a cave, any cave.. 
smk = Cave.objects.filter(kataster_number="000") # returns a list, a QuerySet - - fileroot = SurvexFile(path=settings.SURVEX_TOPNAME, cave=None) - fileroot.save() - return fileroot - - -def MakeFileRoot(fn): - """Returns a file_object.path = _unseens.svx associated with directory_object.path = SURVEX_DATA +def MakeFileRoot(svxpath): + """Returns a file_object.path + Used by the online survex file editor when re-parsing or tries to find the primary survex file for this cave """ - cave = IdentifyCave(fn) + cave = IdentifyCave(svxpath) if not cave: - if fn != UNSEENS: - cave = create_new_cave(fn) - - print(f" - Making/finding a new root survexfile for this import: {fn}") - - fileroot = SurvexFile(path=fn, cave=cave) + if svxpath != UNSEENS: + cave = create_new_cave(svxpath) + + # is this really necessary ?! fileroot = SurvexFile(path=svxpath, cave=cave) fileroot.save() - fileroot.cave = cave - print(f" - new fileroot {type(fileroot)} for {fn} with cave {cave} - {fileroot}") - + print(f" - Making/finding a new dummy root survexfile for this import: {svxpath} with cave {cave}") + print(f" - new fileroot {type(fileroot)} for {svxpath} with cave {cave}\n - {fileroot.primary} {fileroot.path} {fileroot.cave} ") + return fileroot @@ -2501,35 +2499,14 @@ def LoadSurvexBlocks(): print(" - survex Data Issues flushed") mem1 = get_process_memory() print(f" - MEM:{mem1:7.2f} MB now ", file=sys.stderr) - - survexfileroot = MakeSurvexFileRoot() - # this next makes a block_object assciated with a file_object.path = SURVEX_TOPNAME - survexblockroot = SurvexBlock( - name=ROOTBLOCK, survexpath="", survexfile=survexfileroot, legsall=0, legslength=0.0 - ) - # crashes here sometimes on MariaDB complaining that cave_id should not be null. But it should be. 
- # django.db.utils.IntegrityError: (1048, "Column 'cave_id' cannot be null") - # fix by restarting db on server - # sudo service mariadb stop - # sudo service mariadb start - survexblockroot.save() - - omitsfileroot = MakeFileRoot(UNSEENS) - survexomitsroot = SurvexBlock( - name=OMITBLOCK, survexpath="", survexfile=omitsfileroot, legsall=0, legslength=0.0 - ) - survexomitsroot.save() - print(" - Loading Survex Blocks...") memstart = get_process_memory() # ---------------------------------------------------------------- - FindAndLoadSurvex(survexblockroot) + FindAndLoadSurvex() # ---------------------------------------------------------------- memend = get_process_memory() print(f" - MEMORY start:{memstart:.3f} MB end:{memend:.3f} MB increase={memend - memstart:.3f} MB") - survexblockroot.save() - global person_pending_cache for sb in person_pending_cache: if len(person_pending_cache[sb]) > 0: diff --git a/settings.py b/settings.py index 7548c5c..e428b9c 100644 --- a/settings.py +++ b/settings.py @@ -74,15 +74,8 @@ USE_L10N = True FIX_PERMISSIONS = [] # top-level survex file basename (without .svx) -SURVEX_TOPNAME = "1623-and-1626-no-schoenberg-hs" - - -# Caves for which survex files exist, but are not otherwise registered -# replaced (?) 
by expoweb/cave_data/pendingcaves.txt -# PENDING = ["1626-361", "2007-06", "2009-02", -# "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06", -# "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888", -# "2018-pf-01", "2018-pf-02"] +# SURVEX_TOPNAME = "1623-and-1626-no-schoenberg-hs" +SURVEX_TOPNAME = "troggle_import_root" # same, but without all the 'essentials' gubbins APPEND_SLASH = ( False # never relevant because we have urls that match unknown files and produce an 'edit this page' response diff --git a/templates/svxfile.html b/templates/svxfile.html index 8121c45..d3e18ed 100644 --- a/templates/svxfile.html +++ b/templates/svxfile.html @@ -84,7 +84,7 @@ underground survey length: {{svxlength|floatformat:2}} metres
cave primary survexfile {{survexfile.cave.survex_file}}
directory primary survexfile {{survexfile.primary}}
{% for sb in svxblocks %} -block {{sb}} has parent block {{sb.parent}}
+block {{sb}} has parent block {{sb.parent}} (in file {{sb.parent.survexfile.path}}.svx)
{% empty %} Cannot find any dated survex blocks in this survex file (not looking at *include files).
Report this to a nerd if you think this is incorrect.