2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-24 16:21:53 +00:00

Removing unneeded svx from parse list

This commit is contained in:
Philip Sargent 2023-09-08 01:26:01 +03:00
parent 5fc5c1285a
commit 28d1092956
6 changed files with 81 additions and 163 deletions

1
.gitignore vendored
View File

@ -72,3 +72,4 @@ pydebianbullseye
javascript
_troggle_import_root.svx

View File

@ -10,31 +10,6 @@ from django.urls import reverse
# from troggle.core.models.troggle import DataIssue # circular import. Hmm
# class SurvexDirectory(models.Model):
# """This relates a survexfile (identified by path) to the primary SurvexFile
# which is the 'head' of the survex tree for that cave.
# Surely this could just be a property of Cave ? No. Several subdirectories
# all relate to the same Cave.
# But it *could* be a property of SurvexFile
# """
# path = models.CharField(max_length=200)
# primarysurvexfile = models.ForeignKey(
# "SurvexFile", related_name="primarysurvexfile", blank=True, null=True, on_delete=models.SET_NULL
# )
# class Meta:
# ordering = ("id",)
# verbose_name_plural = "Survex directories"
# def contents(self):
# return "[SvxDir:" + str(self.path) + " | Primary svx:" + str(self.primarysurvexfile.path) + ".svx ]"
# def __str__(self):
# return "[SvxDir:" + str(self.path)+ "]"
class SurvexFile(models.Model):
path = models.CharField(max_length=200)
#survexdirectory = models.ForeignKey("SurvexDirectory", blank=True, null=True, on_delete=models.SET_NULL)
@ -56,19 +31,6 @@ class SurvexFile(models.Model):
fname = Path(settings.SURVEX_DATA, self.path + ".svx")
return fname.is_file()
# def SetDirectory(self):
# dirpath = os.path.split(self.path)[0]
# # pointless search every time we import a survex file if we know there are no duplicates..
# # don't use this for initial import.
# survexdirectorylist = SurvexDirectory.objects.filter(cave=self.cave, path=dirpath)
# if survexdirectorylist:
# self.survexdirectory = survexdirectorylist[0]
# else:
# survexdirectory = SurvexDirectory(path=dirpath, cave=self.cave, primarysurvexfile=self)
# survexdirectory.save()
# self.survexdirectory = survexdirectory
# self.save()
# Don't change from the default as that breaks troggle webpages and internal referencing!
# def __str__(self):
# return "[SurvexFile:"+str(self.path) + "-" + str(self.survexdirectory) + "-" + str(self.cave)+"]"
@ -97,17 +59,6 @@ class SurvexStation(models.Model):
y = models.FloatField(blank=True, null=True)
z = models.FloatField(blank=True, null=True)
# def path(self):
# r = self.name
# b = self.block
# while True:
# if b.name:
# r = b.name + "." + r
# if b.parent:
# b = b.parent
# else:
# return r
class Meta:
ordering = ("id",)
@ -119,7 +70,7 @@ class SurvexStation(models.Model):
import math
def utmToLatLng(zone, easting, northing, northernHemisphere=True):
def utmToLatLng(zone, easting, northing, northernHemisphere=True): # move this to utils.py ?
if not northernHemisphere:
northing = 10000000 - northing
@ -206,10 +157,10 @@ class SurvexBlock(models.Model):
date = models.DateField(blank=True, null=True)
expedition = models.ForeignKey("Expedition", blank=True, null=True, on_delete=models.SET_NULL)
# if the survexfile object is deleted, then all teh suvex-blocks in it should be too,
    # if the survexfile object is deleted, then all the survex-blocks in it should be too,
# though a block can span more than one file...
survexfile = models.ForeignKey("SurvexFile", blank=True, null=True, on_delete=models.CASCADE)
survexpath = models.CharField(max_length=200) # the path for the survex stations
# survexpath = models.CharField(max_length=200, blank=True, null=True) No need for this anymore
scanswallet = models.ForeignKey(
"Wallet", null=True, on_delete=models.SET_NULL
@ -221,9 +172,6 @@ class SurvexBlock(models.Model):
class Meta:
ordering = ("id",)
# def __str__(self):
# return "[SurvexBlock:" + str(self.name) + "-path:" + str(self.survexpath) + "-cave:" + str(self.cave) + "]"
def __str__(self):
return self.name and str(self.name) or "no_name-#" + str(self.id)
@ -244,7 +192,6 @@ class SurvexBlock(models.Model):
if index not in range(0, mx):
print(f"DayIndex: More than {mx-1} SurvexBlock items on one day '{index}' {self}, restarting colour sequence.")
index = index % mx
# return list(self.survexblock_set.all()).index(self)
return index
@ -253,7 +200,7 @@ class SurvexPersonRole(models.Model):
is deleted too
"""
survexblock = models.ForeignKey("SurvexBlock", on_delete=models.CASCADE)
# increasing levels of precision, Surely we only need survexblock and person now that we have no link to a logbook entry?
# increasing levels of precision, Surely we only need survexblock and (either person or personexpedition)?
personname = models.CharField(max_length=100)
person = models.ForeignKey("Person", blank=True, null=True, on_delete=models.CASCADE) # not needed
personexpedition = models.ForeignKey("PersonExpedition", blank=True, null=True, on_delete=models.SET_NULL)

View File

@ -160,7 +160,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
guests.append(nickname_used)
if known_foreigner(nickname_used):
message = f" ! - {expedition.year} Known foreigner: '{nickname_used}' in entry {tid=}"
print(message)
# print(message)
else:
message = f" ! - {expedition.year} No name match for: '{nickname_used}' in entry {tid=} for this year."
print(message)

View File

@ -4,7 +4,7 @@ import re
import subprocess
import sys
import time
from datetime import datetime, timezone
from datetime import date, datetime, timezone
from pathlib import Path
import troggle.settings as settings
@ -44,9 +44,7 @@ todo = """
Also it will be a tiny bit more accurate as these leg lengths are after loop closure fixup.
"""
survexblockroot = None
survexomitsroot = None
ROOTBLOCK = "rootblock"
OMITBLOCK = "omitblock"
METRESINFEET = 3.28084
UNSEENS = "_unseens.svx"
@ -211,7 +209,7 @@ def confirm_team_on_trip(survexblock):
def check_team_cache():
global trip_team_cache
message = f"! *team CACHEFAIL check_team_cache() called "
message = f"! check_team_cache() called.. "
print(message)
for block in trip_team_cache:
@ -643,7 +641,7 @@ class LoadingSurvex:
expo = self.expos[year]
else:
expeditions = Expedition.objects.filter(year=year)
if len(expeditions) != 1:
if len(expeditions) > 1:
message = (
f"! More than one expedition in year {year} '{line}' ({survexblock}) {survexblock.survexfile.path}"
)
@ -708,11 +706,13 @@ class LoadingSurvex:
perps = get_people_on_trip(survexblock) # What, you don't know Judge Dredd slang ?
if len(line) > 10:
message = "! DATE Warning LONG DATE '{}' ({}) {}".format(oline, survexblock, survexblock.survexfile.path)
print(self.insp+message)
stash_data_issue(parser='svxdate', message=message, url=None, sb=(survexblock.survexfile.path))
if line[10] == "-": # ie a range, just look at first date
line = line[0:10]
else:
message = f"! DATE Warning LONG DATE '{oline}' ({survexblock}) {survexblock.survexfile.path}"
print(self.insp+message)
stash_data_issue(parser='svxdate', message=message, url=None, sb=(survexblock.survexfile.path))
if len(line) == 10:
year = line[:4]
@ -1220,18 +1220,15 @@ class LoadingSurvex:
# It is too late to add it to the pending caves list here, they were already
# processed in parsers/caves.py So we have to do a bespoke creation.
cave = create_new_cave(includelabel)
svxpath= includelabel
cave = create_new_cave(svxpath)
message = f" ! Warning: cave identifier '{caveid}'or {id} (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pendingcaves.txt ? In '{includelabel}.svx' at depth:[{len(depth)}]."
print("\n" + message)
print("\n" + message, file=sys.stderr)
print(f"{self.pending}", end="", file=sys.stderr)
stash_data_issue(parser="survex", message=message, url=None, sb=(includelabel))
# It is too late to add it to pending caves here, they were already processed in parsers/caves.py
# and something else is creating them...
# cave = create_new_cave(includelabel)
def LoadSurvexFile(self, svxid):
"""Creates SurvexFile in the database, and SurvexDirectory if needed
Creates a new current survexfile and valid .survexdirectory
@ -1408,7 +1405,7 @@ class LoadingSurvex:
expoyear = str(survexblock.date.year)
except:
print(f">> why is survexblock not set ?! in LoadSurvexQM()/n {survexblock.survexfile.path}")
expoyear = "1970"
expoyear = settings.EPOCH.year # 1970
@ -1667,7 +1664,7 @@ class LoadingSurvex:
newsurvexblock = SurvexBlock(
name=blkid,
parent=survexblock,
survexpath=pathlist,
# survexpath=pathlist, # use the debug file, not this, for debugging
survexfile=self.currentsurvexfile,
legsall=0,
legslength=0.0,
@ -1913,20 +1910,16 @@ class LoadingSurvex:
if path in self.svxfileslist:
# We have already used os.normpath() so this is OK. "/../" and "//" have been simplified already.
if stop_dup_warning:
# print("D",end="", file=sys.stderr)
pass
else:
message = f" * Warning. Duplicate detected. We have already seen this *include '{path}' from another survex file. Detected at callcount:{self.callcount} depth:{self.depthinclude}"
print(message)
print(message, file=flinear)
# print(message,file=sys.stderr)
stash_data_issue(parser="survex", message=message, url=None, sb=(path))
message = f" * Warning. Duplicate detected. We have already seen this *include '{path}' from another survex file. Detected at callcount:{self.callcount} depth:{self.depthinclude}"
print(message)
print(message, file=flinear)
# print(message,file=sys.stderr)
stash_data_issue(parser="survex", message=message, url=None, sb=(path))
if self.svxfileslist.count(path) > 2:
message = f" ! ERROR. Should have been caught before this. Survex file already *included 2x. Probably an infinite loop so fix your *include statements that include this. Aborting. {path}"
print(message)
print(message, file=flinear)
# print(message,file=sys.stderr)
print(message,file=sys.stderr)
stash_data_issue(parser="survex", message=message, url=None, sb=(path))
return
return
@ -2001,9 +1994,9 @@ class LoadingSurvex:
text=True,
)
if sp.returncode != 0:
message = f" ! Error running {settings.CAVERN}: {fullpath}"
message = f" ! Error when running {settings.CAVERN}: {fullpath}"
url = f"/survexfile{fullpath}.svx".replace(str(settings.SURVEX_DATA), "")
stash_data_issue(parser="xEntrances", message=message, url=url)
stash_data_issue(parser="survex", message=message, url=url)
print(message)
print(
"stderr:\n\n" + str(sp.stderr) + "\n\n" + str(sp.stdout) + "\n\nreturn code: " + str(sp.returncode)
@ -2069,21 +2062,37 @@ class LoadingSurvex:
runcavern()
def FindAndLoadSurvex(survexblockroot):
def FindAndLoadSurvex():
"""Follows the *include links successively to find survex files
This proceeds in 3 phases:
1. The root survex file is read and all the *include files are found, using PushdownStackScan()
2. All the other survex files in the :loser: repo are found, and their *includes found,
using another PushdownStackScan() [duplicates omitted]
3. The combined expanded file containing all the survex data is parsed as a single file,
using LinearLoad()"""
global stop_dup_warning
using LinearLoad()
"""
def make_survexblockroot():
survexfileroot = SurvexFile(
path=settings.SURVEX_TOPNAME, cave=None)
survexfileroot.save()
survexblockroot = SurvexBlock(
name=ROOTBLOCK, survexfile=survexfileroot, legsall=0, legslength=0.0)
# crashes here sometimes on MariaDB complaining that cave_id should not be null. But it should be.
# django.db.utils.IntegrityError: (1048, "Column 'cave_id' cannot be null")
# fix by restarting db on server
# sudo service mariadb stop
# sudo service mariadb start
survexblockroot.save()
return survexblockroot
print(" - redirecting stdout to svxblks.log...")
stdout_orig = sys.stdout
# Redirect sys.stdout to the file
sys.stdout = open("svxblks.log", "w")
print(f" - Scanning Survex Blocks tree from {settings.SURVEX_TOPNAME}.svx ...", file=sys.stderr)
survexblockroot = make_survexblockroot()
survexfileroot = survexblockroot.survexfile # i.e. SURVEX_TOPNAME only
collatefilename = "_" + survexfileroot.path + ".svx"
@ -2169,24 +2178,29 @@ def FindAndLoadSurvex(survexblockroot):
file=sys.stderr,
)
# These exceptions WILL be parsed if the are *included by any file which is not excepted
unseensroot = re.sub(r"\.svx$", "", UNSEENS)
excpts = ["surface/terrain", "kataster/kataster-boundaries", "template", "docs", "deprecated", "subsections", unseensroot]
removals = []
excpts = ["surface/terrain", "kataster/kataster-boundaries", "gpx/gpx_publish/essentials", "template", "docs", "deprecated", "subsections", "1623-and-1626-no-schoenberg-hs", "1623-and-1624-and-1626-and-1627", "1623-and-1626",unseensroot]
removals = set()
for x in unseens:
for o in excpts:
if str(x).strip().startswith(o):
removals.append(x)
removals.add(x)
# special fix for .svx file not actually in survex format
unseens.remove(Path("fixedpts/gps/gps00raw"))
for x in removals:
unseens.remove(x)
print(f" x NOT parsing {x}")
print(
f"\n - {len(unseens)} survex files found which were not included in main tree. ({len(svx_scan.svxfileslist)} in main tree)",
file=sys.stderr,
)
print(f" -- (but ignoring {len(removals)} of them)", file=sys.stderr)
check_team_cache()
print(f" -- Now loading the previously-omitted survex files.", file=sys.stderr)
s_date = date.today().isoformat().replace('-','.')
print(f" -- Now loading the previously-omitted survex files as {UNSEENS} *date {s_date}", file=sys.stderr)
print(f" - (except: {excpts})", file=sys.stderr)
with open(Path(settings.SURVEX_DATA, UNSEENS), "w") as u:
@ -2196,6 +2210,8 @@ def FindAndLoadSurvex(survexblockroot):
u.write(f"; autogenerated by parser/survex.py from databasereset.py on '{datetime.now(timezone.utc)}'\n")
u.write(f"; omitting any file beginning with {excpts}\n\n")
u.write("*begin troggle_unseens\n")
u.write("*team something Nobody\n")
u.write(f"*date {s_date}\n")
u.write("*title \"Collated unseen and unlinked survex files\"\n")
for x in sorted(unseens):
u.write(f" *include {x}\n")
@ -2227,11 +2243,9 @@ def FindAndLoadSurvex(survexblockroot):
fcollate.write(f";*include {UNSEENS}\n")
flinear.write(f"{omit_scan.depthinclude:2} {indent} *include {unseensroot}\n")
omit_scan.svxpass = omit_scan.ODDS
# stop_dup_warning = True
# ----------------------------------------------------------------
omit_scan.PushdownStackScan(survexblockroot, unseensroot, finrootname, flinear, fcollate)
# ----------------------------------------------------------------
# stop_dup_warning = False
omit_scan.svxpass = ""
flinear.write(f"{omit_scan.depthinclude:2} {indent} *edulcni {unseensroot}\n")
@ -2269,10 +2283,6 @@ def FindAndLoadSurvex(survexblockroot):
print("\n - Loading All Survex Blocks (LinearLoad)", file=sys.stderr)
svx_load = LoadingSurvex()
#svx_load.survexdict[survexfileroot.survexdirectory] = []
#svx_load.survexdict[survexfileroot.survexdirectory].append(survexfileroot)
#svx_load.svxdirs[""] = survexfileroot.survexdirectory
# pr2 = cProfile.Profile()
# pr2.enable()
print(" ", file=sys.stderr, end="")
@ -2298,7 +2308,7 @@ def FindAndLoadSurvex(survexblockroot):
mem1 = get_process_memory()
print(f" - Number of SurvexDirectories: {len(svx_load.svxprim):,}")
tf = SurvexFile.objects.all().count()
tf = SurvexFile.objects.all().count() - len(removals)
print(f" - Number of SurvexFiles: {tf:,}")
print(f" - Number of Survex legs: {legsnumber:,}")
svx_load = None
@ -2312,7 +2322,7 @@ def display_contents(blocks):
for sf in sfs:
print(f" SF {sf}")
# print(f" SD {sf.survexdirectory} {sf.survexdirectory.cave}")
#print(f" SD {sf.survexdirectory} {sf.survexdirectory.path}")
# print(f" SD {sf.survexdirectory} {sf.survexdirectory.path}")
ws = Wallet.objects.filter(survexblock=b)
for w in ws:
@ -2328,6 +2338,9 @@ def parse_one_file(fpath): # --------------------------------------in progress--
"""Parse just one file. Use when re-loading after editing.
NOTE: *include lines are ignored.
But this is fine, they will already be in the system, UNLESS a new *include line is edited in
without also opening that file in the online editor.
In the initial file parsing in databaseReset, the *include expansion is done
    in an earlier stage than LinearLoad(). By the time LinearLoad() is called,
all the *include expansion has happened.
@ -2348,7 +2361,7 @@ def parse_one_file(fpath): # --------------------------------------in progress--
# It is vital that the block has attached the survexfile object which is being parsed.
block_dummy = SurvexBlock(
name="dummy", survexpath="", survexfile=svxfileroot, legsall=0, legslength=0.0
name="dummy", survexfile=svxfileroot, legsall=0, legslength=0.0
)
svxfileroot.save()
block_dummy.save()
@ -2356,12 +2369,7 @@ def parse_one_file(fpath): # --------------------------------------in progress--
block_dummy.name = newname
block_dummy.save()
print(f" - block_dummy now '{block_dummy}' {type(block_dummy)} id={block_dummy.id} f:{block_dummy.survexfile}")
# svx_load.survexdict[svxfileroot.survexdirectory] = []
# svx_load.survexdict[svxfileroot.survexdirectory].append(svxfileroot)
#svx_load.svxdirs[""] = svxfileroot.survexdirectory
# ----------------------------------------------------------------
svx_load.LinearLoad(block_dummy, svxfileroot.path, fname)
# ----------------------------------------------------------------
@ -2428,7 +2436,7 @@ def parse_one_file(fpath): # --------------------------------------in progress--
if len(sbs)<1:
print(f" ! No survex blocks found. Parser failure...")
for sb in sbs:
print(f" - {sb.id} re-setting survex block parent{sb=}")
print(f" - {sb.id} re-setting survex block parent {sb=}")
sb.parent = existingparent # should be all the same
sb.save()
@ -2441,32 +2449,22 @@ def parse_one_file(fpath): # --------------------------------------in progress--
svx_load = None
return True
def MakeSurvexFileRoot():
"""Returns a file_object.path = SURVEX_TOPNAME associated with directory_object.path = SURVEX_DATA"""
# find a cave, any cave..
smk = Cave.objects.filter(kataster_number="000") # returns a list, a QuerySet
fileroot = SurvexFile(path=settings.SURVEX_TOPNAME, cave=None)
fileroot.save()
return fileroot
def MakeFileRoot(fn):
"""Returns a file_object.path = _unseens.svx associated with directory_object.path = SURVEX_DATA
def MakeFileRoot(svxpath):
"""Returns a file_object.path
Used by the online survex file editor when re-parsing
or tries to find the primary survex file for this cave
"""
cave = IdentifyCave(fn)
cave = IdentifyCave(svxpath)
if not cave:
if fn != UNSEENS:
cave = create_new_cave(fn)
print(f" - Making/finding a new root survexfile for this import: {fn}")
fileroot = SurvexFile(path=fn, cave=cave)
if svxpath != UNSEENS:
cave = create_new_cave(svxpath)
        # is this really necessary?!
fileroot = SurvexFile(path=svxpath, cave=cave)
fileroot.save()
fileroot.cave = cave
print(f" - new fileroot {type(fileroot)} for {fn} with cave {cave} - {fileroot}")
print(f" - Making/finding a new dummy root survexfile for this import: {svxpath} with cave {cave}")
print(f" - new fileroot {type(fileroot)} for {svxpath} with cave {cave}\n - {fileroot.primary} {fileroot.path} {fileroot.cave} ")
return fileroot
@ -2501,35 +2499,14 @@ def LoadSurvexBlocks():
print(" - survex Data Issues flushed")
mem1 = get_process_memory()
print(f" - MEM:{mem1:7.2f} MB now ", file=sys.stderr)
survexfileroot = MakeSurvexFileRoot()
    # this next makes a block_object associated with a file_object.path = SURVEX_TOPNAME
survexblockroot = SurvexBlock(
name=ROOTBLOCK, survexpath="", survexfile=survexfileroot, legsall=0, legslength=0.0
)
# crashes here sometimes on MariaDB complaining that cave_id should not be null. But it should be.
# django.db.utils.IntegrityError: (1048, "Column 'cave_id' cannot be null")
# fix by restarting db on server
# sudo service mariadb stop
# sudo service mariadb start
survexblockroot.save()
omitsfileroot = MakeFileRoot(UNSEENS)
survexomitsroot = SurvexBlock(
name=OMITBLOCK, survexpath="", survexfile=omitsfileroot, legsall=0, legslength=0.0
)
survexomitsroot.save()
print(" - Loading Survex Blocks...")
memstart = get_process_memory()
# ----------------------------------------------------------------
FindAndLoadSurvex(survexblockroot)
FindAndLoadSurvex()
# ----------------------------------------------------------------
memend = get_process_memory()
print(f" - MEMORY start:{memstart:.3f} MB end:{memend:.3f} MB increase={memend - memstart:.3f} MB")
survexblockroot.save()
global person_pending_cache
for sb in person_pending_cache:
if len(person_pending_cache[sb]) > 0:

View File

@ -74,15 +74,8 @@ USE_L10N = True
FIX_PERMISSIONS = []
# top-level survex file basename (without .svx)
SURVEX_TOPNAME = "1623-and-1626-no-schoenberg-hs"
# Caves for which survex files exist, but are not otherwise registered
# replaced (?) by expoweb/cave_data/pendingcaves.txt
# PENDING = ["1626-361", "2007-06", "2009-02",
# "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06",
# "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888",
# "2018-pf-01", "2018-pf-02"]
# SURVEX_TOPNAME = "1623-and-1626-no-schoenberg-hs"
SURVEX_TOPNAME = "troggle_import_root" # same, but without all the 'essentials' gubbins
APPEND_SLASH = (
False # never relevant because we have urls that match unknown files and produce an 'edit this page' response

View File

@ -84,7 +84,7 @@ underground survey length: {{svxlength|floatformat:2}} metres<br />
cave primary survexfile <a href="/survexfile/{{ survexfile.cave.survex_file }}">{{survexfile.cave.survex_file}}</a> <br>
directory primary survexfile <a href="/survexfile/{{survexfile.primary}}.svx">{{survexfile.primary}}</a> <br />
{% for sb in svxblocks %}
block <em>{{sb}}</em> has parent block <em>{{sb.parent}}</em><br />
block <em>{{sb}}</em> has parent block <em>{{sb.parent}}</em> (in file {{sb.parent.survexfile.path}}.svx)<br />
{% empty %}
Cannot find any <em>dated</em> survex blocks in this survex file (not looking at *include files). <br />
Report this to a nerd if you think this is incorrect.