2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-22 07:11:52 +00:00

Parse all files, not just those in the *include tree

This commit is contained in:
Philip Sargent 2022-10-05 21:11:18 +03:00
parent 9e5bdace2c
commit 7e47fe1f30
2 changed files with 156 additions and 57 deletions

View File

@ -457,7 +457,7 @@ def readcaves():
DataIssue.objects.filter(parser='caves ok').delete()
DataIssue.objects.filter(parser='entrances').delete()
print(" - Creating Areas 1623, 1624 and 1626")
print(" - Creating Areas 1623, 1624, 1627 and 1626")
# This crashes on the server with MariaDB even though a null parent is explicitly allowed.
area_1623= Area.objects.create(short_name = "1623", super=None)
print(" - Saving Area 1623")
@ -468,6 +468,9 @@ def readcaves():
area_1626= Area.objects.create(short_name = "1626", super=None)
print(" - Saving Area 1626")
area_1626.save()
area_1627= Area.objects.create(short_name = "1627", super=None)
print(" - Saving Area 1627")
area_1627.save()
with transaction.atomic():

View File

@ -6,7 +6,7 @@ import copy
import subprocess
from pathlib import Path
from datetime import datetime, timedelta, date
from datetime import datetime, timedelta, date, timezone
from django.utils.timezone import get_current_timezone
from django.utils.timezone import make_aware
@ -39,9 +39,12 @@ todo = '''Also walk the entire tree in the :loser: repo looking for unconnected
'''
survexblockroot = None
survexomitsroot = None
ROOTBLOCK = "rootblock"
OMITBLOCK = "omitblock"
METRESINFEET = 3.28084
stop_dup_warning = False
debugprint = False # Turns on debug printout for just one *include file
debugprinttrigger = "!"
# debugprinttrigger = "caves-1623/40/old/EisSVH"
@ -182,7 +185,7 @@ class LoadingSurvex():
callcount = 0
caverncount = 0
ignoreprefix = ["surface", "kataster", "fixedpts", "gpx"]
ignorenoncave = ["caves-1623", "caves-1626", "caves-1623/2007-neu"]
ignorenoncave = ["caves-1623", "caves-1623/2007-NEU","caves-1626", "caves-1624", "caves-1627", "fixedpts/gps/gps00raw", ""]
includedfilename =""
currentsurvexblock = None
currentsurvexfile = None
@ -344,7 +347,7 @@ class LoadingSurvex():
the rest is discarded after error-checking.
Now skipping the error checking - returns as soon as the leg is not one we count.
REPLACE ALL THIS by reading the .log output of cavern for the file
REPLACE ALL THIS by reading the .log output of cavern for the file. But we need the lengths per Block, not by File. Hmm.
"""
invalid_clino = 180.0
invalid_compass = 720.0
@ -457,7 +460,7 @@ class LoadingSurvex():
print(("! Clino misread in", survexblock.survexfile.path))
print((" datastar:", datastar))
print((" Line:", ls))
message = ' ! Value Error: Clino misread in line %s in %s' % (ls, survexblock.survexfile.path)
message = f' ! Value Error: Clino misread in line \'{sline.lower()}\' {datastar=} {self.datastar=} {ls=} in\n{survexblock}\n{survexblock.survexfile}\n{survexblock.survexfile.path}'
DataIssue.objects.create(parser='survexleg', message=message, url=get_offending_filename(survexblock.survexfile.path))
lclino = invalid_clino
@ -668,17 +671,24 @@ class LoadingSurvex():
datastar["tape"] = i-1
self.datastar = copy.deepcopy(datastar)
return
elif ls[0] == "cartesian" or ls[0] == "nosurvey" or ls[0] == "diving" or ls[0] == "cylpolar" or ls[0] == "passage":
# message = " ! - *data {} blocks ignored. {}|{}" '{}' .format(ls[0].upper(), survexblock.name, survexblock.survexpath, args)
elif ls[0] == "passage" or ls[0] == "nosurvey" or ls[0] == "diving" or ls[0] == "cylpolar":
#message = " ! - *data {} blocks ignored. {}|{}" '{}' .format(ls[0].upper(), survexblock.name, survexblock.survexpath, args)
# print(message)
# print(message,file=sys.stderr)
# DataIssue.objects.create(parser='survex', message=message)
#print(message,file=sys.stderr)
#DataIssue.objects.create(parser='survex', message=message)
self.datastar["type"] = ls[0]
elif ls[0] == "cartesian": # We should not ignore this ?! Default for Germans ?
#message = " ! - *data {} blocks ignored. {}|{}" '{}' .format(ls[0].upper(), survexblock.name, survexblock.survexpath, args)
# print(message)
#print(message,file=sys.stderr)
#DataIssue.objects.create(parser='survex', message=message)
self.datastar["type"] = ls[0]
else:
message = " ! - Unrecognised *data statement '{}' {}|{}".format(args, survexblock.name, survexblock.survexpath)
print(message)
print(message,file=sys.stderr)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
self.datastar["type"] = ls[0]
def LoadSurvexFlags(self, args):
# Valid flags are DUPLICATE, SPLAY, and SURFACE, and a flag may be preceded with NOT to turn it off.
@ -780,10 +790,10 @@ class LoadingSurvex():
# print(f'! ALREADY PENDING {caveid}',file=sys.stderr)
return
message = f" ! Error: {caveid} not a cave nor ignorable. headpath:'{headpath}' while parsing '{includelabel=}.svx' at depth:[{len(depth)}]. ignore prefix list:'{self.ignoreprefix}'"
message = f" ! Warning: cave identifier '{caveid}' (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pending.txt ? In '{includelabel}.svx' at depth:[{len(depth)}]."
print("\n"+message)
print("\n"+message,file=sys.stderr)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(headpath))
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(includelabel))
print(f' # datastack in LoadSurvexFile:{includelabel} type:', end="",file=sys.stderr)
for dict in self.datastack:
print(f'<{dict["type"].upper()} >', end="",file=sys.stderr)
@ -1190,6 +1200,7 @@ class LoadingSurvex():
and reads only the *include and *begin and *end statements. It produces a linearised
list of the include tree and detects blocks included more than once.
"""
global stop_dup_warning
thissvxline = 0
indent = " " * self.depthinclude
sys.stderr.flush();
@ -1202,18 +1213,23 @@ class LoadingSurvex():
if path in self.svxfileslist:
# We have already used os.path.normpath() so this is OK. "/../" and "//" have been simplified already.
if stop_dup_warning:
#print("D",end="", file=sys.stderr)
pass
else:
message = f" * Warning. Duplicate detected. We have already seen this *include '{path}' from another survex file. Detected at callcount:{self.callcount} depth:{self.depthinclude}"
print(message)
print(message,file=flinear)
print("\n"+message,file=sys.stderr)
#print(message,file=sys.stderr)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path))
if self.svxfileslist.count(path) > 20:
message = " ! ERROR. Survex file already *included 20x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(path)
if self.svxfileslist.count(path) > 2:
message = " ! ERROR. Should have been caught before this. Survex file already *included 2x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(path)
print(message)
print(message,file=flinear)
print(message,file=sys.stderr)
#print(message,file=sys.stderr)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path))
return
return
self.svxfileslist.append(path)
try:
@ -1254,7 +1270,7 @@ class LoadingSurvex():
includepath = os.path.normpath(os.path.join(os.path.split(path)[0], re.sub(r"\.svx$", "", args)))
fullpath = os.path.join(settings.SURVEX_DATA, includepath + ".svx")
self.RunSurvexIfNeeded(os.path.join(settings.SURVEX_DATA, includepath))
self.RunSurvexIfNeeded(os.path.join(settings.SURVEX_DATA, includepath), path)
self.checkUniqueness(os.path.join(settings.SURVEX_DATA, includepath))
if os.path.isfile(fullpath):
#--------------------------------------------------------
@ -1280,7 +1296,7 @@ class LoadingSurvex():
self.depthinclude -= 1
#--------------------------------------------------------
else:
message = " ! ERROR *include file not found for:'{}'".format(includepath)
message = f" ! ERROR *include file '{includepath}' not found, listed in '{fin.name}'"
print(message)
print(message,file=sys.stderr)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path))
@ -1329,7 +1345,7 @@ class LoadingSurvex():
print(message)
def RunSurvexIfNeeded(self,fullpath):
def RunSurvexIfNeeded(self,fullpath, calledpath):
now = time.time()
cav_t = now - 365*24*3600
log_t = now - 365*24*3600
@ -1368,7 +1384,7 @@ class LoadingSurvex():
outputdir = Path(svxpath).parent
if not svxpath.is_file():
message = f' ! BAD survex file "{fullpath}" specified in *include (somewhere).. '
message = f' ! BAD survex file "{fullpath}" specified in *include in {calledpath} '
DataIssue.objects.create(parser='entrances', message=message)
print(message)
return
@ -1410,6 +1426,7 @@ class LoadingSurvex():
def FindAndLoadSurvex(survexblockroot):
"""Follows the *include links successively to find files in the whole include tree
"""
global stop_dup_warning
print(' - redirecting stdout to svxblks.log...')
stdout_orig = sys.stdout
# Redirect sys.stdout to the file
@ -1419,15 +1436,14 @@ def FindAndLoadSurvex(survexblockroot):
survexfileroot = survexblockroot.survexfile # i.e. SURVEX_TOPNAME only
collatefilename = "_" + survexfileroot.path + ".svx"
svx_scan = LoadingSurvex()
svx_scan.callcount = 0
svx_scan.depthinclude = 0
fullpathtotop = os.path.join(survexfileroot.survexdirectory.path, survexfileroot.path)
# Rather than do this check for the presence of the .log and .3d files synchronously here,
# we should instead run this in a separate thread asynchronously.
print(" - RunSurvexIfNeeded cavern on '{}'".format(fullpathtotop), file=sys.stderr)
svx_scan.RunSurvexIfNeeded(fullpathtotop)
svx_scan.RunSurvexIfNeeded(fullpathtotop, fullpathtotop)
svx_scan.checkUniqueness(fullpathtotop)
indent=""
@ -1447,6 +1463,7 @@ def FindAndLoadSurvex(survexblockroot):
from pstats import SortKey
pr = cProfile.Profile()
pr.enable()
#print(f"###{survexblockroot=} {survexfileroot.path=}",file=sys.stderr)
#----------------------------------------------------------------
svx_scan.PushdownStackScan(survexblockroot, survexfileroot.path, finroot, flinear, fcollate)
#----------------------------------------------------------------
@ -1460,42 +1477,108 @@ def FindAndLoadSurvex(survexblockroot):
fcollate.write(";*edulcni {}\n".format(survexfileroot.path))
mem1 = get_process_memory()
flinear.write("\n - MEM:{:.2f} MB STOP {}\n".format(mem1,survexfileroot.path))
flinear.write(" - MEM:{:.3f} MB USED\n".format(mem1-mem0))
svxfileslist = svx_scan.svxfileslist
flinear.write(" - {:,} survex files in linear include list \n".format(len(svxfileslist)))
flinear.close()
fcollate.close()
flinear.write(" - MEM:{:.3f} MB ADDITIONALLY USED\n".format(mem1-mem0))
flinear.write(" - {:,} survex files in linear include list \n".format(len(svx_scan.svxfileslist)))
print("\n - {:,} runs of survex 'cavern' refreshing .3d files \n".format(svx_scan.caverncount),file=sys.stderr)
svx_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.?
print("\n - {:,} survex files in linear include list \n".format(len(svxfileslist)),file=sys.stderr)
print(" - {:,} runs of survex 'cavern' refreshing .3d files".format(svx_scan.caverncount),file=sys.stderr)
print(" - {:,} survex files from tree in linear include list".format(len(svx_scan.svxfileslist)),file=sys.stderr)
mem1 = get_process_memory()
print(" - MEM:{:7.2f} MB END ".format(mem0),file=sys.stderr)
print(" - MEM:{:7.3f} MB USED".format(mem1-mem0),file=sys.stderr)
print(" - MEM:{:7.2f} MB END ".format(mem1),file=sys.stderr)
print(" - MEM:{:7.3f} MB ADDITIONALLY USED".format(mem1-mem0),file=sys.stderr)
a = []
#
# Process all the omitted files in :loser: with some exceptions
#
unseens = set()
b=[]
for p in Path(settings.SURVEX_DATA).rglob('*.svx'):
if p.is_file():
po = p.relative_to(Path(settings.SURVEX_DATA))
pox = po.with_suffix('')
if str(pox) not in svxfileslist:
print(f"[{pox}]", file=sys.stderr)
a.append(pox)
if str(pox) not in svx_scan.svxfileslist:
# print(f"[{pox}]", file=sys.stderr)
unseens.add(pox)
else:
print("'", end=" ", file=sys.stderr)
b.append(pox)
print("=>", len(a), len(b), len(svxfileslist), file=sys.stderr)
if len(b) != len(svx_scan.svxfileslist):
print(f" ! Mismatch. {len(b)} survex files found which should be {len(svx_scan.svxfileslist)} in main tree)", file=sys.stderr)
excpts = ["surface/terrain", "kataster/kataster-boundaries", "template", "docs", "_unseens"]
removals = []
for x in unseens:
for o in excpts:
if str(x).strip().startswith(o):
removals.append(x)
for x in removals:
unseens.remove(x)
print(f" - {len(unseens)} survex files found which were not included in main tree. ({len(svx_scan.svxfileslist)} in main tree)", file=sys.stderr)
print(f" -- Now loading the previously-omitted survex files.", file=sys.stderr)
with open(Path(settings.SURVEX_DATA, '_unseens.svx'), 'w') as u:
u.write(f"; {len(unseens):,} survex files not *included by {settings.SURVEX_TOPNAME} (which are {len(svx_scan.svxfileslist):,} files)\n")
u.write(f"; autogenerated by parser/survex.py from databasereset.py on '{datetime.now(timezone.utc)}'\n")
u.write(f"; omitting any file beginning with {excpts}\n\n")
u.write(f"*begin unseens\n")
for x in sorted(unseens):
u.write(f" *include {x}\n")
u.write(f"*end unseens\n")
survexfileroot = survexblockroot.survexfile # i.e. SURVEX_TOPNAME only
omit_scan = LoadingSurvex()
omit_scan.callcount = 0
omit_scan.depthinclude = 0
fullpathtotop = os.path.join(survexfileroot.survexdirectory.path, '_unseens.svx')
# copy the list to prime the next pass through the files
omit_scan.svxfileslist = svx_scan.svxfileslist[:]
svx_scan.svxfileslist = [] # free memory
svx_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.?
print(" - RunSurvexIfNeeded cavern on '{}'".format(fullpathtotop), file=sys.stderr)
omit_scan.RunSurvexIfNeeded(fullpathtotop, fullpathtotop)
omit_scan.checkUniqueness(fullpathtotop)
mem0 = get_process_memory()
print(" - MEM:{:7.2f} MB START '_unseens'".format(mem0),file=sys.stderr)
#flinear = open('svxlinear.log', 'w')
flinear.write(f" - MEM:{mem0:7.2f} MB START '_unseens'\n")
print(" ", file=sys.stderr,end='')
finroot = open(fullpathtotop)
fcollate.write(";*include {}\n".format('_unseens.svx'))
flinear.write("{:2} {} *include {}\n".format(omit_scan.depthinclude, indent, '_unseens'))
stop_dup_warning = True
#----------------------------------------------------------------
omit_scan.PushdownStackScan(survexblockroot, '_unseens', finroot, flinear, fcollate)
#----------------------------------------------------------------
stop_dup_warning = False
flinear.write("{:2} {} *edulcni {}\n".format(omit_scan.depthinclude, indent, '_unseens'))
fcollate.write(";*edulcni {}\n".format('_unseens.svx'))
mem1 = get_process_memory()
flinear.write("\n - MEM:{:.2f} MB STOP {} OMIT\n".format(mem1,'_unseens.svx'))
flinear.write(" - MEM:{:.3f} MB ADDITIONALLY USED OMIT\n".format(mem1-mem0))
flinear.write(" - {:,} survex files in linear include list OMIT \n".format(len(omit_scan.svxfileslist)))
flinear.close()
fcollate.close()
print("\n - {:,} runs of survex 'cavern' refreshing .3d files in the unseen list \n".format(omit_scan.caverncount),file=sys.stderr)
print("\n - {:,} survex files in linear include list including previously unseen ones \n".format(len(omit_scan.svxfileslist)),file=sys.stderr)
omit_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.?
mem1 = get_process_memory()
print(" - MEM:{:7.2f} MB END ".format(mem1),file=sys.stderr)
print(" - MEM:{:7.3f} MB ADDITIONALLY USED".format(mem1-mem0),file=sys.stderr)
for i in [0,1,2,3,4,5]:
print(f"==> [{svxfileslist[i]}]", file=sys.stderr)
svxfileslist = [] # free memory
# Before doing this, it would be good to identify the *equate and *entrance statements that are relevant to the
# entrance locations. Those locations are currently loaded after this step by LoadPos(), but might be better loaded before it?
@ -1503,6 +1586,8 @@ def FindAndLoadSurvex(survexblockroot):
print('\n - Loading All Survex Blocks (LinearLoad)',file=sys.stderr)
svx_load = LoadingSurvex()
mem1 = get_process_memory()
print(" - MEM:{:7.2f} MB after creating empty loading object.".format(mem1),file=sys.stderr)
svx_load.survexdict[survexfileroot.survexdirectory] = []
svx_load.survexdict[survexfileroot.survexdirectory].append(survexfileroot)
@ -1525,7 +1610,7 @@ def FindAndLoadSurvex(survexblockroot):
# ps.print_stats()
print("\n - MEM:{:7.2f} MB STOP".format(mem1),file=sys.stderr)
print(" - MEM:{:7.3f} MB USED".format(mem1-mem0),file=sys.stderr)
print(" - MEM:{:7.3f} MB ADDITIONALLY USED".format(mem1-mem0),file=sys.stderr)
# Close the logging file, Restore sys.stdout to our old saved file handle
sys.stdout.close()
@ -1536,12 +1621,12 @@ def FindAndLoadSurvex(survexblockroot):
legsnumber = svx_load.legsnumber
mem1 = get_process_memory()
print(" - Number of SurvexDirectories: {}".format(len(svx_load.survexdict)))
print(" - Number of SurvexDirectories: {:,}".format(len(svx_load.survexdict)))
tf=0
for d in svx_load.survexdict:
tf += len(svx_load.survexdict[d])
print(" - Number of SurvexFiles: {}".format(tf))
print(f" - Number of Survex legs: {legsnumber}")
print(f" - Number of SurvexFiles: {tf:,}")
print(f" - Number of Survex legs: {legsnumber:,}")
svx_load = None
return legsnumber
@ -1563,6 +1648,14 @@ def MakeSurvexFileRoot():
fileroot.save() # mutually dependent objects need a double-save like this
return fileroot
def MakeOmitFileRoot(fn):
    """Create, save and return the SurvexFile used as the root for omitted files.

    fn is stored as the new SurvexFile's path (the caller visible in this file
    passes "_unseens.svx"). The file is attached to the SurvexDirectory whose
    path equals settings.SURVEX_DATA and is then saved.

    NOTE(review): the directory is fetched with objects.get(), so this presumably
    relies on that SurvexDirectory row having been created beforehand - confirm
    against the call order in LoadSurvexBlocks().
    """
    omitfile = SurvexFile(path=fn, cave=None)
    datadir = SurvexDirectory.objects.get(path=settings.SURVEX_DATA)
    omitfile.survexdirectory = datadir
    omitfile.save()
    return omitfile
def LoadSurvexBlocks():
print(' - Flushing All Survex Blocks...')
@ -1588,10 +1681,13 @@ def LoadSurvexBlocks():
# fix by restarting db on server
# sudo service mariadb stop
# sudo service mariadb start
survexblockroot.save()
omitsfileroot = MakeOmitFileRoot("_unseens.svx")
survexomitsroot = SurvexBlock(name=OMITBLOCK, survexpath="", cave=None, survexfile=omitsfileroot,
legsall=0, legslength=0.0)
survexomitsroot.save()
print(' - Loading Survex Blocks...')
memstart = get_process_memory()
#----------------------------------------------------------------