
Survex editor now parses edited files

Philip Sargent 2023-03-23 19:05:25 +00:00
parent 562ef48f19
commit 770edd6391
4 changed files with 167 additions and 125 deletions

View File

@ -27,8 +27,11 @@ class SurvexDirectory(models.Model):
ordering = ("id",)
verbose_name_plural = "Survex directories"
def contents(self):
return "[SvxDir:" + str(self.path) + " | Primary svx:" + str(self.primarysurvexfile.path) + ".svx ]"
def __str__(self):
return "[SurvexDirectory:" + str(self.path) + " | Primary svx:" + str(self.primarysurvexfile.path) + ".svx ]"
return "[SvxDir:" + str(self.path)+ "]"
class SurvexFile(models.Model):
@ -44,13 +47,11 @@ class SurvexFile(models.Model):
# return "[SurvexFile:"+str(self.path) + "-" + str(self.survexdirectory) + "-" + str(self.cave)+"]"
def exists(self):
"""This is only used within the Django templates
"""
fname = Path(settings.SURVEX_DATA, self.path + ".svx")
return fname.is_file()
def OpenFile(self):
fname = os.path.join(settings.SURVEX_DATA, self.path + ".svx")
return open(fname)
def SetDirectory(self):
dirpath = os.path.split(self.path)[0]
# pointless to search every time we import a survex file if we know there are no duplicates.
@ -64,6 +65,10 @@ class SurvexFile(models.Model):
self.survexdirectory = survexdirectory
self.save()
# Don't change from the default as that breaks troggle webpages and internal referencing!
# def __str__(self):
# return "[SurvexFile:"+str(self.path) + "-" + str(self.survexdirectory) + "-" + str(self.cave)+"]"
def __str__(self):
return self.path

View File

@ -40,9 +40,14 @@ even though there are dozens of surveys.
- Save and re-parse an edited survexfile which already exists in the db, and update
all its dependencies (work in progress)
- overlapping and cross-calling when things fail make this hard to understand, e.g. svx() and
survexcavesingle() can get called for a survex file depending on whether the URL ends in ".svx" or not,
but each tries to handle the other case too.
"""
survexdatasetpath = Path(settings.SURVEX_DATA)
SVXPATH = Path(settings.SURVEX_DATA)
# NB this template text must be identical to that in :loser:/templates/template.svx
survextemplatefile = """; *** THIS IS A TEMPLATE FILE NOT WHAT YOU MIGHT BE EXPECTING ***
@ -160,7 +165,7 @@ class SvxForm(forms.Form):
template = False
def GetDiscCode(self):
fname = survexdatasetpath / (self.data["filename"] + ".svx")
fname = SVXPATH / (self.data["filename"] + ".svx")
if not fname.is_file():
print(">>> >>> WARNING - svx file not found, showing TEMPLATE SVX", fname, flush=True)
self.template = True
@ -186,7 +191,7 @@ class SvxForm(forms.Form):
return difflist
def SaveCode(self, rcode):
fname = survexdatasetpath / (self.data["filename"] + ".svx")
fname = SVXPATH / (self.data["filename"] + ".svx")
if not fname.is_file():
if re.search(r"\[|\]", rcode):
errmsg = "Error: remove all []s from the text.\nEverything inside [] are only template guidance.\n\n"
@ -203,7 +208,7 @@ class SvxForm(forms.Form):
fout = open(fname, "w", encoding="utf8", newline="\n")
except FileNotFoundError:
pth = os.path.dirname(self.data["filename"])
newpath = survexdatasetpath / pth
newpath = SVXPATH / pth
if not os.path.exists(newpath):
os.makedirs(newpath)
fout = open(fname, "w", encoding="utf8", newline="\n")
@ -232,8 +237,8 @@ class SvxForm(forms.Form):
def Process(self):
print(">>>>....\n....Processing\n")
froox = os.fspath(survexdatasetpath / (self.data["filename"] + ".svx"))
froog = os.fspath(survexdatasetpath / (self.data["filename"] + ".log"))
froox = os.fspath(SVXPATH / (self.data["filename"] + ".svx"))
froog = os.fspath(SVXPATH / (self.data["filename"] + ".log"))
cwd = os.getcwd()
os.chdir(os.path.split(froox)[0])
os.system(settings.CAVERN + " --log " + froox)
@ -248,7 +253,7 @@ class SvxForm(forms.Form):
# print(message)
# print(f'stderr:\n\n' + str(sp.stderr) + '\n\n' + str(sp.stdout) + '\n\nreturn code: ' + str(sp.returncode))
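# Hedged sketch of the subprocess call that the commented-out sp.* prints above imply,
# as an alternative to the os.system() call; not what this commit actually runs:
#
#   import subprocess
#   sp = subprocess.run([settings.CAVERN, "--log", froox],
#                       cwd=os.path.split(froox)[0], capture_output=True, text=True)
#   print(f"return code: {sp.returncode}\n{sp.stderr}\n{sp.stdout}")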
filepatherr = Path(survexdatasetpath / str(self.data["filename"] + ".err"))
filepatherr = Path(SVXPATH / str(self.data["filename"] + ".err"))
if filepatherr.is_file():
if filepatherr.stat().st_size == 0:
filepatherr.unlink() # delete empty closure error file
@ -281,11 +286,14 @@ def svx(request, survex_file):
also has no difflist.
Needs refactoring. Too many piecemeal edits and odd state dependencies.
On GET this does the SAME THING as survexcavesingle() but is called when the .svx suffix is MISSING
"""
warning = False
print(survex_file)
if survex_file.lower().endswith(".svx"):
# cope with ".svx.svx" bollox
survex_file = survex_file[:-4]
print(survex_file)
@ -361,7 +369,7 @@ def svx(request, survex_file):
# collect all the survex blocks which actually have a valid date
if svxfile:
has_3d = (Path(survexdatasetpath) / Path(survex_file + ".3d")).is_file()
has_3d = (Path(SVXPATH) / Path(survex_file + ".3d")).is_file()
try:
svxblocks = svxfile.survexblock_set.filter(date__isnull=False).order_by('date')
except:
@ -438,9 +446,9 @@ def events_on_dates(svxblocks):
# The cavern running function. This is NOT where it is run inside the form! see SvxForm.Process() for that
def process(survex_file):
"""This runs cavern only where a .3d, .log or .err file is requested."""
filepathsvx = survexdatasetpath / str(survex_file + ".svx")
filepathsvx = SVXPATH / str(survex_file + ".svx")
cwd = os.getcwd()
os.chdir(os.path.split(os.fspath(survexdatasetpath / survex_file))[0])
os.chdir(os.path.split(os.fspath(SVXPATH / survex_file))[0])
os.system(settings.CAVERN + " --log " + str(filepathsvx))
os.chdir(cwd)
@ -453,27 +461,27 @@ def process(survex_file):
# print(message)
# print(f'stderr:\n\n' + str(sp.stderr) + '\n\n' + str(sp.stdout) + '\n\nreturn code: ' + str(sp.returncode))
filepatherr = Path(survexdatasetpath / str(survex_file + ".err"))
filepatherr = Path(SVXPATH / str(survex_file + ".err"))
if filepatherr.is_file():
if filepatherr.stat().st_size == 0:
filepatherr.unlink() # delete empty closure error file
def threed(request, survex_file):
filepath3d = survexdatasetpath / str(survex_file + ".3d")
survexdatasetpath / str(survex_file + ".log")
filepath3d = SVXPATH / str(survex_file + ".3d")
SVXPATH / str(survex_file + ".log")
if filepath3d.is_file():
threed = open(filepath3d, "rb")
return HttpResponse(threed, content_type="application/x-aven")
else:
process(survex_file) # should not need to do this if the .3d file already exists, as it should.
log = open(survexdatasetpath / str(survex_file + ".log"), "r", encoding="utf-8")
log = open(SVXPATH / str(survex_file + ".log"), "r", encoding="utf-8")
return HttpResponse(log, content_type="text")
def svxlog(request, survex_file):
"""Used for rendering .log files from survex outputtype"""
filepathlog = survexdatasetpath / str(survex_file + ".log")
filepathlog = SVXPATH / str(survex_file + ".log")
if not filepathlog.is_file():
process(survex_file)
log = open(filepathlog, "r")
@ -481,7 +489,7 @@ def svxlog(request, survex_file):
def err(request, survex_file):
filepatherr = survexdatasetpath / str(survex_file + ".err")
filepatherr = SVXPATH / str(survex_file + ".err")
if not filepatherr.is_file(): # probably not there because it was empty, but re-run anyway
process(survex_file)
process(survex_file)
@ -547,7 +555,7 @@ def identifycavedircontents(gcavedir):
def get_survexareapath(area):
return survexdatasetpath / str("caves-" + area)
return SVXPATH / str("caves-" + area)
# direct local non-database browsing through the svx file repositories
@ -653,7 +661,7 @@ def survexcavesingle(request, survex_cave):
# maybe - and _ mixed up, or CUCC-2017- instead of 2017-CUCC-, or CUCC2015DL01. Let's not get carried away..
# or it might be an exact search for a specific survexfile but just missing the '.svx'.
if (Path(survexdatasetpath) / Path(survex_cave + ".svx")).is_file():
if (SVXPATH / Path(survex_cave + ".svx")).is_file():
return svx(request, survex_cave)
for unoff in [sc, sc.replace("-", "_"), sc.replace("_", "-"), sc.replace("-", ""), sc.replace("_", "")]:

View File

@ -24,8 +24,6 @@ Parses and imports logbooks in all their wonderful confusion
todo = """
- Most of the time is during the database writing (6s out of 8s).
- this is a slow and uncertain function too: cave = getCaveByReference(caveRef)
- profile the code to find bad repetitive things, of which there are many.
- attach or link a DataIssue to an individual expo (logbook) so that it can be found and deleted

View File

@ -8,13 +8,13 @@ from datetime import datetime, timezone
from pathlib import Path
import troggle.settings as settings
from troggle.core.models.caves import Cave, Entrance
from troggle.core.models.caves import Cave, Entrance, GetCaveLookup
from troggle.core.models.logbooks import QM
from troggle.core.models.survex import SurvexBlock, SurvexDirectory, SurvexFile, SurvexPersonRole, SurvexStation
from troggle.core.models.wallets import Wallet
from troggle.core.models.troggle import DataIssue, Expedition
from troggle.core.utils import chaosmonkey, get_process_memory
from troggle.parsers.logbooks import GetCaveLookup
#from troggle.parsers.logbooks import GetCaveLookup
from troggle.parsers.people import GetPersonExpeditionNameLookup, known_foreigner
"""Imports the tree of survex files following from a defined root .svx file
@ -64,6 +64,28 @@ class SurvexLeg:
compass = 0.0
clino = 0.0
def IdentifyCave(cavepath):
"""Given a file path for a survex file, or a survex-block path,
return the cave object
"""
caveslist = GetCaveLookup()
if cavepath.lower() in caveslist:
return caveslist[cavepath.lower()]
# TO DO - this predates the big revision to Gcavelookup so look at this again carefully
path_match = LoadingSurvex.rx_cave.search(cavepath) # use as Class method
if path_match:
sluggy = f"{path_match.group(1)}-{path_match.group(2)}"
guesses = [sluggy.lower(), path_match.group(2).lower()]
for g in guesses:
if g in caveslist:
caveslist[cavepath] = caveslist[g]
return caveslist[g]
print(f" ! Failed to find cave for {cavepath.lower()}")
else:
# not a cave, but that is fine.
# print(f' ! No regex(standard identifier) cave match for {cavepath.lower()}')
return None
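# Hedged usage sketch for IdentifyCave(); the paths are illustrative, not taken from the commit:
#
#   IdentifyCave("caves-1623/290/somefile")   # found via GetCaveLookup() or rx_cave -> Cave object
#   IdentifyCave("fixedpts/gps")              # no cave identifier in the path -> None, which is fine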
def datewallet(w, earliest):
"""Gets the date of the youngest survexblock associated with the wallet
REFACTOR this to do the whole date-getting task
@ -1093,23 +1115,6 @@ class LoadingSurvex:
f" $ flagslist:{flags}",
)
def IdentifyCave(self, cavepath):
if cavepath.lower() in self.caveslist:
return self.caveslist[cavepath.lower()]
# TO DO - this predates the big revision to Gcavelookup so look at this again carefully
path_match = self.rx_cave.search(cavepath)
if path_match:
sluggy = f"{path_match.group(1)}-{path_match.group(2)}"
guesses = [sluggy.lower(), path_match.group(2).lower()]
for g in guesses:
if g in self.caveslist:
self.caveslist[cavepath] = self.caveslist[g]
return self.caveslist[g]
print(f" ! Failed to find cave for {cavepath.lower()}")
else:
# not a cave, but that is fine.
# print(f' ! No regex(standard identifier) cave match for {cavepath.lower()}')
return None
def GetSurvexDirectory(self, headpath):
"""This creates a SurvexDirectory if it has not been seen before, and on creation
@ -1214,7 +1219,7 @@ class LoadingSurvex:
newdirectory.save()
newfile.survexdirectory = newdirectory
self.survexdict[newdirectory].append(newfile)
cave = self.IdentifyCave(headpath) # cave already exists in db
cave = IdentifyCave(headpath) # cave already exists in db
if not newdirectory:
message = f" ! 'None' SurvexDirectory returned from GetSurvexDirectory({headpath})"
@ -1459,27 +1464,41 @@ class LoadingSurvex:
if edulcni:
self.ProcessEdulcniLine(edulcni)
def LoadSurvexSetup(self, survexblock, survexfile):
self.depthbegin = 0
self.datastar = self.datastardefault
print(
self.insp
+ f" - MEM:{get_process_memory():.3f} Reading. parent:{survexblock.survexfile.path} <> {survexfile.path} "
)
self.lineno = 0
sys.stderr.flush()
self.callcount += 1
if self.callcount % 10 == 0:
print(".", file=sys.stderr, end="")
if self.callcount % 500 == 0:
print("\n", file=sys.stderr, end="")
# Try to find the cave in the DB if not use the string as before
path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path)
def get_cave(self, path):
"""Read the file path to a survex file and guesses the cave
"""
path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", path)
if path_match:
pos_cave = f"{path_match.group(1)}-{path_match.group(2)}"
cave = getCaveByReference(pos_cave)
if cave:
survexfile.cave = cave
return cave
return None
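# Hedged example of what the regex in get_cave() matches; the path is illustrative only:
#
#   self.get_cave("caves-1623/290/mainpassage.svx")
#   # path_match.group(1) == "1623", group(2) == "290" -> getCaveByReference("1623-290")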
# def LoadSurvexSetup(self, survexblock, survexfile):
# """REFACTOR to use get_cave()
# This does not seem to get run at all ?!
# """
# self.depthbegin = 0
# self.datastar = self.datastardefault
# print(
# self.insp
# + f" - MEM:{get_process_memory():.3f} Reading. parent:{survexblock.survexfile.path} <> {survexfile.path} "
# )
# self.lineno = 0
# sys.stderr.flush()
# self.callcount += 1
# if self.callcount % 10 == 0:
# print(".", file=sys.stderr, end="")
# if self.callcount % 500 == 0:
# print("\n", file=sys.stderr, end="")
# # Try to find the cave in the DB if not use the string as before
# path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path)
# if path_match:
# pos_cave = f"{path_match.group(1)}-{path_match.group(2)}"
# cave = getCaveByReference(pos_cave)
# if cave:
# survexfile.cave = cave
def LinearLoad(self, survexblock, path, collatefilename):
"""Loads a single survex file. Usually used to import all the survex files which have been collated
@ -1497,7 +1516,7 @@ class LoadingSurvex:
slengthtotal = 0.0
nlegstotal = 0
self.relativefilename = path
self.IdentifyCave(path) # this will produce null for survex files which are geographic collections
IdentifyCave(path) # this will produce null for survex files which are geographic collections
self.currentsurvexfile = survexblock.survexfile
self.currentsurvexfile.save() # django insists on this although it is already saved !?
@ -1637,6 +1656,7 @@ class LoadingSurvex:
legslength=0.0,
)
newsurvexblock.save()
print(f"SB: #{newsurvexblock.id} '{newsurvexblock}' parent:{newsurvexblock.parent} f:{newsurvexblock.survexfile}")
newsurvexblock.title = (
"(" + survexblock.title + ")"
) # copy parent initially, overwrite if it has its own
@ -2306,43 +2326,48 @@ def parse_one_file(fpath): # --------------------------------------in progress--
In the initial file parsing in databaseReset, the *include expansion is done
in an earlier stage than LinearLoad(). By the time LinearLoad() is called,
all the *include expansion has happened.
WORK IN PROGRESS.
Works fine for a completely new survex file.
For an edited, pre-existing survex file,
I am having great trouble getting the 'parent' block to work correctly.
It gets overwritten, and then nullified, on repeated SAVE & import.
I should learn how to step through with the debugger.
"""
def parse_new_svx(fpath, blockroot=None, svxfileroot=None):
def parse_new_svx(fpath, svx_load, cave, svxfileroot=None):
"""We need a dummy survex block which has the survexfile being parsed
as its .survexfile field. But it is used in two ways: it is also
set as the parent block for the new blocks being created. This has to be fixed
later.
This all needs refactoring.
"""
if svxfileroot == None:
svxfileroot = MakeFileRoot(fpath)
svxfileroot = MakeFileRoot(fpath, cave)
svxfileroot.save()
if blockroot == None:
newname = "adhoc_" + str(Path(str(svxfileroot)).name)
survexblockparent = SurvexBlock(
name=newname, survexpath="", survexfile=svxfileroot, legsall=0, legslength=0.0
)
survexblockparent.save()
blockroot = survexblockparent
# It is vital that the block has attached the survexfile object which is being parsed.
block_dummy = SurvexBlock(
name="dummy", survexpath="", survexfile=svxfileroot, legsall=0, legslength=0.0
)
svxfileroot.save()
block_dummy.save()
newname = f"#{block_dummy.id}_" + str(Path(str(svxfileroot)).name)
block_dummy.name = newname
block_dummy.save()
print(f" - block_dummy now '{block_dummy}' {type(block_dummy)} id={block_dummy.id} f:{block_dummy.survexfile}")
svx_load.survexdict[svxfileroot.survexdirectory] = []
svx_load.survexdict[svxfileroot.survexdirectory].append(svxfileroot)
svx_load.svxdirs[""] = svxfileroot.survexdirectory
# ----------------------------------------------------------------
svx_load.LinearLoad(blockroot, svxfileroot.path, fname)
svx_load.LinearLoad(block_dummy, svxfileroot.path, fname)
# ----------------------------------------------------------------
# Now we don't need or want the dummy any more
block_dummy.delete()
global svx_load
print(f"\n - Loading One Survex file '{fpath}'", file=sys.stderr)
svx_load = LoadingSurvex()
svx_load.survexdict = {}
fname = Path(settings.SURVEX_DATA, (fpath + ".svx"))
# print(f" - {fname=}")
svxs = SurvexFile.objects.filter(path=fpath)
if svxs:
@ -2351,67 +2376,70 @@ def parse_one_file(fpath): # --------------------------------------in progress--
print(f" - Aborting file parsing & import into database.")
return False
print(f" - Pre-existing survexfile {svxs}.")
# reparse_existing_svx(svxs)
existingsvx = SurvexFile.objects.get(path=fpath)
existingcave = existingsvx.cave
print(f" - survexfile is {existingsvx} id={existingsvx.id} {existingcave}")
print(f" - survexfile id={existingsvx.id} {existingsvx} {existingcave}")
sbs = existingsvx.survexblock_set.all()
existingparent = None
parents =set()
if sbs:
for sb in sbs:
print(f" - cleaning survex block {sb=}")
# print(f" - {sb.id} checking survex block {sb=}")
try:
if sb.parent:
parents.add(sb.parent)
# print(f" - adding {sb.parent=}")
except:
print(f" ! FAILURE to access sb.parent {sb=}")
sb.delete()
print(f" ! FAILURE to access sb.parent {sb=}\n ! {sb.parent_id=} ")# \n{dir(sb)}
# even though the parent_id exists.. hmm.
for sb in sbs:
# print(f" - {sb.id} {sb.pk} {sb}")
sb_keep = sb
if sb not in parents:
# print(f" - {sb.id} Deleting survex block {sb=}")
sb.delete()
if parents:
print(f" - set of parent blocks {parents}")
# print(f" - parents get {parents}")
if len(parents) > 1:
print(f" - WARNING more than one parent survex block!")
existingparent = parents.pop()
existingparent = parents.pop() # removes it
parents.add(existingparent) # restores it
# print(f" - deleting survex file {existingsvx=}")
# existingsvx.delete()
print(f" - Reloading and parsing this survexfile '{fpath}' Loading...")
print(f" - Reloading and parsing this survexfile '{fpath}' Loading...")
# Logic is that we need a SurvexBlock (SB) which links to the survexfile we are parsing for the parser
# to work, but we delete all those before we start parsing. Urk.
#===========
parse_new_svx(fpath, svx_load, existingsvx, svxfileroot=existingsvx)
#===========
parse_new_svx(fpath, blockroot=existingparent, svxfileroot=existingsvx)
svxs = SurvexFile.objects.filter(path=fpath)
if len(svxs)>1:
print(f" ! Mistake? More than one survex file object in database with the same file-path {svxs}")
print(f" - Aborting file parsing & import into database.")
return False
replacesvx = SurvexFile.objects.get(path=fpath)
replacesvx.cave = existingcave
print(f" - new/replacement survexfile {svxs}. id={replacesvx.id}")
replacesvx.save()
print(f" - survexfile id={existingsvx.id} update ")
if parents:
sbs = replacesvx.survexblock_set.all()
print(f" - parents set {parents}")
sbs = existingsvx.survexblock_set.all()
if len(sbs)<1:
print(f" ! No survex blocks found. Parser failure...")
for sb in sbs:
print(f" - re-setting survex block parent{sb=}")
print(f" - {sb.id} re-setting survex block parent{sb=}")
sb.parent = existingparent # should be all the same
sb.save()
else:
print(f" - Not seen this survexfile before '{fpath}' Loading...")
parse_new_svx(fpath)
print(f" - Not seen this survexfile before '{fpath}' Loading. ..")
#===========
parse_new_svx(fpath,svx_load, IdentifyCave(fpath))
#===========
legsnumber = svx_load.legsnumber
print(f" - Number of SurvexDirectories: {len(svx_load.survexdict):,}")
print(f" - SurvexDirectories: {svx_load.survexdict}")
tf = 0
for d in svx_load.survexdict:
tf += len(svx_load.survexdict[d])
print(f" - Number of SurvexFiles: {tf:,}")
print(f" - Number of Survex legs: {legsnumber:,}")
print(f" - Length of Survex legs: {svx_load.slength:.2f} m")
# print(f" - Number of SurvexDirectories: {len(svx_load.survexdict):,}")
# tf = 0
# for d in svx_load.survexdict:
# print(f" - SD: {d}")
# tf += len(svx_load.survexdict[d])
# print(f" - Number of SurvexFiles: {tf:,}")
# print(f" - Number of Survex legs: {svx_load.legsnumber:,}")
# print(f" - Length of Survex legs: {svx_load.slength:.2f} m")
svx_load = None
return True
@ -2432,15 +2460,18 @@ def MakeSurvexFileRoot():
return fileroot
def MakeFileRoot(fn):
def MakeFileRoot(fn, cave):
"""Returns a file_object.path = _unseens.svx associated with directory_object.path = SURVEX_DATA
CHANGE THIS to just use the same block root as for SURVEX_TOPNAME ?
"""
print(f" - making a new root survexfile for this import: {fn}")
fileroot = SurvexFile(path=fn, cave=None)
print(f" - Making a new root survexfile for this import: {fn}")
fileroot = SurvexFile(path=fn, cave=cave)
fileroot.survexdirectory = SurvexDirectory.objects.get(id=1) # just re-use the first thing we made
fileroot.save()
cave = IdentifyCave(fn)
fileroot.cave = cave
print(f" - new fileroot {type(fileroot)} for {fn} with cave {cave}")
return fileroot
@ -2490,7 +2521,7 @@ def LoadSurvexBlocks():
# sudo service mariadb start
survexblockroot.save()
omitsfileroot = MakeFileRoot(UNSEENS)
omitsfileroot = MakeFileRoot(UNSEENS, None)
survexomitsroot = SurvexBlock(
name=OMITBLOCK, survexpath="", survexfile=omitsfileroot, legsall=0, legslength=0.0
)