Working. More fault checking.

This commit is contained in:
Philip Sargent 2020-06-27 17:55:59 +01:00
parent c55716df08
commit 4716eaa4b6
2 changed files with 266 additions and 55 deletions

View File

@ -35,6 +35,8 @@ class SurvexFile(models.Model):
def SetDirectory(self):
dirpath = os.path.split(self.path)[0]
# pointless search every time we import a survex file if we know there are no duplicates..
# don't use this for initial import.
survexdirectorylist = SurvexDirectory.objects.filter(cave=self.cave, path=dirpath)
if survexdirectorylist:
self.survexdirectory = survexdirectorylist[0]

View File

@ -41,26 +41,35 @@ class LoadSurvex():
rx_linelen = re.compile(r"[\d\-+.]+$")
rx_team = re.compile(r"(?i)(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$")
rx_person = re.compile(r"(?i) and | / |, | & | \+ |^both$|^none$")
rx_qm = re.compile(r'^\s*QM(\d)\s+?([a-dA-DxX])\s+([\w\-]+)\.(\d+)\s+(([\w\-]+)\.(\d+)|\-)\s+(.+)$')
rx_qm = re.compile(r'(?i)^\s*QM(\d)\s+?([a-dA-DxX])\s+([\w\-]+)\.(\d+)\s+(([\w\-]+)\.(\d+)|\-)\s+(.+)$')
# remember there is also QM_PATTERN used in views_other and set in settings.py
rx_comment = re.compile(r"([^;]*?)\s*(?:;\s*(.*))?\n?$")
rx_ref = re.compile(r'^\s*ref[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)')
rx_star = re.compile(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$')
rx_cave = re.compile(r'caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/')
rx_comment = re.compile(r'([^;]*?)\s*(?:;\s*(.*))?\n?$')
rx_comminc = re.compile(r'(?i)^\s*;\*include[\s](.*)$') # inserted by linear collate ;*include
rx_commcni = re.compile(r'(?i)^\s*;\*edulcni[\s](.*)$') # inserted by linear collate ;*edulcni
rx_include = re.compile(r'(?i)^\s*(\*include[\s].*)$')
rx_ref = re.compile(r'(?i)^\s*ref[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)')
rx_star = re.compile(r'(?i)\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$')
rx_starref = re.compile(r'(?i)^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$')
rx_argsref = re.compile(r'(?i)^[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$')
survexlegsalllength = 0.0
survexlegsnumber = 0
depthbegin = 0
depthimport = 0
depthinclude = 0
stackbegin =[]
stackimport = []
stackinclude = []
svxfileslist =[]
lineno = 0
insp = ""
callcount = 0
stardata ={}
includedfilename =""
currenttitle =""
currentsurvexblock = None
currentsurvexfile = None
currentcave = None
def __init__(self):
pass
@ -234,7 +243,6 @@ class LoadSurvex():
print((self.insp+message))
models.DataIssue.objects.create(parser='survex', message=message)
def LoadSurvexQM(self, survexblock, qmline):
insp = self.insp
qm_no = qmline.group(1)
@ -274,9 +282,65 @@ class LoadSurvex():
print(insp+message)
models.DataIssue.objects.create(parser='survex', message=message)
def LoadSurvexDataCmd(self, survexblock, args):
    """Parse the arguments of a survex ``*data`` command into ``self.stardata``.

    The first word of ``args`` is the data style; every word (including the
    style itself) is recorded in a dict that maps the word to ``position - 1``,
    so the first reading after the style has index 0.  Words are translated
    through ``self.stardataparamconvert`` first (e.g. to unify synonyms).

    survexblock -- the block the command occurred in; not used here, kept so
                   the signature matches the other ``LoadSurvex...`` handlers
    args        -- the text following ``*data``

    ``self.stardataparamconvert`` and ``self.stardatadefault`` are expected to
    be defined on the class (they are not shown in this part of the file).

    Raises AssertionError for a style that is not normal / cartesian /
    nosurvey / default / passage, or for a leg style that lacks both
    from+to and station columns.  Raises IndexError if ``args`` is blank.
    """
    ls = args.lower().split()
    stardata = {"type": ls[0]}
    for i, word in enumerate(ls):
        # i - 1 because position 0 is the style name, not a reading
        stardata[self.stardataparamconvert.get(word, word)] = i - 1
    self.stardata = stardata

    if ls[0] in ("normal", "cartesian", "nosurvey"):
        assert (("from" in stardata and "to" in stardata) or "station" in stardata), args
    elif ls[0] == "default":
        # '*data default' restores the default column layout.  This has to be
        # stored on self: rebinding only the local name would be lost on return.
        self.stardata = self.stardatadefault
    else:
        assert ls[0] == "passage", args
def LoadSurvexFlags(self, line, cmd):
    """Accept a survex ``*flags`` command and deliberately do nothing with it.

    This is where 'splay', 'not splay', 'surface', 'not surface' or 'duplicate'
    could be switched on and off, but that data is only used for sense-checking,
    not to actually calculate anything important, so it is ignored.
    """
    return None
def IdentifyCave(self, cavepath):
    """Return the cave that the survex file at ``cavepath`` belongs to, or None.

    The path is matched against ``self.rx_cave``; the two captured groups are
    joined as '<group1>-<group2>' and looked up with
    ``models_caves.getCaveByReference``.

    cavepath -- path of a survex file, with or without the '.svx' suffix

    Returns the cave object on success.  Returns None (after printing a
    ' ! No cave match' line) when the path does not look like a cave path or
    when the lookup finds nothing.

    This method only identifies the cave; it no longer tries to assign it to a
    ``survexfile`` variable, which does not exist in this scope and made every
    successful match fail with NameError.  Callers attach the returned cave to
    whatever object they are building.
    """
    path = os.path.join(os.path.split(cavepath)[0], re.sub(r"\.svx$", "", cavepath))
    path_match = self.rx_cave.search(path)
    print(' - Attempting cave match for %s' % path)
    if path_match:
        pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
        cave = models_caves.getCaveByReference(pos_cave)
        if cave:
            print(' - Cave matched for %s' % path)
            return cave
    # Reached both when the path has no cave component and when the lookup failed
    print(' ! No cave match for %s' % path)
    return None
def LoadSurvexFileBlock(self, survexblock, includelabel):
    """Create the SurvexFile, SurvexDirectory and SurvexBlock for an included file.

    Called when a ';*include <path>' marker is met in a collated survex file.
    The new objects are saved to the database, linked to the cave identified
    from the path (which may be None), and become ``self.currentsurvexfile``
    and ``self.currentsurvexblock``.

    survexblock  -- the block that is current when the marker is met; it becomes
                    the parent of the new block
    includelabel -- the included file's path, either as text or as the regex
                    match object produced by ``rx_comminc`` (group 1 is the path)
    """
    # The comment handler passes the rx_comminc match itself, so accept both forms
    if hasattr(includelabel, "group"):
        includelabel = includelabel.group(1)
    includelabel = includelabel.strip()

    cave = self.IdentifyCave(includelabel)
    dirpath = os.path.split(includelabel)[0]

    newsurvexfile = models_survex.SurvexFile(path=includelabel)
    newsurvexfile.cave = cave
    # Saved first so that the directory can refer to it as its primary file
    # (assumes primarysurvexfile is a relation to SurvexFile -- TODO confirm)
    newsurvexfile.save()

    survexdirectory = models_survex.SurvexDirectory(path=dirpath, cave=cave,
                                                    primarysurvexfile=newsurvexfile)
    survexdirectory.save()
    newsurvexfile.survexdirectory = survexdirectory
    newsurvexfile.save()

    name = includelabel
    newsurvexblock = models_survex.SurvexBlock(name=name, parent=survexblock,
                                               survexpath=survexblock.survexpath + "." + name,
                                               cave=cave, survexfile=newsurvexfile,
                                               legsall=0, legssplay=0, legssurfc=0,
                                               totalleglength=0.0)
    newsurvexblock.save()

    self.currentsurvexfile = newsurvexfile
    self.currentsurvexblock = newsurvexblock
def LoadSurvexComment(self, survexblock, comment):
# ignore all comments except ;ref and ;QM
# ignore all comments except ;ref and ;QM and ;*include (for collated survex file)
refline = self.rx_ref.match(comment)
if refline:
comment = comment.replace("ref","").strip()
@ -285,11 +349,17 @@ class LoadSurvex():
qmline = self.rx_qm.match(comment)
if qmline:
self.LoadSurvexQM(survexblock, qmline)
included = self.rx_comminc.match(comment)
# ;*include means we have been included; not 'proceed to include' which *include means
if included:
self.LoadSurvexFileBlock(survexblock, included)
def LoadSurvexFlags(self, line, cmd):
# Here we could set on/off 'splay', 'not splay', 'surface', 'not surface', or 'duplicate'
# but this data is only used for sense-checking not to actually calculate anything important
pass
edulcni = self.rx_commcni.match(comment)
# ;*edulcni marks the end of an included file, so step back out to the parent block and its file
if edulcni:
currentsurvexblock = currentsurvexblock.parent
currentsurvexfile = currentsurvexblock.parent.survexfile
def LoadSurvexSetup(self,survexblock, survexfile):
self.depthbegin = 0
@ -311,12 +381,10 @@ class LoadSurvex():
if cave:
survexfile.cave = cave
def RecursiveLoad(self, survexblock, survexfile, fin):
def RecursiveRecursiveLoad(self, survexblock, survexfile, fin):
"""Follows the *include links in all the survex files from the root file 1623.svx
and reads in the survex blocks, other data and the wallet references (scansfolder) as it
goes. This part of the data import process is where the maximum memory is used and where it
goes. This part of the data include process is where the maximum memory is used and where it
crashes on memory-constrained machines. Begin-end blocks may also be nested.
"""
self.LoadSurvexSetup(survexblock, survexfile)
@ -361,7 +429,7 @@ class LoadSurvex():
self.insp += "> "
#--------------------------------------------------------
fininclude = includesurvexfile.OpenFile()
self.RecursiveLoad(survexblock, includesurvexfile, fininclude)
self.RecursiveRecursiveLoad(survexblock, includesurvexfile, fininclude)
fininclude.close()
#--------------------------------------------------------
self.insp = self.insp[2:]
@ -402,7 +470,7 @@ class LoadSurvex():
print(insp+" - BLOCK-enter nested *begin/*end block: '{}'".format(name))
self.insp += "> "
#--------------------------------------------------------
self.RecursiveLoad(survexblockdown, survexfile, fin)
self.RecursiveRecursiveLoad(survexblockdown, survexfile, fin)
#--------------------------------------------------------
# do not close the file as there may be more blocks in this one
# and it is re-read afresh with every nested begin-end block.
@ -451,12 +519,99 @@ class LoadSurvex():
else:
pass # ignore all other sorts of data
def RecursiveScan(self, survexblock, survexfile, fin, flinear):
"""Follows the *include links in all the survex files from the root file 1623.svx
and reads only the *import and *begin and *end statements. It produces a linearised
list of the import tree
def LinearRecursiveLoad(self, survexblock, path, fin, skipto):
"""Loads a single survex file. Usually used to import all the survex files which have been collated
into a single file. Loads the begin/end blocks recursively.

Each line is split into a statement part and a comment part. Comments are passed to
LoadSurvexComment; star-commands are dispatched on their lower-cased name; any other
line is passed to LoadSurvexLineLeg when the current *data style has a "from" column.

survexblock -- the current block; rebound to the new child block on a named *begin
path -- path of the file being read; stored in self.relativefilename
fin -- open file object, read in full with fin.read()
skipto -- lines are skipped while the running self.lineno is below this value

NOTE(review): no call back into this function is visible here, so the "recursively"
above may describe an intention rather than the current code -- confirm.
"""
# NOTE(review): 'indent' is not referenced again in this function
indent = " " * self.depthimport
self.relativefilename = path
# NOTE(review): 'cave' is not referenced again in this function; new blocks use self.currentcave
cave = self.IdentifyCave(path) # this will produce null for survex files which are geographic collections
svxlines = fin.read().splitlines()
for svxline in svxlines:
# self.lineno is an attribute, so the count carries on from whatever it held before this call
self.lineno += 1
if self.lineno < skipto:
continue # skip through file to the place we got up to
sline, comment = self.rx_comment.match(svxline.strip()).groups()
if comment:
self.LoadSurvexComment(survexblock, comment)
if not sline:
continue # skip blank lines
# detect a star command
mstar = self.rx_star.match(sline)
if mstar: # yes we are reading a *cmd
cmd, args = mstar.groups()
cmd = cmd.lower()
# NOTE(review): in the patterns below "(?i)" follows the text instead of leading it.
# Python 3.11+ rejects a global inline flag that is not at the start of the pattern
# (re.error), and 3.6-3.10 emit a DeprecationWarning. cmd is already lower-cased
# above, so the flag is not needed for the cmd tests -- confirm target Python version.
if re.match("begin$(?i)", cmd):
self.depthbegin += 1
if args:
depth = " " * self.depthbegin
self.stackbegin.append(args.lower())
# remember the leg count at the start of the block so *end can work out the legs in it
previousnlegs = self.survexlegsnumber
name = args.lower()
print(' - Begin found for:{}, creating new SurvexBlock '.format(name))
survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock,
survexpath=survexblock.survexpath+"."+name,
cave=self.currentcave, survexfile=self.currentsurvexfile,
legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
survexblockdown.save()
survexblock.save()
# from here on the new child block is the current block
survexblock = survexblockdown
else:
self.depthbegin += 1
elif re.match("end$(?i)", cmd):
# haven't really thought this through..
if survexblock:
self.currentsurvexblock = survexblock.parent
self.currentsurvexfile = survexblock.parent.survexfile
if self.depthbegin:
print(" - End -return from nested *begin/*end block: '{}'".format(args))
self.depthbegin -= 1
else:
# NOTE(review): previousnlegs is only assigned in the named-*begin branch above;
# confirm it is always set before this line can run
legsinblock = self.survexlegsnumber - previousnlegs
print(" - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber))
survexblock.legsall = legsinblock
survexblock.save()
return
elif re.match("title$(?i)", cmd):
self.currenttitle = args
elif cmd == "ref":
self.LoadSurvexRef(survexblock, args)
elif cmd == "flags":
self.LoadSurvexFlags(args, cmd)
elif cmd == "data":
self.LoadSurvexDataCmd(survexblock, args)
elif re.match("date$(?i)", cmd):
self.LoadSurvexDate(survexblock, args)
elif re.match("team$(?i)", cmd):
self.LoadSurvexTeam(survexblock, args)
elif cmd == "set" and re.match("names(?i)", args):
pass
elif re.match("include$(?i)", cmd):
# a collated file carries ;*include comment markers instead, so a live *include is reported
message = " ! -ERROR *include command not expected here {}. Re-run a full Survex import.".format(path)
print(message)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
else:
self.LoadSurvexIgnore(survexblock, args, cmd)
else: # not a *cmd so we are reading data OR rx_comment failed
if "from" in self.stardata: # only interested in survey legs
self.LoadSurvexLineLeg(survexblock, svxline, sline, comment)
else:
pass # ignore all other sorts of data
def RecursiveScan(self, survexblock, survexfile, fin, flinear, fcollate):
"""Follows the *include links in all the survex files from the root file 1623.svx
and reads only the *include and *begin and *end statements. It produces a linearised
list of the include tree
"""
indent = " " * self.depthinclude
sys.stderr.flush();
self.callcount +=1
if self.callcount % 10 ==0 :
@ -464,11 +619,28 @@ class LoadSurvex():
if self.callcount % 500 ==0 :
print("\n", file=sys.stderr,end='')
if survexfile in self.svxfileslist:
message = " * Warning. Survex file already seen: {}".format(survexfile.path)
print(message)
print(message,file=flinear)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
if self.svxfileslist.count(survexfile) > 20:
message = " ! ERROR. Survex file already seen 20x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(survexfile.path)
print(message)
print(message,file=flinear)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
return
self.svxfileslist.append(survexfile)
svxlines = fin.read().splitlines()
for svxline in svxlines:
self.lineno += 1
includestmt =self.rx_include.match(svxline)
if not includestmt:
fcollate.write("{}\n".format(svxline))
sline, comment = self.rx_comment.match(svxline.strip()).groups()
mstar = self.rx_star.match(sline)
if mstar: # yes we are reading a *cmd
@ -481,24 +653,32 @@ class LoadSurvex():
includesurvexfile = models_survex.SurvexFile(path=includepath)
if includesurvexfile.exists():
# do not create SurvexFile in DB here by doing includesurvexfile.save(). Do it when reading data.
#--------------------------------------------------------
self.depthimport += 1
self.depthinclude += 1
fininclude = includesurvexfile.OpenFile()
flinear.write("{:2} {} *import {}\n".format(self.depthimport, indent, includesurvexfile.path))
fcollate.write(";*include {}\n".format(includesurvexfile.path))
flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includesurvexfile.path))
push = includesurvexfile.path.lower()
self.stackimport.append(push)
self.RecursiveScan(survexblock, includesurvexfile, fininclude, flinear)
pop = self.stackimport.pop()
self.stackinclude.append(push)
self.RecursiveScan(survexblock, includesurvexfile, fininclude, flinear, fcollate)
pop = self.stackinclude.pop()
if pop != push:
print("!!!!!!! ERROR pop != push {} != {} {}".format(pop, push, self.stackimport))
print("!!!!!!! ERROR pop != push {} != {} {}\n".format(pop, push, self.stackimport),file=flinear)
print("!!!!!!! ERROR pop != push {} != {} {}".format(pop, push, self.stackimport),file=sys.stderr)
flinear.write("{:2} {} *tropmi {}\n".format(self.depthimport, indent, includesurvexfile.path))
message = "!!!!!!! ERROR pop != push {} != {} {}".format(pop, push, self.stackinclude)
print(message)
print(message,file=flinear)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, includesurvexfile.path))
fcollate.write(";*edulcni {}\n".format(includesurvexfile.path))
fininclude.close()
self.depthimport -= 1
self.depthinclude -= 1
#--------------------------------------------------------
else:
print(" ! ERROR *include file not found for {}".format(includesurvexfile))
message = " ! ERROR *include file not found for {}".format(includesurvexfile)
print(message)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
elif re.match("begin$(?i)", cmd):
self.depthbegin += 1
depth = " " * self.depthbegin
@ -516,14 +696,17 @@ class LoadSurvex():
args = " "
popargs = self.stackbegin.pop()
if popargs != args.lower():
print("!!!!!!! ERROR BEGIN/END pop != push {} != {}\n{}".format(popargs, args, self. stackbegin))
print("!!!!!!! ERROR BEGIN/END pop != push {} != {}\n{}\n".format(popargs, args, self. stackbegin), file=flinear)
print(" !!!!!!! ERROR BEGIN/END pop != push {} != {}\n{}".format(popargs, args,self. stackbegin), file=sys.stderr,)
message = "!!!!!!! ERROR BEGIN/END pop != push {} != {}\n{}".format(popargs, args, self. stackbegin)
print(message)
print(message,file=flinear)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
self.depthbegin -= 1
pass
def FindAndLoadAllSurvex(survexblockroot):
"""Follows the *include links recursively to find files
"""
@ -535,43 +718,69 @@ def FindAndLoadAllSurvex(survexblockroot):
print(' - SCANNING All Survex Blocks...',file=sys.stderr)
survexfileroot = survexblockroot.survexfile
svxl0 = LoadSurvex()
svxl0.callcount = 0
svxl0.depthimport = 0
collatefilename = "_" + survexfileroot.path + ".svx"
svx_scan = LoadSurvex()
svx_scan.callcount = 0
svx_scan.depthinclude = 0
indent=""
fcollate = open(collatefilename, 'w')
mem0 = models.get_process_memory()
print(" - MEM:{:7.2f} MB START".format(mem0),file=sys.stderr)
flinear = open('svxlinear.log', 'w')
flinear.write(" - MEM:{:.2f} MB START {}\n".format(mem0,survexfileroot.path))
flinear.write(" - MEM:{:7.2f} MB START {}\n".format(mem0,survexfileroot.path))
finroot = survexfileroot.OpenFile()
flinear.write("{:2} {} *import {}\n".format(svxl0.depthimport, indent, survexfileroot.path))
svxl0.RecursiveScan(survexblockroot, survexfileroot, finroot, flinear)
flinear.write("{:2} {} *tropmi {}\n".format(svxl0.depthimport, indent, survexfileroot.path))
fcollate.write(";*include {}\n".format(survexfileroot.path))
flinear.write("{:2} {} *include {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path))
svx_scan.RecursiveScan(survexblockroot, survexfileroot, finroot, flinear, fcollate)
flinear.write("{:2} {} *edulcni {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path))
fcollate.write(";*edulcni {}\n".format(survexfileroot.path))
mem1 = models.get_process_memory()
flinear.write(" - MEM:{:.2f} MB STOP {}\n".format(mem1,survexfileroot.path))
flinear.write(" - MEM:{:.3f} MB USED\n".format(mem1-mem0))
svxfileslist = svxl0.svxfileslist
flinear.write(" - {:,} survex files in linear import list \n".format(len(svxfileslist)))
svxfileslist = svx_scan.svxfileslist
flinear.write(" - {:,} survex files in linear include list \n".format(len(svxfileslist)))
flinear.close()
svxl0 = None
print("\n - {:,} survex files in linear import list \n".format(len(svxfileslist)),file=sys.stderr)
fcollate.close()
svx_scan = None
print("\n - {:,} survex files in linear include list \n".format(len(svxfileslist)),file=sys.stderr)
# INSERT IN HERE linear, not recursive, wrt import loading of all the data using [svxfileslist] #
for f in svxfileslist:
# Load legs etc. recursive only in BEGIN / END
pass
mem1 = models.get_process_memory()
print(" - MEM:{:7.2f} MB END ".format(mem0),file=sys.stderr)
print(" - MEM:{:7.3f} MB USED".format(mem1-mem0),file=sys.stderr)
svxfileslist = [] # free memory
# Before doing this, it would be good to identify the *equate and *entrance we need that are relevant to the
# entrance locations currently loaded after this by LoadPos(), but could better be done before ?
# look in MapLocations() for how we find the entrances
print('\n - Loading All Survex Blocks...',file=sys.stderr)
svx_load = LoadSurvex()
with open(collatefilename, "r") as fcollate:
#svx_load.LinearRecursiveLoad(survexblockroot,survexfileroot.path,fcollate, 0)
pass
print(" - MEM:{:7.2f} MB STOP".format(mem1),file=sys.stderr)
print(" - MEM:{:7.3f} MB USED".format(mem1-mem0),file=sys.stderr)
survexlegsnumber = svx_load.survexlegsnumber
survexlegsalllength = svx_load.survexlegsalllength
mem1 = models.get_process_memory()
svx_load = None
print('\n - Loading All Survex Blocks...',file=sys.stderr)
svxlrl = LoadSurvex()
finroot = survexfileroot.OpenFile()
svxlrl.RecursiveLoad(survexblockroot, survexfileroot, finroot)
svxlrl.RecursiveRecursiveLoad(survexblockroot, survexfileroot, finroot)
finroot.close()
survexlegsnumber = svxlrl.survexlegsnumber
survexlegsalllength = svxlrl.survexlegsalllength
svxlrl = None
# Close the logging file, Restore sys.stdout to our old saved file handle
sys.stdout.close()
print("+", file=sys.stderr)