2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-25 16:51:54 +00:00

rearrange ref and comment detection

This commit is contained in:
Philip Sargent 2020-06-24 22:46:18 +01:00
parent 664c18ebbe
commit 04f14c91f0

View File

@ -38,7 +38,6 @@ class LoadSurvex():
stardatadefault = {"type":"normal", "t":"leg", "from":0, "to":1, "tape":2, "compass":3, "clino":4}
stardataparamconvert = {"length":"tape", "bearing":"compass", "gradient":"clino"}
rx_braskets= re.compile(r"[()]")
rx_linelen = re.compile(r"[\d\-+.]+$")
rx_team = re.compile(r"(?i)(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$")
rx_person = re.compile(r"(?i) and | / |, | & | \+ |^both$|^none$")
@ -46,21 +45,15 @@ class LoadSurvex():
# remember there is also QM_PATTERN used in views_other and set in settings.py
rx_comment = re.compile(r"([^;]*?)\s*(?:;\s*(.*))?\n?$")
rx_ref = re.compile(r'.*?ref.*?(\d+)\s*#\s*(X)?\s*(\d+)')
rx_ref = re.compile(r'^\s*ref[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)')
rx_star = re.compile(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$')
# years from 1960 to 2039
rx_starref = re.compile(r'(?i)^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$')
# rx_starref = re.compile("""?x # VERBOSE mode - can't get this to work
# ^\s*\*ref # look for *ref at start of line
# [\s.:]* # some spaces, stops or colons
# ((?:19[6789]\d)|(?:20[0123]\d)) # a date from 1960 to 2039 - captured as one field
# \s*# # spaces then hash separator
# ?\s*(X) # optional X - captured
# ?\s*(.*?\d+.*?) # maybe a space, then at least one digit in the string - captured
# $(?i)""", re.X) # the end (do the whole thing case insensitively)
rx_argsref = re.compile(r'(?i)^[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$')
survexlegsalllength = 0.0
survexlegsnumber = 0
depthbegin = 0
lineno = 0
insp = ""
callcount = 0
stardata ={}
@ -112,16 +105,21 @@ class LoadSurvex():
survexblock.expeditionday = survexblock.expedition.get_expedition_day(survexblock.date)
survexblock.save()
def LoadSurvexLineLeg(self, survexblock, stardata, sline, comment):
def LoadSurvexLineLeg(self, survexblock, svxline, sline, comment):
"""This reads compass, clino and tape data but only keeps the tape lengths,
the rest is discarded after error-checking.
"""
ls = sline.lower().split()
stardata = self.stardata
survexleg = SurvexLeg()
ls = sline.lower().split()
# this next fails for two surface survey svx files which use / for decimal point
# e.g. '29/09' in the tape measurement, or use decimals but in brackets, e.g. (06.05)
if stardata["type"] == "normal":
tape = self.rx_braskets.sub("",ls[stardata["tape"]])
if stardata["type"] == "normal": # should use current flags setting for this
# print(" !! lineno '{}'\n !! svxline '{}'\n !! sline '{}'\n !! ls '{}'\n !! stardata {}".format(self.lineno, svxline, sline, ls,stardata))
tape = ls[stardata["tape"]]
tape = tape.replace("(",".")
tape = tape.replace(")",".")
tape = tape.replace("/",".")
try:
survexleg.tape = float(tape)
@ -133,6 +131,13 @@ class LoadSurvex():
message = ' ! Value Error: Tape misread in line %s in %s' % (ls, survexblock.survexfile.path)
models.DataIssue.objects.create(parser='survex', message=message)
survexleg.tape = 0
try:
survexblock.totalleglength += survexleg.tape
self.survexlegsalllength += survexleg.tape
except ValueError:
message = ' ! Value Error: Tape length not added %s in %s' % (ls, survexblock.survexfile.path)
models.DataIssue.objects.create(parser='survex', message=message)
try:
lclino = ls[stardata["clino"]]
except:
@ -142,6 +147,7 @@ class LoadSurvex():
message = ' ! Value Error: Clino misread in line %s in %s' % (ls, survexblock.survexfile.path)
models.DataIssue.objects.create(parser='survex', message=message)
lclino = error
try:
lcompass = ls[stardata["compass"]]
except:
@ -151,6 +157,7 @@ class LoadSurvex():
message = ' ! Value Error: Compass misread in line %s in %s' % (ls, survexblock.survexfile.path)
models.DataIssue.objects.create(parser='survex', message=message)
lcompass = error
if lclino == "up":
survexleg.compass = 0.0
survexleg.clino = 90.0
@ -176,22 +183,23 @@ class LoadSurvex():
# delete the object so that django autosaving doesn't save it.
survexleg = None
itape = stardata.get("tape")
if itape:
try:
survexblock.totalleglength += float(ls[itape])
self.survexlegsalllength += float(ls[itape])
except ValueError:
print("! Length not added")
def LoadSurvexRef(self, survexblock, args):
# *REF but also ; Ref years from 1960 to 2039
if len(args)< 4:
message = " ! Empty or BAD *REF command '{}' at {}".format(args, survexblock.survexfile.path)
print((self.insp+message))
models.DataIssue.objects.create(parser='survex', message=message)
return
argsgps = self.rx_argsref.match(args)
if argsgps:
yr, letterx, wallet = argsgps.groups()
else:
message = " ! BAD *REF command '{}' at {}".format(args, survexblock.survexfile.path)
print((self.insp+message))
models.DataIssue.objects.create(parser='survex', message=message)
return
def LoadSurvexLinePassage(self, survexblock, stardata, sline, comment):
# do not import this: *data passage.. data which is LRUD not tape/compass/clino
pass
def LoadSurvexRef(self, survexblock, mstar):
# *REF but also ; Ref
yr,letterx,wallet = mstar.groups()
if not letterx:
letterx = ""
else:
@ -199,23 +207,32 @@ class LoadSurvex():
if len(wallet)<2:
wallet = "0" + wallet
assert (int(yr)>1960 and int(yr)<2039), "Wallet year out of bounds: %s" % yr
assert (int(wallet)<100), "Wallet number more than 100: %s" % wallet
refscan = "%s#%s%s" % (yr, letterx, wallet)
try:
if int(wallet)>100:
message = " ! Wallet *REF {} - too big {}".format(refscan, survexblock.survexfile.path)
print((self.insp+message))
models.DataIssue.objects.create(parser='survex', message=message)
except:
message = " ! Wallet *REF {} - not numeric {}".format(refscan, survexblock.survexfile.path)
print((self.insp+message))
models.DataIssue.objects.create(parser='survex', message=message)
manyscansfolders = models_survex.ScansFolder.objects.filter(walletname=refscan)
if manyscansfolders:
survexblock.scansfolder = manyscansfolders[0]
survexblock.save()
if len(manyscansfolders) > 1:
message = ' ! Wallet *REF {} - multiple scan folders found {}'.format(refscan, survexblock.survexfile.path)
message = " ! Wallet *REF {} - multiple scan folders found {}".format(refscan, survexblock.survexfile.path)
print((self.insp+message))
models.DataIssue.objects.create(parser='survex', message=message)
else:
message = ' ! Wallet *REF {} - NOT found in manyscansfolders {}'.format(refscan, survexblock.survexfile.path)
message = " ! Wallet *REF {} - NOT found in manyscansfolders {}".format(refscan, survexblock.survexfile.path)
print((self.insp+message))
models.DataIssue.objects.create(parser='survex', message=message)
def LoadSurvexQM(self, insp, survexblock, qmline):
def LoadSurvexQM(self, survexblock, qmline):
insp = self.insp
qm_no = qmline.group(1)
qm_grade = qmline.group(2)
qm_from_section = qmline.group(3)
@ -261,28 +278,33 @@ class LoadSurvex():
models.DataIssue.objects.create(parser='survex', message=message)
pass
def LoadSurvexComment(self, survexblock, comment):
# ignore all comments except ;ref and ;QM
refline = self.rx_ref.match(comment)
if refline:
comment = comment.replace("ref","").strip()
self.LoadSurvexRef(survexblock, comment)
def RecursiveLoad(self,survexblock, survexfile, fin):
"""Follows the *include links in all the survex files from the root file 1623.svx
and reads in the survex blocks, other data and the wallet references (scansfolder) as it
goes. This part of the data import process is where the maximum memory is used and where it
crashes on memory-constrained machines. Begin-end blocks may also be nested.
"""
iblankbegins = 0
stardata = self.stardatadefault
insp =self.insp
qmline = self.rx_qm.match(comment)
if qmline:
self.LoadSurvexQM(survexblock, qmline)
def LoadSurvexFlags(self, line, cmd):
    """Accept a survex ``*flags`` command without acting on it.

    This is the place where flag state such as 'splay', 'not splay',
    'surface', 'not surface' or 'duplicate' could be switched on or off.
    That information would only be used for sense-checking, not to
    calculate anything important, so the command is deliberately ignored.

    line -- the argument text that followed the command
    cmd  -- the command word itself
    """
    return None
def LoadSurvexSetup(self,survexblock, survexfile):
self.depthbegin = 0
self.stardata = self.stardatadefault
blocklegs = self.survexlegsnumber
print(insp+" - MEM:{:.3f} Reading. parent:{} <> {} ".format(models.get_process_memory(),survexblock.survexfile.path,survexfile.path))
stamp = datetime.now()
lineno = 0
print(self.insp+" - MEM:{:.3f} Reading. parent:{} <> {} ".format(models.get_process_memory(),survexblock.survexfile.path, survexfile.path))
self.lineno = 0
sys.stderr.flush();
self.callcount +=1
if self.callcount >=10:
self.callcount=0
print(".", file=sys.stderr,end='')
# Try to find the cave in the DB if not use the string as before
path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path)
if path_match:
@ -290,150 +312,144 @@ class LoadSurvex():
cave = models_caves.getCaveByReference(pos_cave)
if cave:
survexfile.cave = cave
svxlines = ''
def RecursiveLoad(self, survexblock, survexfile, fin):
"""Follows the *include links in all the survex files from the root file 1623.svx
and reads in the survex blocks, other data and the wallet references (scansfolder) as it
goes. This part of the data import process is where the maximum memory is used and where it
crashes on memory-constrained machines. Begin-end blocks may also be nested.
"""
self.LoadSurvexSetup(survexblock, survexfile)
insp =self.insp
previousnlegs = 0
svxlines = fin.read().splitlines()
# cannot close file now as it may be recursively called with the same file id fin if nested *begin
# occurs.
# cannot close file now as may be recursively called with the same fin if nested *begin-end
for svxline in svxlines:
lineno += 1
# break the line at the comment
self.lineno += 1
sline, comment = self.rx_comment.match(svxline.strip()).groups()
mref = comment and self.rx_ref.match(comment)
if mref:
self.LoadSurvexRef(survexblock, mref)
qmline = comment and self.rx_qm.match(comment)
if qmline:
self.LoadSurvexQM(insp, survexblock, qmline)
if comment:
self.LoadSurvexComment(survexblock, comment)
if not sline:
continue
# detect the star ref command
rstar = self.rx_starref.match(sline)
if rstar:
self.LoadSurvexRef(survexblock, rstar)
continue # skip blank lines
# detect the star command
mstar = self.rx_star.match(sline)
if not mstar:
if "from" in stardata:
self.LoadSurvexLineLeg(survexblock, stardata, sline, comment)
pass
elif stardata["type"] == "passage":
pass
#self.LoadSurvexLinePassage(survexblock, stardata, sline, comment)
#Missing "station" in stardata.
continue
# detect the star command
cmd, line = mstar.groups()
cmd = cmd.lower()
if re.match("include$(?i)", cmd):
includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line)))
print((insp+' - Include path found, including - ' + includepath))
# Try to find the cave in the DB. if not, use the string as before
path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
if path_match:
pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
print((insp+' - Match in DB (i) for cave {}.'.format(pos_cave)))
cave = models_caves.getCaveByReference(pos_cave)
if cave:
survexfile.cave = cave
else:
print((insp+' - NO Match in DB (i) for a cave for {}'.format(includepath)))
includesurvexfile = models_survex.SurvexFile(path=includepath)
includesurvexfile.save()
includesurvexfile.SetDirectory()
if includesurvexfile.exists():
survexblock.save()
fininclude = includesurvexfile.OpenFile()
self.survexlegsnumber = blocklegs
self.insp += "> "
self.RecursiveLoad(survexblock, includesurvexfile, fininclude)
#--------------------------------------------------------
fininclude.close()
self.insp = self.insp[2:]
insp = self.insp
blocklegs = self.survexlegsnumber
else:
print((insp+' ! ERROR *include file not found for %s' % includesurvexfile))
elif re.match("begin$(?i)", cmd):
# On a *begin statement we start a new survexblock.
# There should not be any *include inside a begin-end block, so this is a simple
# load not a recursive fileload. But there may be many blocks nested to any depth in one file.
if line:
newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line))
# Try to find the cave in the DB if not use the string as before
path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath)
if mstar: # yes we are reading a *cmd
cmd, args = mstar.groups()
cmd = cmd.lower()
if re.match("include$(?i)", cmd):
includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args)))
print((insp+' - INCLUDE-go path found, including - ' + includepath))
# Try to find the cave in the DB. if not, use the string as before
path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
if path_match:
pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
# print(insp+pos_cave)
print((insp+' - Match in DB (i) for cave {}.'.format(pos_cave)))
cave = models_caves.getCaveByReference(pos_cave)
if cave:
survexfile.cave = cave
else:
print((insp+' - No match (b) for %s' % newsvxpath))
print((insp+' - NO Match in DB (i) for a cave for {}'.format(includepath)))
previousnlegs = blocklegs
name = line.lower()
print(insp+' - Begin found for:{}, creating new SurvexBlock '.format(name))
survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock,
survexpath=survexblock.survexpath+"."+name,
cave=survexfile.cave, survexfile=survexfile,
legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
survexblockdown.save()
survexblock.save()
survexblock = survexblockdown
print(insp+" - ENTERING nested *begin/*end block: {}".format(name))
self.survexlegsnumber = blocklegs
self.insp += "> "
self.RecursiveLoad(survexblockdown, survexfile, fin)
#--------------------------------------------------------
# do not close the file as there may be more blocks in this one
# and it is re-read afresh with every nested begin-end block.
self.insp = self.insp[2:]
insp = self.insp
blocklegs = self.survexlegsnumber
else:
iblankbegins += 1
includesurvexfile = models_survex.SurvexFile(path=includepath)
includesurvexfile.save()
includesurvexfile.SetDirectory()
if includesurvexfile.exists():
survexblock.save()
self.insp += "> "
#--------------------------------------------------------
fininclude = includesurvexfile.OpenFile()
self.RecursiveLoad(survexblock, includesurvexfile, fininclude)
fininclude.close()
#--------------------------------------------------------
self.insp = self.insp[2:]
insp = self.insp
print((insp+' - INCLUDE-return from include - ' + includepath))
else:
print((insp+' ! ERROR *include file not found for %s' % includesurvexfile))
elif re.match("end$(?i)", cmd):
if iblankbegins:
print(insp+" - RETURNING from nested *begin/*end block: {}".format(line))
iblankbegins -= 1
elif re.match("begin$(?i)", cmd):
# On a *begin statement we start a new survexblock.
# There should not be any *include inside a begin-end block, so this is a simple
# load not a recursive fileload. But there may be many blocks nested to any depth in one file.
if args:
newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args))
# Try to find the cave in the DB if not use the string as before
path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath)
if path_match:
pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
# print(insp+pos_cave)
cave = models_caves.getCaveByReference(pos_cave)
if cave:
survexfile.cave = cave
else:
print((insp+' - No match (b) for %s' % newsvxpath))
previousnlegs = self.survexlegsnumber
name = args.lower()
print(insp+' - Begin found for:{}, creating new SurvexBlock '.format(name))
# the recursive call re-reads the entire file. This is wasteful. We should pass in only
# the un-parsed part of the file.
survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock,
survexpath=survexblock.survexpath+"."+name,
cave=survexfile.cave, survexfile=survexfile,
legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
survexblockdown.save()
survexblock.save()
survexblock = survexblockdown
print(insp+" - BLOCK-enter nested *begin/*end block: '{}'".format(name))
self.insp += "> "
#--------------------------------------------------------
self.RecursiveLoad(survexblockdown, survexfile, fin)
#--------------------------------------------------------
# do not close the file as there may be more blocks in this one
# and it is re-read afresh with every nested begin-end block.
self.insp = self.insp[2:]
insp = self.insp
else:
self.depthbegin += 1
elif re.match("end$(?i)", cmd):
if self.depthbegin:
print(insp+" - BLOCK-return from nested *begin/*end block: '{}'".format(args))
self.depthbegin -= 1
else:
legsinblock = self.survexlegsnumber - previousnlegs
print(insp+" - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber))
survexblock.legsall = legsinblock
survexblock.save()
return
elif cmd == "ref":
self.LoadSurvexRef(survexblock, args)
elif cmd == "flags":
self.LoadSurvexFlags(args, cmd)
elif cmd == "data":
ls = args.lower().split()
stardata = { "type":ls[0] }
for i in range(0, len(ls)):
stardata[self.stardataparamconvert.get(ls[i], ls[i])] = i - 1
self.stardata = stardata
if ls[0] in ["normal", "cartesian", "nosurvey"]:
assert (("from" in stardata and "to" in stardata) or "station" in stardata), args
elif ls[0] == "default":
stardata = self.stardatadefault
else:
assert ls[0] == "passage", args
elif cmd == "set" and re.match("names(?i)", args):
pass
elif re.match("date$(?i)", cmd):
self.LoadSurvexDate(survexblock, args)
elif re.match("team$(?i)", cmd):
self.LoadSurvexTeam(survexblock, args)
else:
legsinblock = self.survexlegsnumber - previousnlegs
print(insp+" - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber))
survexblock.legsall = legsinblock
survexblock.save()
return
elif cmd == "flags":
# Here we could set on/off 'splay', 'not splay', 'surface', 'not surface', or 'duplicate'
# but this data is only used for sense-checking not to actually calculate anything important
pass
elif cmd == "data":
ls = line.lower().split()
stardata = { "type":ls[0] }
for i in range(0, len(ls)):
stardata[self.stardataparamconvert.get(ls[i], ls[i])] = i - 1
self.stardata = stardata
if ls[0] in ["normal", "cartesian", "nosurvey"]:
assert (("from" in stardata and "to" in stardata) or "station" in stardata), line
elif ls[0] == "default":
stardata = self.stardatadefault
self.LoadSurvexIgnore(survexblock, args, cmd)
else: # not a *cmd so we are reading data OR rx_comment failed
if "from" in self.stardata: # only interested in survey legs
self.LoadSurvexLineLeg(survexblock, svxline, sline, comment)
else:
assert ls[0] == "passage", line
elif cmd == "set" and re.match("names(?i)", line):
pass
elif re.match("date$(?i)", cmd):
self.LoadSurvexDate(survexblock, line)
elif re.match("team$(?i)", cmd):
self.LoadSurvexTeam(survexblock, line)
else:
self.LoadSurvexIgnore(survexblock, line, cmd)
pass # ignore all other sorts of data
def FindAndLoadAllSurvex(survexblockroot, survexfileroot):