forked from expo/troggle
rename SurvexScansFolders and tidy survex parser
This commit is contained in:
@@ -2,6 +2,8 @@ import sys
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import resource
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from subprocess import call, Popen, PIPE
|
||||
|
||||
@@ -29,14 +31,12 @@ ROOTBLOCK = "rootblock"
|
||||
|
||||
|
||||
def LoadSurvexLineLeg(survexblock, stardata, sline, comment, cave):
|
||||
"""This reads compass, clino and tape data but only keeps the tape lengths,
|
||||
the rest is discarded after error-checking.
|
||||
"""
|
||||
global survexlegsalllength
|
||||
global survexlegsnumber
|
||||
# The try catches here need replacing as they are relatively expensive
|
||||
ls = sline.lower().split()
|
||||
#ssfrom = survexblock.MakeSurvexStation(ls[stardata["from"]])
|
||||
#ssto = survexblock.MakeSurvexStation(ls[stardata["to"]])
|
||||
|
||||
# survexleg = models_survex.SurvexLeg(block=survexblock, stationfrom=ssfrom, stationto=ssto)
|
||||
survexleg = models_survex.SurvexLeg()
|
||||
# this next fails for two surface survey svx files which use / for decimal point
|
||||
# e.g. '29/09' in the tape measurement, or use decimals but in brackets, e.g. (06.05)
|
||||
@@ -97,11 +97,9 @@ def LoadSurvexLineLeg(survexblock, stardata, sline, comment, cave):
|
||||
if cave:
|
||||
survexleg.cave = cave
|
||||
|
||||
# only save proper legs
|
||||
# No need to save as we are measuring lengths only on parsing now.
|
||||
# delete the object so that django autosaving doesn't save it.
|
||||
survexleg = None
|
||||
#survexleg.save()
|
||||
|
||||
itape = stardata.get("tape")
|
||||
if itape:
|
||||
@@ -111,7 +109,6 @@ def LoadSurvexLineLeg(survexblock, stardata, sline, comment, cave):
|
||||
except ValueError:
|
||||
print("! Length not added")
|
||||
# No need to save as we are measuring lengths only on parsing now.
|
||||
#survexblock.save()
|
||||
|
||||
|
||||
def LoadSurvexEquate(survexblock, sline):
|
||||
@@ -146,12 +143,12 @@ rx_starref = re.compile(r'(?i)^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*
|
||||
# $(?i)""", re.X) # the end (do the whole thing case insensitively)
|
||||
|
||||
rx_team = re.compile(r"(?i)(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$")
|
||||
rx_team_member = re.compile(r"(?i) and | / |, | & | \+ |^both$|^none$")
|
||||
rx_person = re.compile(r"(?i) and | / |, | & | \+ |^both$|^none$")
|
||||
rx_qm = re.compile(r'^\s*QM(\d)\s+?([a-dA-DxX])\s+([\w\-]+)\.(\d+)\s+(([\w\-]+)\.(\d+)|\-)\s+(.+)$')
|
||||
|
||||
insp = ""
|
||||
callcount = 0
|
||||
def RecursiveLoad(survexblock, survexfile, fin, textlines):
|
||||
def RecursiveLoad(survexblock, survexfile, fin):
|
||||
"""Follows the *include links in all the survex files from the root file 1623.svx
|
||||
and reads in the survex blocks, other data and the wallet references (survexscansfolder) as it
|
||||
goes. This part of the data import process is where the maximum memory is used and where it
|
||||
@@ -165,7 +162,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
|
||||
global callcount
|
||||
global survexlegsnumber
|
||||
|
||||
print(insp+" - Reading file: " + survexblock.survexfile.path + " <> " + survexfile.path)
|
||||
print(insp+" - MEM:{} Reading. parent:{} <> {} ".format(get_process_memory(),survexblock.survexfile.path,survexfile.path))
|
||||
stamp = datetime.now()
|
||||
lineno = 0
|
||||
|
||||
@@ -199,12 +196,12 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
|
||||
if len(wallet)<2:
|
||||
wallet = "0" + wallet
|
||||
refscan = "%s#%s%s" % (yr, letterx, wallet )
|
||||
survexscansfolders = models_survex.SurvexScansFolder.objects.filter(walletname=refscan)
|
||||
if survexscansfolders:
|
||||
survexblock.survexscansfolder = survexscansfolders[0]
|
||||
manyscansfolders = models_survex.ScansFolder.objects.filter(walletname=refscan)
|
||||
if manyscansfolders:
|
||||
survexblock.survexscansfolder = manyscansfolders[0]
|
||||
survexblock.save()
|
||||
else:
|
||||
message = ' ! Wallet ; ref {} - NOT found in survexscansfolders {}'.format(refscan, survexblock.survexfile.path)
|
||||
message = ' ! Wallet ; ref {} - NOT found in manyscansfolders {}'.format(refscan, survexblock.survexfile.path)
|
||||
print((insp+message))
|
||||
models.DataIssue.objects.create(parser='survex', message=message)
|
||||
|
||||
@@ -265,12 +262,12 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
|
||||
assert (int(yr)>1960 and int(yr)<2039), "Wallet year out of bounds: %s" % yr
|
||||
assert (int(wallet)<100), "Wallet number more than 100: %s" % wallet
|
||||
refscan = "%s#%s%s" % (yr, letterx, wallet)
|
||||
survexscansfolders = models_survex.SurvexScansFolder.objects.filter(walletname=refscan)
|
||||
if survexscansfolders:
|
||||
survexblock.survexscansfolder = survexscansfolders[0]
|
||||
manyscansfolders = models_survex.ScansFolder.objects.filter(walletname=refscan)
|
||||
if manyscansfolders:
|
||||
survexblock.survexscansfolder = manyscansfolders[0]
|
||||
survexblock.save()
|
||||
else:
|
||||
message = ' ! Wallet *REF {} - NOT found in survexscansfolders {}'.format(refscan, survexblock.survexfile.path)
|
||||
message = ' ! Wallet *REF {} - NOT found in manyscansfolders {}'.format(refscan, survexblock.survexfile.path)
|
||||
print((insp+message))
|
||||
models.DataIssue.objects.create(parser='survex', message=message)
|
||||
continue
|
||||
@@ -291,17 +288,18 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
|
||||
cmd = cmd.lower()
|
||||
if re.match("include$(?i)", cmd):
|
||||
includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line)))
|
||||
print((insp+' - Include path found including - ' + includepath))
|
||||
# Try to find the cave in the DB if not use the string as before
|
||||
print((insp+' - Include path found, including - ' + includepath))
|
||||
# Try to find the cave in the DB. if not, use the string as before
|
||||
path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
|
||||
if path_match:
|
||||
pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
|
||||
# print(insp+pos_cave)
|
||||
print((insp+' - Match in DB (i) for cave {}.'.format(pos_cave)))
|
||||
cave = models_caves.getCaveByReference(pos_cave)
|
||||
if cave:
|
||||
survexfile.cave = cave
|
||||
else:
|
||||
print((insp+' - No match in DB (i) for %s, so loading..' % includepath))
|
||||
print((insp+' - NO Match in DB (i) for a cave for {}'.format(includepath)))
|
||||
|
||||
includesurvexfile = models_survex.SurvexFile(path=includepath)
|
||||
includesurvexfile.save()
|
||||
includesurvexfile.SetDirectory()
|
||||
@@ -309,10 +307,17 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
|
||||
survexblock.save()
|
||||
fininclude = includesurvexfile.OpenFile()
|
||||
insp += "> "
|
||||
RecursiveLoad(survexblock, includesurvexfile, fininclude, textlines)
|
||||
RecursiveLoad(survexblock, includesurvexfile, fininclude)
|
||||
#--------------------------------------------------------
|
||||
fininclude.close()
|
||||
insp = insp[2:]
|
||||
else:
|
||||
print((insp+' ! ERROR *include file not found for %s' % includesurvexfile))
|
||||
|
||||
elif re.match("begin$(?i)", cmd):
|
||||
# On a *begin statement we start a new survexblock.
|
||||
# There should not be any *include inside a begin-end block, so this is a simple
|
||||
# load not a recursive load. But there may be many blocks in one file.
|
||||
if line:
|
||||
newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line))
|
||||
# Try to find the cave in the DB if not use the string as before
|
||||
@@ -328,16 +333,16 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
|
||||
|
||||
previousnlegs = survexlegsnumber
|
||||
name = line.lower()
|
||||
print((insp+' - Begin found for: ' + name))
|
||||
# survexblockdown = models_survex.SurvexBlock(name=name, begin_char=fin.tell(), parent=survexblock, survexpath=survexblock.survexpath+"."+name, cave=survexfile.cave, survexfile=survexfile, totalleglength=0.0)
|
||||
print(insp+' - Begin found for:{}, creating new SurvexBlock '.format(name))
|
||||
survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, survexpath=survexblock.survexpath+"."+name,
|
||||
cave=survexfile.cave, survexfile=survexfile, legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
|
||||
survexblockdown.save()
|
||||
survexblock.save()
|
||||
survexblock = survexblockdown
|
||||
textlinesdown = [ ]
|
||||
insp += "> "
|
||||
RecursiveLoad(survexblockdown, survexfile, fin, textlinesdown)
|
||||
RecursiveLoad(survexblockdown, survexfile, fin)
|
||||
#--------------------------------------------------------
|
||||
# do not close the file as there may be more blocks in this one
|
||||
insp = insp[2:]
|
||||
else:
|
||||
iblankbegins += 1
|
||||
@@ -346,7 +351,6 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
|
||||
if iblankbegins:
|
||||
iblankbegins -= 1
|
||||
else:
|
||||
# .text not used, using it for number of legs per block
|
||||
legsinblock = survexlegsnumber - previousnlegs
|
||||
print(insp+"LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,survexlegsnumber))
|
||||
survexblock.legsall = legsinblock
|
||||
@@ -370,7 +374,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
|
||||
# print(insp+' - Team found: ')
|
||||
mteammember = rx_team.match(line)
|
||||
if mteammember:
|
||||
for tm in rx_team_member.split(mteammember.group(2)):
|
||||
for tm in rx_person.split(mteammember.group(2)):
|
||||
if tm:
|
||||
personexpedition = survexblock.expedition and GetPersonExpeditionNameLookup(survexblock.expedition).get(tm.lower())
|
||||
if (personexpedition, tm) not in teammembers:
|
||||
@@ -431,6 +435,31 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
|
||||
timetaken = endstamp - stamp
|
||||
# print(insp+' - Time to process: ' + str(timetaken))
|
||||
|
||||
def get_process_memory():
    """Return this process's peak resident set size, divided by 1024.

    The value is read from the ``ru_maxrss`` field of
    ``resource.getrusage(resource.RUSAGE_SELF)`` and returned as a float.

    NOTE(review): the unit of ``ru_maxrss`` is platform-dependent
    (kilobytes on Linux, which would make this result megabytes;
    reportedly bytes on macOS) - confirm for the deployment platform
    before attaching a unit to the number in log output.
    """
    usage = resource.getrusage(resource.RUSAGE_SELF)
    # Use the named field rather than the positional index 2 so the intent
    # is visible and does not depend on the struct's field order.
    return usage.ru_maxrss / 1024.0
|
||||
|
||||
|
||||
def FindAndLoadAllSurvex(survexblockroot, survexfileroot):
    """Load the root survex file, following the *include links recursively.

    While the load runs, everything printed to stdout is written to the file
    'loadsurvexblks.log' in the current working directory. stdout is restored
    and both files are closed even if the load raises, so a failed import
    does not leave the process printing into a closed or stale log file.

    Parameters:
        survexblockroot: the root survex block, passed to RecursiveLoad as
            the block that loaded data is attached to.
        survexfileroot: the root survex file object; its OpenFile() method
            supplies the open file handle that RecursiveLoad reads from.

    Any exception raised by OpenFile() or RecursiveLoad() propagates to the
    caller after cleanup. On success a single "+" is written to stderr.
    """
    print(' - redirecting stdout to loadsurvexblks.log...')
    stdout_orig = sys.stdout
    try:
        # The 'with' guarantees the log file is closed on every exit path.
        with open('loadsurvexblks.log', 'w') as logfile:
            # Redirect sys.stdout to the file
            sys.stdout = logfile

            finroot = survexfileroot.OpenFile()
            try:
                RecursiveLoad(survexblockroot, survexfileroot, finroot)
            finally:
                finroot.close()
    finally:
        # Restore sys.stdout to our old saved file handler, whether or not
        # the load succeeded.
        sys.stdout = stdout_orig

    # Progress marker on stderr, emitted only after a successful load.
    print("+", file=sys.stderr)
    sys.stderr.flush()
|
||||
|
||||
|
||||
def LoadAllSurvexBlocks():
|
||||
global survexlegsalllength
|
||||
global survexlegsnumber
|
||||
@@ -441,47 +470,28 @@ def LoadAllSurvexBlocks():
|
||||
models_survex.SurvexFile.objects.all().delete()
|
||||
models_survex.SurvexDirectory.objects.all().delete()
|
||||
models_survex.SurvexEquate.objects.all().delete()
|
||||
#models_survex.SurvexLeg.objects.all().delete()
|
||||
models_survex.SurvexTitle.objects.all().delete()
|
||||
models_survex.SurvexPersonRole.objects.all().delete()
|
||||
models_survex.SurvexStation.objects.all().delete()
|
||||
|
||||
print(" - Data flushed")
|
||||
# Clear the data issues as we are reloading
|
||||
models.DataIssue.objects.filter(parser='survex').delete()
|
||||
print(' - Loading All Survex Blocks...')
|
||||
|
||||
print(' - redirecting stdout to loadsurvexblks.log...')
|
||||
stdout_orig = sys.stdout
|
||||
# Redirect sys.stdout to the file
|
||||
sys.stdout = open('loadsurvexblks.log', 'w')
|
||||
|
||||
survexfile = models_survex.SurvexFile(path=settings.SURVEX_TOPNAME, cave=None)
|
||||
survexfile.save()
|
||||
survexfile.SetDirectory()
|
||||
|
||||
#Load all
|
||||
# this is the first so id=1
|
||||
survexblockroot = models_survex.SurvexBlock(name=ROOTBLOCK, survexpath="", cave=None, survexfile=survexfile,
|
||||
|
||||
survexfileroot = models_survex.SurvexFile(path=settings.SURVEX_TOPNAME, cave=None)
|
||||
survexfileroot.save()
|
||||
survexfileroot.SetDirectory()
|
||||
survexblockroot = models_survex.SurvexBlock(name=ROOTBLOCK, survexpath="", cave=None, survexfile=survexfileroot,
|
||||
legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
|
||||
# this is the first so id=1
|
||||
survexblockroot.save()
|
||||
fin = survexfile.OpenFile()
|
||||
textlines = [ ]
|
||||
# The real work starts here
|
||||
RecursiveLoad(survexblockroot, survexfile, fin, textlines)
|
||||
fin.close()
|
||||
|
||||
print(' - Loading All Survex Blocks...')
|
||||
FindAndLoadAllSurvex(survexblockroot, survexfileroot)
|
||||
|
||||
survexblockroot.totalleglength = survexlegsalllength
|
||||
survexblockroot.legsall = survexlegsnumber
|
||||
#survexblockroot.text = "".join(textlines) these are all blank
|
||||
survexblockroot.save()
|
||||
|
||||
# Close the file
|
||||
sys.stdout.close()
|
||||
print("+", file=sys.stderr)
|
||||
sys.stderr.flush();
|
||||
|
||||
# Restore sys.stdout to our old saved file handler
|
||||
sys.stdout = stdout_orig
|
||||
print(" - total number of survex legs: {}m".format(survexlegsnumber))
|
||||
print(" - total leg lengths loaded: {}m".format(survexlegsalllength))
|
||||
print(' - Loaded All Survex Blocks.')
|
||||
@@ -500,48 +510,6 @@ def LoadPos():
|
||||
topdata = settings.SURVEX_DATA + settings.SURVEX_TOPNAME
|
||||
print((' - Generating a list of Pos from %s.svx and then loading...' % (topdata)))
|
||||
|
||||
# TO DO - remove the cache file apparatus. Not needed. Only laser points and entrances loaded now.
|
||||
|
||||
# Be careful with the cache file.
|
||||
# If LoadPos has been run before,
|
||||
# but without cave import being run before,
|
||||
# then *everything* may be in the fresh 'not found' cache file.
|
||||
|
||||
# cachefile = settings.SURVEX_DATA + "posnotfound.cache"
|
||||
# notfoundbefore = {}
|
||||
# if os.path.isfile(cachefile):
|
||||
# # this is not a good test. 1623.svx may never change but *included files may have done.
|
||||
# # When the *include is unrolled, we will be able to get a proper timestamp to use
|
||||
# # and can increase the timeout from 3 days to 30 days.
|
||||
# updtsvx = os.path.getmtime(topdata + ".svx")
|
||||
# updtcache = os.path.getmtime(cachefile)
|
||||
# age = updtcache - updtsvx
|
||||
# print((' svx: %s cache: %s not-found cache is fresher by: %s' % (updtsvx, updtcache, str(timedelta(seconds=age) ))))
|
||||
|
||||
# now = time.time()
|
||||
# if now - updtcache > 3*24*60*60:
|
||||
# print(" cache is more than 3 days old. Deleting.")
|
||||
# os.remove(cachefile)
|
||||
# elif age < 0 :
|
||||
# print(" cache is stale. Deleting.")
|
||||
# os.remove(cachefile)
|
||||
# else:
|
||||
# print(" cache is fresh. Reading...")
|
||||
# try:
|
||||
# with open(cachefile, "r") as f:
|
||||
# for line in f:
|
||||
# l = line.rstrip()
|
||||
# if l in notfoundbefore:
|
||||
# notfoundbefore[l] +=1 # should not be duplicates
|
||||
# print(" DUPLICATE ", line, notfoundbefore[l])
|
||||
# else:
|
||||
# notfoundbefore[l] =1
|
||||
# except:
|
||||
# print(" FAILURE READ opening cache file %s" % (cachefile))
|
||||
# raise
|
||||
|
||||
|
||||
# notfoundnow =[]
|
||||
found = 0
|
||||
skip = {}
|
||||
print("\n") # extra line because cavern overwrites the text buffer somehow
|
||||
@@ -615,18 +583,5 @@ def LoadPos():
|
||||
models.DataIssue.objects.create(parser='survex', message=message)
|
||||
raise
|
||||
|
||||
#print(" - %s failed lookups of SurvexStation.objects. %s found. %s skipped." % (len(notfoundnow),found, len(skip)))
|
||||
print(" - {} SurvexStation entrances found.".format(found))
|
||||
|
||||
# if found > 10: # i.e. a previous cave import has been done
|
||||
# try:
|
||||
# with open(cachefile, "w") as f:
|
||||
# c = len(notfoundnow)+len(skip)
|
||||
# for i in notfoundnow:
|
||||
# pass #f.write("%s\n" % i)
|
||||
# for j in skip:
|
||||
# pass #f.write("%s\n" % j) # NB skip not notfoundbefore
|
||||
# print((' Not-found cache file written: %s entries' % c))
|
||||
# except:
|
||||
# print(" FAILURE WRITE opening cache file %s" % (cachefile))
|
||||
# raise
|
||||
Reference in New Issue
Block a user