Changes needed to stop the survex parser having to go through the data twice

Taken from the Django 1.10 upgrade branch
This commit is contained in:
Sam Wenham 2020-02-21 15:57:07 +00:00
parent f5fe2d9e33
commit e77aa9fb84
3 changed files with 186 additions and 79 deletions

View File

@ -535,13 +535,15 @@ class Cave(TroggleModel):
def getCaveByReference(reference):
areaname, code = reference.split("-", 1)
print(areaname, code)
#print(areaname, code)
area = Area.objects.get(short_name = areaname)
print(area)
foundCaves = list(Cave.objects.filter(area = area, kataster_number = code).all()) + list(Cave.objects.filter(area = area, unofficial_number = code).all())
#print(area)
foundCaves = list(Cave.objects.filter(area = area, kataster_number = code).all()) + list(Cave.objects.filter(area = area, unofficial_number = code).all())
print(list(foundCaves))
assert len(foundCaves) == 1
return foundCaves[0]
if len(foundCaves) == 1:
return foundCaves[0]
else:
return False
class OtherCaveName(TroggleModel):
name = models.CharField(max_length=160)

View File

@ -6,3 +6,4 @@ django-imagekit
Image
django-tinymce==2.7.0
smartencoding
unidecode

View File

@ -5,20 +5,26 @@ import troggle.settings as settings
from subprocess import call, Popen, PIPE
from troggle.parsers.people import GetPersonExpeditionNameLookup
from django.utils.timezone import get_current_timezone
from django.utils.timezone import make_aware
import re
import os
from datetime import datetime
line_leg_regex = re.compile(r"[\d\-+.]+$")
def LoadSurvexLineLeg(survexblock, stardata, sline, comment):
def LoadSurvexLineLeg(survexblock, stardata, sline, comment, cave):
# The try catches here need replacing as they are relativly expensive
ls = sline.lower().split()
ssfrom = survexblock.MakeSurvexStation(ls[stardata["from"]])
ssto = survexblock.MakeSurvexStation(ls[stardata["to"]])
survexleg = models.SurvexLeg(block=survexblock, stationfrom=ssfrom, stationto=ssto)
if stardata["type"] == "normal":
try:
survexleg.tape = float(ls[stardata["tape"]])
except ValueError:
except ValueError:
print("Tape misread in", survexblock.survexfile.path)
print("Stardata:", stardata)
print("Line:", ls)
@ -53,14 +59,17 @@ def LoadSurvexLineLeg(survexblock, stardata, sline, comment):
survexleg.compass = 1000
survexleg.clino = -90.0
else:
assert re.match(r"[\d\-+.]+$", lcompass), ls
assert re.match(r"[\d\-+.]+$", lclino) and lclino != "-", ls
assert line_leg_regex.match(lcompass), ls
assert line_leg_regex.match(lclino) and lclino != "-", ls
survexleg.compass = float(lcompass)
survexleg.clino = float(lclino)
if cave:
survexleg.cave = cave
# only save proper legs
survexleg.save()
itape = stardata.get("tape")
if itape:
try:
@ -80,96 +89,212 @@ def LoadSurvexEquate(survexblock, sline):
def LoadSurvexLinePassage(survexblock, stardata, sline, comment):
pass
stardatadefault = {"type":"normal", "t":"leg", "from":0, "to":1, "tape":2, "compass":3, "clino":4}
stardataparamconvert = {"length":"tape", "bearing":"compass", "gradient":"clino"}
regex_comment = re.compile(r"([^;]*?)\s*(?:;\s*(.*))?\n?$")
regex_ref = re.compile(r'.*?ref.*?(\d+)\s*#\s*(\d+)')
regex_star = re.compile(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$')
regex_team = re.compile(r"(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$(?i)")
regex_team_member = re.compile(r" and | / |, | & | \+ |^both$|^none$(?i)")
regex_qm = re.compile(r'^\s*QM(\d)\s+?([a-dA-DxX])\s+([\w\-]+)\.(\d+)\s+(([\w\-]+)\.(\d+)|\-)\s+(.+)$')
def RecursiveLoad(survexblock, survexfile, fin, textlines):
iblankbegins = 0
text = [ ]
stardata = stardatadefault
teammembers = [ ]
# uncomment to print out all files during parsing
print("Reading file: " + survexblock.survexfile.path)
while True:
svxline = fin.readline().decode("latin1")
if not svxline:
return
textlines.append(svxline)
# uncomment to print out all files during parsing
print(" - Reading file: " + survexblock.survexfile.path)
stamp = datetime.now()
lineno = 0
# Try to find the cave in the DB if not use the string as before
path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path)
if path_match:
pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
# print('Match')
# print(pos_cave)
cave = models.getCaveByReference(pos_cave)
if cave:
survexfile.cave = cave
svxlines = ''
svxlines = fin.read().splitlines()
# print('Cave - preloop ' + str(survexfile.cave))
# print(survexblock)
for svxline in svxlines:
# print(survexblock)
# print(svxline)
# if not svxline:
# print(' - Not survex')
# return
# textlines.append(svxline)
lineno += 1
# print(' - Line: %d' % lineno)
# break the line at the comment
sline, comment = re.match(r"([^;]*?)\s*(?:;\s*(.*))?\n?$", svxline.strip()).groups()
sline, comment = regex_comment.match(svxline.strip()).groups()
# detect ref line pointing to the scans directory
mref = comment and re.match(r'.*?ref.*?(\d+)\s*#\s*(\d+)', comment)
mref = comment and regex_ref.match(comment)
if mref:
refscan = "%s#%s" % (mref.group(1), mref.group(2))
survexscansfolders = models.SurvexScansFolder.objects.filter(walletname=refscan)
if survexscansfolders:
survexblock.survexscansfolder = survexscansfolders[0]
#survexblock.refscandir = "%s/%s%%23%s" % (mref.group(1), mref.group(1), mref.group(2))
survexblock.save()
survexblock.save()
continue
# This whole section should be moved if we can have *QM become a proper survex command
# Spec of QM in SVX files, currently commented out need to add to survex
# needs to match regex_qm
# ;Serial number grade(A/B/C/D/X) nearest-station resolution-station description
# ;QM1 a hobnob_hallway_2.42 hobnob-hallway_3.42 junction of keyhole passage
# ;QM1 a hobnob_hallway_2.42 - junction of keyhole passage
qmline = comment and regex_qm.match(comment)
if qmline:
print(qmline.groups())
#(u'1', u'B', u'miraclemaze', u'1.17', u'-', None, u'\tcontinuation of rift')
qm_no = qmline.group(1)
qm_grade = qmline.group(2)
qm_from_section = qmline.group(3)
qm_from_station = qmline.group(4)
qm_resolve_section = qmline.group(6)
qm_resolve_station = qmline.group(7)
qm_notes = qmline.group(8)
print('Cave - %s' % survexfile.cave)
print('QM no %d' % int(qm_no))
print('QM grade %s' % qm_grade)
print('QM section %s' % qm_from_section)
print('QM station %s' % qm_from_station)
print('QM res section %s' % qm_resolve_section)
print('QM res station %s' % qm_resolve_station)
print('QM notes %s' % qm_notes)
# If the QM isn't resolved (has a resolving station) thn load it
if not qm_resolve_section or qm_resolve_section is not '-' or qm_resolve_section is not 'None':
from_section = models.SurvexBlock.objects.filter(name=qm_from_section)
# If we can find a section (survex note chunck, named)
if len(from_section) > 0:
print(from_section[0])
from_station = models.SurvexStation.objects.filter(block=from_section[0], name=qm_from_station)
# If we can find a from station then we have the nearest station and can import it
if len(from_station) > 0:
print(from_station[0])
qm = models.QM.objects.create(number=qm_no,
nearest_station=from_station[0],
grade=qm_grade.upper(),
location_description=qm_notes)
else:
print('QM found but resolved')
#print('Cave -sline ' + str(cave))
if not sline:
continue
# detect the star command
mstar = re.match(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$', sline)
mstar = regex_star.match(sline)
if not mstar:
if "from" in stardata:
LoadSurvexLineLeg(survexblock, stardata, sline, comment)
# print('Cave ' + str(survexfile.cave))
# print(survexblock)
LoadSurvexLineLeg(survexblock, stardata, sline, comment, survexfile.cave)
# print(' - From: ')
#print(stardata)
pass
elif stardata["type"] == "passage":
LoadSurvexLinePassage(survexblock, stardata, sline, comment)
# print(' - Passage: ')
#Missing "station" in stardata.
continue
# detect the star command
cmd, line = mstar.groups()
cmd = cmd.lower()
if re.match("include$(?i)", cmd):
includepath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line))
includesurvexfile = models.SurvexFile(path=includepath, cave=survexfile.cave)
print(' - Include file found including - ' + includepath)
# Try to find the cave in the DB if not use the string as before
path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
if path_match:
pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
# print(pos_cave)
cave = models.getCaveByReference(pos_cave)
if cave:
survexfile.cave = cave
else:
print('No match for %s' % includepath)
includesurvexfile = models.SurvexFile(path=includepath)
includesurvexfile.save()
includesurvexfile.SetDirectory()
if includesurvexfile.exists():
survexblock.save()
fininclude = includesurvexfile.OpenFile()
RecursiveLoad(survexblock, includesurvexfile, fininclude, textlines)
elif re.match("begin$(?i)", cmd):
if line:
if line:
newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line))
# Try to find the cave in the DB if not use the string as before
path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath)
if path_match:
pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
print(pos_cave)
cave = models.getCaveByReference(pos_cave)
if cave:
survexfile.cave = cave
else:
print('No match for %s' % newsvxpath)
name = line.lower()
survexblockdown = models.SurvexBlock(name=name, begin_char=fin.tell(), parent=survexblock, survexpath=survexblock.survexpath+"."+name, cave=survexblock.cave, survexfile=survexfile, totalleglength=0.0)
print(' - Begin found for: ' + name)
# print('Block cave: ' + str(survexfile.cave))
survexblockdown = models.SurvexBlock(name=name, begin_char=fin.tell(), parent=survexblock, survexpath=survexblock.survexpath+"."+name, cave=survexfile.cave, survexfile=survexfile, totalleglength=0.0)
survexblockdown.save()
survexblock.save()
survexblock = survexblockdown
# print(survexblockdown)
textlinesdown = [ ]
RecursiveLoad(survexblockdown, survexfile, fin, textlinesdown)
else:
iblankbegins += 1
elif re.match("end$(?i)", cmd):
if iblankbegins:
iblankbegins -= 1
else:
survexblock.text = "".join(textlines)
survexblock.save()
# print(' - End found: ')
endstamp = datetime.now()
timetaken = endstamp - stamp
# print(' - Time to process: ' + str(timetaken))
return
elif re.match("date$(?i)", cmd):
if len(line) == 10:
survexblock.date = re.sub(r"\.", "-", line)
#print(' - Date found: ' + line)
survexblock.date = make_aware(datetime.strptime(re.sub(r"\.", "-", line), '%Y-%m-%d'), get_current_timezone())
expeditions = models.Expedition.objects.filter(year=line[:4])
if expeditions:
assert len(expeditions) == 1
survexblock.expedition = expeditions[0]
survexblock.expeditionday = survexblock.expedition.get_expedition_day(survexblock.date)
survexblock.save()
elif re.match("team$(?i)", cmd):
mteammember = re.match(r"(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$(?i)", line)
pass
# print(' - Team found: ')
mteammember = regex_team.match(line)
if mteammember:
for tm in re.split(r" and | / |, | & | \+ |^both$|^none$(?i)", mteammember.group(2)):
for tm in regex_team_member.split(mteammember.group(2)):
if tm:
personexpedition = survexblock.expedition and GetPersonExpeditionNameLookup(survexblock.expedition).get(tm.lower())
if (personexpedition, tm) not in teammembers:
@ -179,18 +304,23 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
if personexpedition:
personrole.person=personexpedition.person
personrole.save()
elif cmd == "title":
survextitle = models.SurvexTitle(survexblock=survexblock, title=line.strip('"'), cave=survexblock.cave)
#print(' - Title found: ')
survextitle = models.SurvexTitle(survexblock=survexblock, title=line.strip('"'), cave=survexfile.cave)
survextitle.save()
pass
elif cmd == "require":
# should we check survex version available for processing?
pass
elif cmd == "data":
#print(' - Data found: ')
ls = line.lower().split()
stardata = { "type":ls[0] }
#print(' - Star data: ', stardata)
#print(ls)
for i in range(0, len(ls)):
stardata[stardataparamconvert.get(ls[i], ls[i])] = i - 1
if ls[0] in ["normal", "cartesian", "nosurvey"]:
@ -199,40 +329,23 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
stardata = stardatadefault
else:
assert ls[0] == "passage", line
elif cmd == "equate":
#print(' - Equate found: ')
LoadSurvexEquate(survexblock, line)
elif cmd == "fix":
#print(' - Fix found: ')
survexblock.MakeSurvexStation(line.split()[0])
else:
#print(' - Stuff')
if cmd not in ["sd", "include", "units", "entrance", "data", "flags", "title", "export", "instrument",
"calibrate", "set", "infer", "alias", "ref", "cs", "declination", "case"]:
print("Unrecognised command in line:", cmd, line, survexblock, survexblock.survexfile.path)
def ReloadSurvexCave(survex_cave, area):
print(survex_cave, area)
cave = models.Cave.objects.get(kataster_number=survex_cave, area__short_name=area)
print(cave)
#cave = models.Cave.objects.get(kataster_number=survex_cave)
cave.survexblock_set.all().delete()
cave.survexfile_set.all().delete()
cave.survexdirectory_set.all().delete()
survexfile = models.SurvexFile(path="caves-" + cave.kat_area() + "/" + survex_cave + "/" + survex_cave, cave=cave)
survexfile.save()
survexfile.SetDirectory()
survexblockroot = models.SurvexBlock(name="root", survexpath="caves-" + cave.kat_area(), begin_char=0, cave=cave, survexfile=survexfile, totalleglength=0.0)
survexblockroot.save()
fin = survexfile.OpenFile()
textlines = [ ]
RecursiveLoad(survexblockroot, survexfile, fin, textlines)
survexblockroot.text = "".join(textlines)
survexblockroot.save()
endstamp = datetime.now()
timetaken = endstamp - stamp
# print(' - Time to process: ' + str(timetaken))
def LoadAllSurvexBlocks():
@ -258,22 +371,13 @@ def LoadAllSurvexBlocks():
survexblockroot.save()
fin = survexfile.OpenFile()
textlines = [ ]
# The real work starts here
RecursiveLoad(survexblockroot, survexfile, fin, textlines)
fin.close()
survexblockroot.text = "".join(textlines)
survexblockroot.save()
#Load each cave,
#FIXME this should be dealt with load all above
print(" - Reloading all caves")
caves = models.Cave.objects.all()
for cave in caves:
if cave.kataster_number and os.path.isdir(os.path.join(settings.SURVEX_DATA, "caves-" + cave.kat_area(), cave.kataster_number)):
if cave.kataster_number not in ['40']:
print("loading", cave, cave.kat_area())
ReloadSurvexCave(cave.kataster_number, cave.kat_area())
poslineregex = re.compile(r"^\(\s*([+-]?\d*\.\d*),\s*([+-]?\d*\.\d*),\s*([+-]?\d*\.\d*)\s*\)\s*([^\s]+)$")
@ -286,7 +390,7 @@ def LoadPos():
posfile = open("%s%s.pos" % (settings.SURVEX_DATA, settings.SURVEX_TOPNAME))
posfile.readline() #Drop header
for line in posfile.readlines():
r = poslineregex.match(line)
r = poslineregex.match(line)
if r:
x, y, z, name = r.groups()
try: