2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-22 07:11:52 +00:00

big changes to cope with survexblock not yet dated, no *date yet

This commit is contained in:
Philip Sargent 2022-10-07 23:48:41 +03:00
parent bec262bb2d
commit 4e9680a3ad

View File

@ -19,18 +19,11 @@ from troggle.parsers.logbooks import GetCaveLookup
from troggle.core.models.troggle import DataIssue, Expedition
from troggle.core.models.survex import SurvexPersonRole, Wallet, SurvexDirectory, SurvexFile, SurvexBlock, SurvexStation
'''Imports the tree of survex files following form a defined root .svx file
It does also NOT scan the Loser repo for all the svx files - though it should !
'''Imports the tree of survex files following from a defined root .svx file
It also scans the Loser repo for all the svx files, which it loads individually afterwards.
'''
todo = '''Also walk the entire tree in the :loser: repo looking for unconnected survex files
- add them to the system so that they can be reported-on
- produce a parser report and create a troggle report page (some are OK, e.g. futility series replaced by ARGE survey in 115)
- If you look at e.g. http://expo.survex.com/survexfile/161#T_caves-1623/161/lhr/alllhr
you will see than have the team members are recognised by this parser, but not recognised by the
wider troggle system (the name is not a hyperlink) - apparently randomly.
GetPersonExpeditionNameLookup() needs to be fixed.
todo = '''
-#BUG, if *date comes after *team, the person's date is not set at all. It needs re-setting at the end of the block.
@ -106,6 +99,7 @@ def get_people_on_trip(survexblock):
people.append(f'{p.personname}')
return list(set(people))
class LoadingSurvex():
"""A 'survex block' is a *begin...*end set of cave data.
A survex file can contain many begin-end blocks, which can be nested, and which can *include
@ -128,8 +122,10 @@ class LoadingSurvex():
rx_names = re.compile(r'(?i)names')
rx_flagsnot= re.compile(r"not\s")
rx_linelen = re.compile(r"[\d\-+.]+$")
instruments = "(waiting_patiently|slacker|Useless|nagging|unknown|Inst|instrument|rig|rigger|rigging|helper|something| compass|comp|clino|Notes|sketch|book|Tape|Dog|Pics|photo|drawing|Helper|GPS|Disto|Distox|Distox2|topodroid|point|Consultant|nail|polish|nail_polish_bitch|nail_polish_monkey|varnish|nail_polish|nail_varnish|bitch|monkey|PowerDrill|drill)"
rx_teammem = re.compile(r"(?i)"+instruments+"?(?:es|s)?\s+(.*)"+instruments+"?(?:es|s)?$")
instruments = "(bitch|bodger|bolt|bolter|bolting|book|clino|comp|compass|consultant|disto|distox|distox2|dog|dogsbody|drawing|drill|gps|helper|inst|instr|instrument|monkey|nagging|nail|nail_polish|nail_polish_bitch|nail_polish_monkey|nail_varnish|nail_varnish_bitch|note|paint|photo|pic|point|polish|powerdrill|rig|rigger|rigging|sketch|slacker|something|tape|topodroid|unknown|useless|varnish|waiting_patiently)"
rx_teammem = re.compile(r"(?i)"+instruments+"?(?:es|s)?\s+(.*)$")
rx_teamold = re.compile(r"(?i)(.*)\s+"+instruments+"?(?:es|s)?$")
rx_teamabs = re.compile(r"(?i)^\s*("+instruments+")?(?:es|s)?\s*$")
rx_person = re.compile(r"(?i) and | / |, | & | \+ |^both$|^none$")
rx_qm = re.compile(r'(?i)^\s*QM(\d+)\s+?([a-dA-DxX])\s+([\w\-\_]+)\.([\w\.\-]+)\s+(([\w\-]+)\.([\w\.\-]+)|\-)\s+(.+)$')
# does not recognise non numeric suffix survey point ids
@ -228,22 +224,74 @@ class LoadingSurvex():
personrole is used to record that a person was on a trip, NOT the role they played.
(NB PersonTrip is a logbook thing)
"""
teammembers = [ ]
mteammember = self.rx_teammem.match(line)
if mteammember:
def record_team_member(tm, survexblock):
    """Attach one team member name to a survex block as a SurvexPersonRole.

    tm          -- the person's name as written on the *team line; surrounding
                   quote characters are stripped before use.
    survexblock -- the block the *team line belongs to. Its .expedition and
                   .expeditionday may still be unset if no *date has been seen.

    Uses `self` and `line` from the enclosing method's scope.

    If the block has an expedition, the name is looked up among that
    expedition's people; a failed lookup is reported as a DataIssue but the
    role is still saved with personexpedition=None. If the block has no
    expedition yet, the role is saved without validation.
    """
    tm = tm.strip('\"\'')
    # Refactor. The dict GetPersonExpeditionNameLookup(expo) indexes by name and has values of personexpedition
    # This is convoluted, the whole personexpedition concept is unnecessary.

    # we need the current expedition, but if there has been no date yet in the survex file, we don't know which one it is.
    # so we can't validate whether the person was on expo or not.
    # we will have to attach them to the survexblock anyway, and then do a
    # later check on whether they are valid when we get the date.
    expo = survexblock.expedition # may be None if no *date yet

    # Resolve the person BEFORE creating the role: the lookup result is one of
    # the update_or_create() arguments, so it must be bound first.
    if expo:
        personexpedition = GetPersonExpeditionNameLookup(expo).get(tm.lower())
    else:
        # don't know the date yet, assume the person is valid. It will get picked up when the *date appears
        personexpedition = None

    personrole, created = SurvexPersonRole.objects.update_or_create(survexblock=survexblock, personexpedition=personexpedition, personname=tm)

    if expo:
        if survexblock.expeditionday: # *date has been set
            personrole.expeditionday = survexblock.expeditionday
        else:
            # should not happen
            message = "! *team {} expo ok, expedition day not in *team {} ({}) created? '{}'".format(expo.year, survexblock.survexfile.path, survexblock, created )
            print(self.insp+message)
            DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))

        if personexpedition:
            personrole.person=personexpedition.person
            self.currentpersonexped.append(personexpedition)
        else:
            # we know the date and expo, but can't find the person
            message = "! *team {} '{}' FAIL personexpedition lookup on *team {} ({}) in '{}' {} ".format(expo.year, tm, survexblock.survexfile.path, survexblock, created, line)
            print(self.insp+message)
            DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))

    personrole.save()
mteammember = self.rx_teammem.match(line) # matches the role at the beginning
if not mteammember:
moldstyle = self.rx_teamold.match(line) # matches the role at the the end of the string
if moldstyle:
for tm in self.rx_person.split(moldstyle.group(1)):
if tm:
record_team_member(tm, survexblock)
# seems to be working
# msg = "! OLD tm='{}' line: '{}' ({}) {}".format(tm, line, survexblock, survexblock.survexfile.path)
# print(msg, file=sys.stderr)
else:
message = "! *team {} ({}) Weird '{}' oldstyle line: '{}'".format(survexblock.survexfile.path, survexblock, mteammember.group(1), line)
print(self.insp+message)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
else:
nullmember = self.rx_teamabs.match(line) # matches empty role line. Ignore these.
if not nullmember:
message = "! *team {} ({}) Bad line: '{}'".format(survexblock.survexfile.path, survexblock, line)
print(self.insp+message)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
else:
for tm in self.rx_person.split(mteammember.group(2)):
if tm:
tm = tm.strip('\"\'')
personexpedition = survexblock.expedition and GetPersonExpeditionNameLookup(survexblock.expedition).get(tm.lower())
if (personexpedition, tm) not in teammembers:
teammembers.append((personexpedition, tm))
personrole = SurvexPersonRole(survexblock=survexblock, personexpedition=personexpedition, personname=tm)
personrole.save()
personrole.expeditionday = survexblock.expeditionday #BUG, if *date comes after *team, this is NOT SET.
if personexpedition:
personrole.person=personexpedition.person
self.currentpersonexped.append(personexpedition)
personrole.save()
record_team_member(tm, survexblock)
else:
if not mteammember.group(2).lower() in ('none', 'both'):
message = "! Weird *team '{}' newstyle line: '{}' ({}) {}".format(mteammember.group(2), line, survexblock, survexblock.survexfile.path)
print(self.insp+message)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
def LoadSurvexEntrance(self, survexblock, line):
# Not using this yet
@ -285,29 +333,53 @@ class LoadingSurvex():
print(self.insp+message)
DataIssue.objects.create(parser='survexunits', message=message)
def get_expo_from_year(self, year, line=None, survexblock=None):
    """Return the Expedition for `year`, caching the result in self.expos.

    year        -- the year used both as the cache key and as the value for
                   the Expedition `year` filter.
    line        -- optional: the source line being parsed, used only to make
                   the DataIssue message more specific.
    survexblock -- optional: the block being parsed, used only to add the
                   block and file path to the DataIssue message and its url.

    If the database does not hold exactly one expedition for the year, a
    DataIssue is recorded. With several matches the first one is used.
    With no match, indexing the empty result raises (IndexError for a Django
    QuerySet) after the issue has been recorded.
    """
    # caching to save a DB query on every block
    if year in self.expos:
        return self.expos[year]

    expeditions = Expedition.objects.filter(year=year)
    if len(expeditions) != 1:
        problem = "More than one expedition" if expeditions else "No expedition"
        message = f"! {problem} in year {year}"
        issue_fields = {'parser': 'survex'}
        if survexblock is not None:
            # Location details are only available when the caller supplies them;
            # they are not otherwise in scope inside this method.
            message += f" '{line}' ({survexblock}) {survexblock.survexfile.path}"
            issue_fields['url'] = get_offending_filename(survexblock.survexfile.path)
        print(self.insp+message)
        DataIssue.objects.create(message=message, **issue_fields)

    expo = expeditions[0]
    self.expos[year] = expo
    return expo
def LoadSurvexDate(self, survexblock, line):
# we should make this a date RANGE for everything?
def findexpedition(year):
return Expedition.objects.filter(year=year)
def setdate(year):
# cacheing to save DB query on every block
if year in self.expos:
expo = self.expos[year]
else:
expeditions = findexpedition(year)
if len(expeditions) != 1 :
message = f"! More than one expedition in year {year} '{line}' ({survexblock}) {survexblock.survexfile.path}"
print(self.insp+message)
DataIssue.objects.create(parser='survexunits', message=message)
expo= expeditions[0]
self.expos[year]= expo
def setdate_on_survexblock(year):
expo = self.get_expo_from_year(year)
survexblock.expedition = expo
survexblock.expeditionday = survexblock.expedition.get_expedition_day(survexblock.date)
survexblock.expeditionday = expo.get_expedition_day(survexblock.date)
survexblock.save()
team = SurvexPersonRole.objects.filter(survexblock=survexblock)
for p in team:
if not p.expeditionday: # *date and *team in 'wrong' order. All working now.
p.expeditionday = survexblock.expeditionday
p.save()
if not p.personexpedition: # again, we didn't know the date until now
pe = GetPersonExpeditionNameLookup(expo).get(p.personname.lower())
if pe:
# message = "! {} ({}) Fixing undated personexpedition '{}'".format(survexblock.survexfile.path, survexblock, p.personname)
# print(self.insp+message)
# DataIssue.objects.create(parser='survex', message=message)
p.personexpedition = pe
p.person = p.personexpedition.person
p.save()
else:
message = "! *team {} '{}' FAIL personexpedition lookup on *date {} ({}) '{}'".format(year, p, survexblock.survexfile.path, survexblock, p.personname)
print(self.insp+message)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
oline = line
if len(line) > 10:
# message = "! DATE Warning LONG DATE '{}' ({}) {}".format(oline, survexblock, survexblock.survexfile.path)
@ -320,7 +392,7 @@ class LoadingSurvex():
# TO DO set to correct Austrian timezone Europe/Vienna ?
# %m and %d need leading zeros. Source svx files require them.
survexblock.date = datetime.strptime(line.replace('.','-'), '%Y-%m-%d')
setdate(year)
setdate_on_survexblock(year)
elif len(line) == 7:
year = line[:4]
perps = get_people_on_trip(survexblock) # What, you don't know Judge Dredd slang ?
@ -328,7 +400,7 @@ class LoadingSurvex():
print(self.insp+message)
DataIssue.objects.create(parser='svxdate', message=message, url=get_offending_filename(survexblock.survexfile.path))
survexblock.date = datetime.strptime(line.replace('.','-'), '%Y-%m') # sets to first of month
setdate(year)
setdate_on_survexblock(year)
elif len(line) == 4:
year = line[:4]
perps = get_people_on_trip(survexblock)
@ -336,13 +408,13 @@ class LoadingSurvex():
print(self.insp+message)
DataIssue.objects.create(parser='svxdate', message=message, url=get_offending_filename(survexblock.survexfile.path))
survexblock.date = datetime.strptime(line, '%Y') # sets to January 1st
setdate(year)
setdate_on_survexblock(year)
else:
# these errors are reporting the wrong survexblock, which is actually a SurvexFile (!)
message = "! DATE Error unrecognised '{}' ({}) {}".format(oline, survexblock, survexblock.survexfile.path)
message = "! DATE Error unrecognised '{}-{}' ({}) {}".format(oline, survexblock, type(survexblock), survexblock.survexfile.path)
print(self.insp+message)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
print(f" {survexblock.parent=}") # fails as SUrvexFile has no .parent ...ugh.
print(f" {type(survexblock)=}") # survexblock.parent fails as a SurvexFile has no .parent ...ugh.
print(f" {survexblock.survexpath=}")
print(f" {survexblock.survexfile=}")
#raise
@ -976,6 +1048,7 @@ class LoadingSurvex():
def LinearLoad(self, survexblock, path, collatefilename):
"""Loads a single survex file. Usually used to import all the survex files which have been collated
into a single file. Loads the begin/end blocks using a stack for labels.
Uses the python generator idiom to avoid loading the whole file (21MB) into memory.
"""
blkid = None
pathlist = None
@ -1192,7 +1265,7 @@ class LoadingSurvex():
# this is a python generator idiom.
# see https://realpython.com/introduction-to-python-generators/
# this is the first use of generators in troggle (Oct.2022)
# this is the first use of generators in troggle (Oct.2022) and saves 21 MB of memory
with open(collatefilename, "r") as fcollate:
for svxline in fcollate:
self.lineno += 1
@ -1349,7 +1422,7 @@ class LoadingSurvex():
return
return
try:
# python generator idiom again
# python generator idiom again. Not important here as these are small files
with open(finname, "r") as fin:
for svxline in fin:
process_line(svxline)
@ -1475,7 +1548,6 @@ def FindAndLoadSurvex(survexblockroot):
survexfileroot = survexblockroot.survexfile # i.e. SURVEX_TOPNAME only
collatefilename = "_" + survexfileroot.path + ".svx"
svx_scan = LoadingSurvex()
svx_scan.callcount = 0
svx_scan.depthinclude = 0
@ -1502,7 +1574,6 @@ def FindAndLoadSurvex(survexblockroot):
from pstats import SortKey
pr = cProfile.Profile()
pr.enable()
#print(f"###{survexblockroot=} {survexfileroot.path=}",file=sys.stderr)
#----------------------------------------------------------------
svx_scan.PushdownStackScan(survexblockroot, survexfileroot.path, finrootname, flinear, fcollate)
#----------------------------------------------------------------
@ -1525,7 +1596,6 @@ def FindAndLoadSurvex(survexblockroot):
mem1 = get_process_memory()
print(" - MEM:{:7.2f} MB END ".format(mem1),file=sys.stderr)
print(" - MEM:{:7.3f} MB ADDITIONALLY USED".format(mem1-mem0),file=sys.stderr)
#
# Process all the omitted files in :loser: with some exceptions
#
@ -1556,8 +1626,6 @@ def FindAndLoadSurvex(survexblockroot):
for x in removals:
unseens.remove(x)
# for x in unseens:
# print(f"'{x}', ", end='', file=sys.stderr)
print(f"\n - {len(unseens)} survex files found which were not included in main tree. ({len(svx_scan.svxfileslist)} in main tree)", file=sys.stderr)
print(f" -- Now loading the previously-omitted survex files.", file=sys.stderr)
@ -1621,7 +1689,6 @@ def FindAndLoadSurvex(survexblockroot):
print(" - MEM:{:7.3f} MB ADDITIONALLY USED".format(mem1-mem0),file=sys.stderr)
# Before doing this, it would be good to identify the *equate and *entrance we need that are relevant to the
# entrance locations currently loaded after this by LoadPos(), but could better be done before ?
# look in MapLocations() for how we find the entrances
@ -1635,18 +1702,15 @@ def FindAndLoadSurvex(survexblockroot):
#pr2 = cProfile.Profile()
#pr2.enable()
mem1 = get_process_memory()
print(f" - MEM:{mem1:7.2f} MB NOT reading '{collatefilename}' into memory.",file=sys.stderr)
print(" ", file=sys.stderr,end='')
#----------------------------------------------------------------
svx_load.LinearLoad(survexblockroot,survexfileroot.path, collatefilename)
svx_load.LinearLoad(survexblockroot, survexfileroot.path, collatefilename)
#----------------------------------------------------------------
#pr2.disable()
# with open('LinearLoad.prof', 'w') as f:
# ps = pstats.Stats(pr2, stream=f)
# ps.sort_stats(SortKey.CUMULATIVE)
# ps.print_stats()
svxlines = [] # empty 30MB of stashed file
mem1 = get_process_memory()
print("\n - MEM:{:7.2f} MB STOP".format(mem1),file=sys.stderr)
print(" - MEM:{:7.3f} MB ADDITIONALLY USED".format(mem1-mem0),file=sys.stderr)
@ -1696,20 +1760,32 @@ def MakeOmitFileRoot(fn):
return fileroot
def LoadSurvexBlocks():
mem1 = get_process_memory()
print(" - MEM:{:7.2f} MB now ".format(mem1),file=sys.stderr)
print(' - Flushing All Survex Blocks...')
# why does this increase memory use by 20 MB ?!
# We have foreign keys, Django needs to load the related objects
# in order to resolve how the relation should handle the deletion:
# https://docs.djangoproject.com/en/3.2/ref/models/fields/#django.db.models.ForeignKey.on_delete
SurvexBlock.objects.all().delete()
SurvexFile.objects.all().delete()
SurvexDirectory.objects.all().delete()
SurvexPersonRole.objects.all().delete()
SurvexStation.objects.all().delete()
print(" - survex Data Issues flushed")
mem1 = get_process_memory()
print(" - MEM:{:7.2f} MB now. Foreign key objects loaded on deletion. ".format(mem1),file=sys.stderr)
print(" - Flushing survex Data Issues ")
DataIssue.objects.filter(parser='survex').delete()
DataIssue.objects.filter(parser='svxdate').delete()
DataIssue.objects.filter(parser='survexleg').delete()
DataIssue.objects.filter(parser='survexunits').delete()
DataIssue.objects.filter(parser='entrances').delete()
DataIssue.objects.filter(parser='xEntrances').delete()
print(" - survex Data Issues flushed")
mem1 = get_process_memory()
print(" - MEM:{:7.2f} MB now ".format(mem1),file=sys.stderr)
survexfileroot = MakeSurvexFileRoot()
# this next makes a block_object associated with a file_object.path = SURVEX_TOPNAME