mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2024-11-21 23:01:52 +00:00
big changes to cope with survexblock not yet dated, no *date yet
This commit is contained in:
parent
bec262bb2d
commit
4e9680a3ad
@ -19,18 +19,11 @@ from troggle.parsers.logbooks import GetCaveLookup
|
||||
from troggle.core.models.troggle import DataIssue, Expedition
|
||||
from troggle.core.models.survex import SurvexPersonRole, Wallet, SurvexDirectory, SurvexFile, SurvexBlock, SurvexStation
|
||||
|
||||
'''Imports the tree of survex files following form a defined root .svx file
|
||||
It does also NOT scan the Loser repo for all the svx files - though it should !
|
||||
'''Imports the tree of survex files following from a defined root .svx file
|
||||
It also scans the Loser repo for all the svx files, which it loads individually afterwards.
|
||||
'''
|
||||
|
||||
todo = '''Also walk the entire tree in the :loser: repo looking for unconnected survex files
|
||||
- add them to the system so that they can be reported-on
|
||||
- produce a parser report and create a troggle report page (some are OK, e.g. futility series replaced by ARGE survey in 115)
|
||||
|
||||
- If you look at e.g. http://expo.survex.com/survexfile/161#T_caves-1623/161/lhr/alllhr
|
||||
you will see than have the team members are recognised by this parser, but not recognised by the
|
||||
wider troggle system (the name is not a hyperlink) - apparently randomly.
|
||||
GetPersonExpeditionNameLookup() needs to be fixed.
|
||||
todo = '''
|
||||
|
||||
-#BUG, if *date comes after *team, the person's date is not set at all. It needs re-setting at the endof the block.
|
||||
|
||||
@ -105,7 +98,8 @@ def get_people_on_trip(survexblock):
|
||||
for p in qpeople:
|
||||
people.append(f'{p.personname}')
|
||||
return list(set(people))
|
||||
|
||||
|
||||
|
||||
class LoadingSurvex():
|
||||
"""A 'survex block' is a *begin...*end set of cave data.
|
||||
A survex file can contain many begin-end blocks, which can be nested, and which can *include
|
||||
@ -128,8 +122,10 @@ class LoadingSurvex():
|
||||
rx_names = re.compile(r'(?i)names')
|
||||
rx_flagsnot= re.compile(r"not\s")
|
||||
rx_linelen = re.compile(r"[\d\-+.]+$")
|
||||
instruments = "(waiting_patiently|slacker|Useless|nagging|unknown|Inst|instrument|rig|rigger|rigging|helper|something| compass|comp|clino|Notes|sketch|book|Tape|Dog|Pics|photo|drawing|Helper|GPS|Disto|Distox|Distox2|topodroid|point|Consultant|nail|polish|nail_polish_bitch|nail_polish_monkey|varnish|nail_polish|nail_varnish|bitch|monkey|PowerDrill|drill)"
|
||||
rx_teammem = re.compile(r"(?i)"+instruments+"?(?:es|s)?\s+(.*)"+instruments+"?(?:es|s)?$")
|
||||
instruments = "(bitch|bodger|bolt|bolter|bolting|book|clino|comp|compass|consultant|disto|distox|distox2|dog|dogsbody|drawing|drill|gps|helper|inst|instr|instrument|monkey|nagging|nail|nail_polish|nail_polish_bitch|nail_polish_monkey|nail_varnish|nail_varnish_bitch|note|paint|photo|pic|point|polish|powerdrill|rig|rigger|rigging|sketch|slacker|something|tape|topodroid|unknown|useless|varnish|waiting_patiently)"
|
||||
rx_teammem = re.compile(r"(?i)"+instruments+"?(?:es|s)?\s+(.*)$")
|
||||
rx_teamold = re.compile(r"(?i)(.*)\s+"+instruments+"?(?:es|s)?$")
|
||||
rx_teamabs = re.compile(r"(?i)^\s*("+instruments+")?(?:es|s)?\s*$")
|
||||
rx_person = re.compile(r"(?i) and | / |, | & | \+ |^both$|^none$")
|
||||
rx_qm = re.compile(r'(?i)^\s*QM(\d+)\s+?([a-dA-DxX])\s+([\w\-\_]+)\.([\w\.\-]+)\s+(([\w\-]+)\.([\w\.\-]+)|\-)\s+(.+)$')
|
||||
# does not recognise non numeric suffix survey point ids
|
||||
@ -228,22 +224,74 @@ class LoadingSurvex():
|
||||
personrole is used to record that a person was on a trip, NOT the role they played.
|
||||
(NB PersonTrip is a logbook thing)
|
||||
"""
|
||||
teammembers = [ ]
|
||||
mteammember = self.rx_teammem.match(line)
|
||||
if mteammember:
|
||||
def record_team_member(tm, survexblock):
|
||||
tm = tm.strip('\"\'')
|
||||
# Refactor. The dict GetPersonExpeditionNameLookup(expo) indexes by name and has values of personexpedition
|
||||
# This is convoluted, the whole personexpedition concept is unnecessary.
|
||||
|
||||
# we need the current expedition, but if there has been no date yet in the survex file, we don't know which one it is.
|
||||
# so we can't validate whether the person was on expo or not.
|
||||
# we will have to attach them to the survexblock anyway, and then do a
|
||||
# later check on whether they are valid when we get the date.
|
||||
|
||||
personrole, created = SurvexPersonRole.objects.update_or_create(survexblock=survexblock, personexpedition=personexpedition, personname=tm)
|
||||
|
||||
expo = survexblock.expedition # may be None if no *date yet
|
||||
# this syntax was bizarre.. made more obvious
|
||||
if expo:
|
||||
if survexblock.expeditionday: # *date has been set
|
||||
personrole.expeditionday = survexblock.expeditionday
|
||||
else:
|
||||
# should not happen
|
||||
message = "! *team {} expo ok, expedition day not in *team {} ({}) created? '{}'".format(expo.year, survexblock.survexfile.path, survexblock, created )
|
||||
print(self.insp+message)
|
||||
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
|
||||
|
||||
|
||||
personexpedition = GetPersonExpeditionNameLookup(expo).get(tm.lower())
|
||||
personrole.person=personexpedition.person
|
||||
self.currentpersonexped.append(personexpedition)
|
||||
|
||||
if not personexpedition:
|
||||
# we know the date and expo, but can't find the person
|
||||
message = "! *team {} '{}' FAIL personexpedition lookup on *team {} ({}) in '{}' {} ".format(expo.year, tm, survexblock.survexfile.path, survexblock, created, line)
|
||||
print(self.insp+message)
|
||||
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
|
||||
else:
|
||||
personexpedition = None
|
||||
# don't know the date yet, assume the person is valid. It will get picked up when the *date appears
|
||||
|
||||
personrole.save()
|
||||
|
||||
mteammember = self.rx_teammem.match(line) # matches the role at the beginning
|
||||
if not mteammember:
|
||||
moldstyle = self.rx_teamold.match(line) # matches the role at the end of the string
|
||||
if moldstyle:
|
||||
for tm in self.rx_person.split(moldstyle.group(1)):
|
||||
if tm:
|
||||
record_team_member(tm, survexblock)
|
||||
# seems to be working
|
||||
# msg = "! OLD tm='{}' line: '{}' ({}) {}".format(tm, line, survexblock, survexblock.survexfile.path)
|
||||
# print(msg, file=sys.stderr)
|
||||
else:
|
||||
message = "! *team {} ({}) Weird '{}' oldstyle line: '{}'".format(survexblock.survexfile.path, survexblock, mteammember.group(1), line)
|
||||
print(self.insp+message)
|
||||
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
|
||||
else:
|
||||
nullmember = self.rx_teamabs.match(line) # matches empty role line. Ignore these.
|
||||
if not nullmember:
|
||||
message = "! *team {} ({}) Bad line: '{}'".format(survexblock.survexfile.path, survexblock, line)
|
||||
print(self.insp+message)
|
||||
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
|
||||
else:
|
||||
for tm in self.rx_person.split(mteammember.group(2)):
|
||||
if tm:
|
||||
tm = tm.strip('\"\'')
|
||||
personexpedition = survexblock.expedition and GetPersonExpeditionNameLookup(survexblock.expedition).get(tm.lower())
|
||||
if (personexpedition, tm) not in teammembers:
|
||||
teammembers.append((personexpedition, tm))
|
||||
personrole = SurvexPersonRole(survexblock=survexblock, personexpedition=personexpedition, personname=tm)
|
||||
personrole.save()
|
||||
personrole.expeditionday = survexblock.expeditionday #BUG, if *date comes after *team, this is NOT SET.
|
||||
if personexpedition:
|
||||
personrole.person=personexpedition.person
|
||||
self.currentpersonexped.append(personexpedition)
|
||||
personrole.save()
|
||||
record_team_member(tm, survexblock)
|
||||
else:
|
||||
if not mteammember.group(2).lower() in ('none', 'both'):
|
||||
message = "! Weird *team '{}' newstyle line: '{}' ({}) {}".format(mteammember.group(2), line, survexblock, survexblock.survexfile.path)
|
||||
print(self.insp+message)
|
||||
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
|
||||
|
||||
def LoadSurvexEntrance(self, survexblock, line):
|
||||
# Not using this yet
|
||||
@ -284,29 +332,53 @@ class LoadingSurvex():
|
||||
message = "! *UNITS in YARDS!? - not converted '{}' ({}) {}".format(line, survexblock, survexblock.survexfile.path)
|
||||
print(self.insp+message)
|
||||
DataIssue.objects.create(parser='survexunits', message=message)
|
||||
|
||||
|
||||
def get_expo_from_year(self, year):
    """Return the Expedition for ``year``, caching it in ``self.expos``.

    The cache saves a database query on every survex block that carries a
    date in an already-seen year.

    If the lookup does not find exactly one expedition, a DataIssue is
    recorded and the first match (if any) is used. The report deliberately
    contains only the year: this method is not given the survex block or
    the source line, so it cannot refer to them.

    Raises:
        IndexError: presumably, when no expedition exists for ``year``
            (indexing an empty result) - assumes ``Expedition.objects`` is a
            Django manager; confirm against the model.
    """
    # Cache hit: no database access needed.
    if year in self.expos:
        return self.expos[year]

    expeditions = Expedition.objects.filter(year=year)
    count = len(expeditions)
    if count != 1:
        # Distinguish the two failure modes so the report is not misleading.
        problem = "No expedition" if count == 0 else "More than one expedition"
        message = f"! {problem} in year {year}"
        print(self.insp+message)
        DataIssue.objects.create(parser='survex', message=message)

    expo = expeditions[0]
    self.expos[year] = expo
    return expo
|
||||
|
||||
def LoadSurvexDate(self, survexblock, line):
|
||||
# we should make this a date RANGE for everything?
|
||||
def findexpedition(year):
|
||||
return Expedition.objects.filter(year=year)
|
||||
|
||||
def setdate(year):
|
||||
# caching to save a DB query on every block
|
||||
if year in self.expos:
|
||||
expo = self.expos[year]
|
||||
else:
|
||||
expeditions = findexpedition(year)
|
||||
if len(expeditions) != 1 :
|
||||
message = f"! More than one expedition in year {year} '{line}' ({survexblock}) {survexblock.survexfile.path}"
|
||||
print(self.insp+message)
|
||||
DataIssue.objects.create(parser='survexunits', message=message)
|
||||
|
||||
expo= expeditions[0]
|
||||
self.expos[year]= expo
|
||||
|
||||
def setdate_on_survexblock(year):
|
||||
expo = self.get_expo_from_year(year)
|
||||
survexblock.expedition = expo
|
||||
survexblock.expeditionday = survexblock.expedition.get_expedition_day(survexblock.date)
|
||||
survexblock.expeditionday = expo.get_expedition_day(survexblock.date)
|
||||
survexblock.save()
|
||||
|
||||
team = SurvexPersonRole.objects.filter(survexblock=survexblock)
|
||||
for p in team:
|
||||
if not p.expeditionday: # *date and *team in 'wrong' order. All working now.
|
||||
|
||||
p.expeditionday = survexblock.expeditionday
|
||||
p.save()
|
||||
|
||||
if not p.personexpedition: # again, we didn't know the date until now
|
||||
pe = GetPersonExpeditionNameLookup(expo).get(p.personname.lower())
|
||||
if pe:
|
||||
# message = "! {} ({}) Fixing undated personexpedition '{}'".format(survexblock.survexfile.path, survexblock, p.personname)
|
||||
# print(self.insp+message)
|
||||
# DataIssue.objects.create(parser='survex', message=message)
|
||||
p.personexpedition = pe
|
||||
p.person = p.personexpedition.person
|
||||
p.save()
|
||||
else:
|
||||
message = "! *team {} '{}' FAIL personexpedition lookup on *date {} ({}) '{}'".format(year, p, survexblock.survexfile.path, survexblock, p.personname)
|
||||
print(self.insp+message)
|
||||
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
|
||||
|
||||
|
||||
|
||||
oline = line
|
||||
if len(line) > 10:
|
||||
@ -320,7 +392,7 @@ class LoadingSurvex():
|
||||
# TO DO set to correct Austrian timezone Europe/Vienna ?
|
||||
# %m and %d need leading zeros. Source svx files require them.
|
||||
survexblock.date = datetime.strptime(line.replace('.','-'), '%Y-%m-%d')
|
||||
setdate(year)
|
||||
setdate_on_survexblock(year)
|
||||
elif len(line) == 7:
|
||||
year = line[:4]
|
||||
perps = get_people_on_trip(survexblock) # What, you don't know Judge Dredd slang ?
|
||||
@ -328,7 +400,7 @@ class LoadingSurvex():
|
||||
print(self.insp+message)
|
||||
DataIssue.objects.create(parser='svxdate', message=message, url=get_offending_filename(survexblock.survexfile.path))
|
||||
survexblock.date = datetime.strptime(line.replace('.','-'), '%Y-%m') # sets to first of month
|
||||
setdate(year)
|
||||
setdate_on_survexblock(year)
|
||||
elif len(line) == 4:
|
||||
year = line[:4]
|
||||
perps = get_people_on_trip(survexblock)
|
||||
@ -336,13 +408,13 @@ class LoadingSurvex():
|
||||
print(self.insp+message)
|
||||
DataIssue.objects.create(parser='svxdate', message=message, url=get_offending_filename(survexblock.survexfile.path))
|
||||
survexblock.date = datetime.strptime(line, '%Y') # sets to January 1st
|
||||
setdate(year)
|
||||
setdate_on_survexblock(year)
|
||||
else:
|
||||
# these errors are reporting the wrong survexblock, which is actually a SurvexFile (!)
|
||||
message = "! DATE Error unrecognised '{}' ({}) {}".format(oline, survexblock, survexblock.survexfile.path)
|
||||
message = "! DATE Error unrecognised '{}-{}' ({}) {}".format(oline, survexblock, type(survexblock), survexblock.survexfile.path)
|
||||
print(self.insp+message)
|
||||
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
|
||||
print(f" {survexblock.parent=}") # fails as SUrvexFile has no .parent ...ugh.
|
||||
print(f" {type(survexblock)=}") # survexblock.parent fails as a SurvexFile has no .parent ...ugh.
|
||||
print(f" {survexblock.survexpath=}")
|
||||
print(f" {survexblock.survexfile=}")
|
||||
#raise
|
||||
@ -976,6 +1048,7 @@ class LoadingSurvex():
|
||||
def LinearLoad(self, survexblock, path, collatefilename):
|
||||
"""Loads a single survex file. Usually used to import all the survex files which have been collated
|
||||
into a single file. Loads the begin/end blocks using a stack for labels.
|
||||
Uses the python generator idiom to avoid loading the whole file (21MB) into memory.
|
||||
"""
|
||||
blkid = None
|
||||
pathlist = None
|
||||
@ -1192,7 +1265,7 @@ class LoadingSurvex():
|
||||
|
||||
# this is a python generator idiom.
|
||||
# see https://realpython.com/introduction-to-python-generators/
|
||||
# this is the first use of generators in troggle (Oct.2022)
|
||||
# this is the first use of generators in troggle (Oct.2022) and saves 21 MB of memory
|
||||
with open(collatefilename, "r") as fcollate:
|
||||
for svxline in fcollate:
|
||||
self.lineno += 1
|
||||
@ -1349,7 +1422,7 @@ class LoadingSurvex():
|
||||
return
|
||||
return
|
||||
try:
|
||||
# python generator idiom again
|
||||
# python generator idiom again. Not important here as these are small files
|
||||
with open(finname, "r") as fin:
|
||||
for svxline in fin:
|
||||
process_line(svxline)
|
||||
@ -1475,7 +1548,6 @@ def FindAndLoadSurvex(survexblockroot):
|
||||
survexfileroot = survexblockroot.survexfile # i.e. SURVEX_TOPNAME only
|
||||
collatefilename = "_" + survexfileroot.path + ".svx"
|
||||
|
||||
|
||||
svx_scan = LoadingSurvex()
|
||||
svx_scan.callcount = 0
|
||||
svx_scan.depthinclude = 0
|
||||
@ -1502,7 +1574,6 @@ def FindAndLoadSurvex(survexblockroot):
|
||||
from pstats import SortKey
|
||||
pr = cProfile.Profile()
|
||||
pr.enable()
|
||||
#print(f"###{survexblockroot=} {survexfileroot.path=}",file=sys.stderr)
|
||||
#----------------------------------------------------------------
|
||||
svx_scan.PushdownStackScan(survexblockroot, survexfileroot.path, finrootname, flinear, fcollate)
|
||||
#----------------------------------------------------------------
|
||||
@ -1524,8 +1595,7 @@ def FindAndLoadSurvex(survexblockroot):
|
||||
|
||||
mem1 = get_process_memory()
|
||||
print(" - MEM:{:7.2f} MB END ".format(mem1),file=sys.stderr)
|
||||
print(" - MEM:{:7.3f} MB ADDITIONALLY USED".format(mem1-mem0),file=sys.stderr)
|
||||
|
||||
print(" - MEM:{:7.3f} MB ADDITIONALLY USED".format(mem1-mem0),file=sys.stderr)
|
||||
#
|
||||
# Process all the omitted files in :loser: with some exceptions
|
||||
#
|
||||
@ -1556,8 +1626,6 @@ def FindAndLoadSurvex(survexblockroot):
|
||||
|
||||
for x in removals:
|
||||
unseens.remove(x)
|
||||
# for x in unseens:
|
||||
# print(f"'{x}', ", end='', file=sys.stderr)
|
||||
print(f"\n - {len(unseens)} survex files found which were not included in main tree. ({len(svx_scan.svxfileslist)} in main tree)", file=sys.stderr)
|
||||
print(f" -- Now loading the previously-omitted survex files.", file=sys.stderr)
|
||||
|
||||
@ -1621,7 +1689,6 @@ def FindAndLoadSurvex(survexblockroot):
|
||||
print(" - MEM:{:7.3f} MB ADDITIONALLY USED".format(mem1-mem0),file=sys.stderr)
|
||||
|
||||
|
||||
|
||||
# Before doing this, it would be good to identify the *equate and *entrance we need that are relevant to the
|
||||
# entrance locations currently loaded after this by LoadPos(), but could better be done before ?
|
||||
# look in MapLocations() for how we find the entrances
|
||||
@ -1635,18 +1702,15 @@ def FindAndLoadSurvex(survexblockroot):
|
||||
|
||||
#pr2 = cProfile.Profile()
|
||||
#pr2.enable()
|
||||
mem1 = get_process_memory()
|
||||
print(f" - MEM:{mem1:7.2f} MB NOT reading '{collatefilename}' into memory.",file=sys.stderr)
|
||||
print(" ", file=sys.stderr,end='')
|
||||
#----------------------------------------------------------------
|
||||
svx_load.LinearLoad(survexblockroot,survexfileroot.path, collatefilename)
|
||||
svx_load.LinearLoad(survexblockroot, survexfileroot.path, collatefilename)
|
||||
#----------------------------------------------------------------
|
||||
#pr2.disable()
|
||||
# with open('LinearLoad.prof', 'w') as f:
|
||||
# ps = pstats.Stats(pr2, stream=f)
|
||||
# ps.sort_stats(SortKey.CUMULATIVE)
|
||||
# ps.print_stats()
|
||||
svxlines = [] # empty 30MB of stashed file
|
||||
mem1 = get_process_memory()
|
||||
print("\n - MEM:{:7.2f} MB STOP".format(mem1),file=sys.stderr)
|
||||
print(" - MEM:{:7.3f} MB ADDITIONALLY USED".format(mem1-mem0),file=sys.stderr)
|
||||
@ -1696,21 +1760,33 @@ def MakeOmitFileRoot(fn):
|
||||
return fileroot
|
||||
|
||||
def LoadSurvexBlocks():
|
||||
mem1 = get_process_memory()
|
||||
print(" - MEM:{:7.2f} MB now ".format(mem1),file=sys.stderr)
|
||||
|
||||
print(' - Flushing All Survex Blocks...')
|
||||
# why does this increase memory use by 20 MB ?!
|
||||
# We have foreign keys, Django needs to load the related objects
|
||||
# in order to resolve how the relation should handle the deletion:
|
||||
# https://docs.djangoproject.com/en/3.2/ref/models/fields/#django.db.models.ForeignKey.on_delete
|
||||
SurvexBlock.objects.all().delete()
|
||||
SurvexFile.objects.all().delete()
|
||||
SurvexDirectory.objects.all().delete()
|
||||
SurvexPersonRole.objects.all().delete()
|
||||
SurvexStation.objects.all().delete()
|
||||
print(" - survex Data Issues flushed")
|
||||
mem1 = get_process_memory()
|
||||
print(" - MEM:{:7.2f} MB now. Foreign key objects loaded on deletion. ".format(mem1),file=sys.stderr)
|
||||
|
||||
print(" - Flushing survex Data Issues ")
|
||||
DataIssue.objects.filter(parser='survex').delete()
|
||||
DataIssue.objects.filter(parser='svxdate').delete()
|
||||
DataIssue.objects.filter(parser='survexleg').delete()
|
||||
DataIssue.objects.filter(parser='survexunits').delete()
|
||||
DataIssue.objects.filter(parser='entrances').delete()
|
||||
DataIssue.objects.filter(parser='xEntrances').delete()
|
||||
|
||||
print(" - survex Data Issues flushed")
|
||||
mem1 = get_process_memory()
|
||||
print(" - MEM:{:7.2f} MB now ".format(mem1),file=sys.stderr)
|
||||
|
||||
survexfileroot = MakeSurvexFileRoot()
|
||||
# this next makes a block_object associated with a file_object.path = SURVEX_TOPNAME
|
||||
survexblockroot = SurvexBlock(name=ROOTBLOCK, survexpath="", cave=None, survexfile=survexfileroot,
|
||||
|
Loading…
Reference in New Issue
Block a user