diff --git a/parsers/survex.py b/parsers/survex.py index aeaf55c..cb6f2ed 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -168,9 +168,9 @@ def get_people_on_trip(survexblock): return list(set(people)) -# THIS SHOULD NOT BE GLOBAL ! Should be per instance of file loader -trip_person_record = {} # indexed by (survexblock, personexpedition) - so never needs cleaning out -trip_team_cache = {} # indexed by survexblock, so never needs cleaning out +# THIS SHOULD NOT BE GLOBAL ! Should be per instance of file loader, even though they are globally unique +trip_person_record = {} # a dict indexed by tuples (survexblock, personexpedition) = 1 +trip_team_cache = {} # a dict of lists indexed by survexblock def put_person_on_trip(survexblock, personexpedition, tm): """Uses a cache to avoid a database query if it doesn't need to. Only used for a single person""" @@ -197,6 +197,7 @@ def put_person_on_trip(survexblock, personexpedition, tm): if survexblock not in trip_team_cache: trip_team_cache[survexblock] = [] trip_team_cache[survexblock].append(personrole) + print(f"-- trip_team_cache {survexblock}, {trip_team_cache[survexblock]}, {personrole}") trip_person_record[(survexblock, personexpedition)] = 1 return False @@ -228,17 +229,17 @@ def add_to_pending(survexblock, tm): if survexblock not in person_pending_cache: person_pending_cache[survexblock] = set() - person_pending_cache[survexblock].add(tm) + print(f"-- person_pending_cache {survexblock}, {person_pending_cache[survexblock]}, {tm}") def get_team_pending(survexblock): - """A set of *team names before we get to the *date line in a survexblock + """A set of *team names added at the end of the survex block """ global person_pending_cache if survexblock in person_pending_cache: teamnames = person_pending_cache[survexblock] # a set of names - person_pending_cache[survexblock] = () + person_pending_cache[survexblock] = set() #re zero the cache return teamnames return @@ -634,9 +635,9 @@ class LoadingSurvex: # so we can't validate whether the person was on expo or not. # we will have to attach them to the survexblock anyway, and then do a # later check on whether they are valid when we get the date. - - # big changes 24/2/2025 when Olly changed survex behaviour, and unilaterally edited hundreds of - # survex files to match. So troggle has to change to match the revised, stricter syntax. + + # refactor this to collect names before and after a *date, and commit them as + # a bulk update only at the END of the survexblock if not tm: # i.e. null person inthe *team return # ignore: troggle does not need to know. Survex want to though. @@ -863,13 +864,24 @@ class LoadingSurvex: print(self.insp + message) stash_data_issue(parser='survex', message=message, url=None, sb=(survexblock.survexfile.path)) return expo - + def LoadSurvexDate(self, survexblock, line): """We now have a valid date for this survexblock, so we now know the expo it relates to and can use GetPersonExpeditionNameLookup(expo) to check whether the people are correct. - Note that a *team line can come before AND after a *date line""" + Note that a *team line can come before AND after a *date line + + REFACTOR this so that the team setting is independent of the *date line. + Keep the team stored in a pending list, and commit to database only at the end of + the survexblock in a bulk_update. + + This has turned out to be more fiddly than expected and one + attempt has already been aborted. + + caves-1623/161/triassic/henricat.svx has the *date after all the team members + in each survexblock + """ def setdate_on_survexblock(year): """Either *date comes before any *team, in which case there are no prior @@ -887,7 +899,7 @@ class LoadingSurvex: print(self.insp + message) stash_data_issue(parser='survex', message=message, url=None, sb=(survexblock.survexfile.path)) - if teamnames := get_team_pending(survexblock): # WALRUS https://docs.python.org/3/whatsnew/3.8.html#assignment-expressions + if teamnames := get_team_pending(survexblock): for tm in teamnames: if known_foreigner(tm): message = f"- *team {expo.year} '{tm}' known foreigner *date (misordered) {survexblock.survexfile.path} ({survexblock}) in '{line}'" @@ -908,7 +920,7 @@ class LoadingSurvex: ) oline = line - perps = get_people_on_trip(survexblock) # What, you don't know Judge Dredd slang ? + perps = get_people_on_trip(survexblock) # perps used for diagnostic error messages only: they are to blame if len(line) > 10: if line[10] == "-": # ie a range, just look at first date @@ -1771,15 +1783,6 @@ class LoadingSurvex: if edulcni: self.ProcessEdulcniLine(edulcni) - # def get_cave(self, path): - # """Read the file path to a survex file and guesses the cave - # """ - # path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", path) - # if path_match: - # pos_cave = f"{path_match.group(1)}-{path_match.group(2)}" - # cave = getCaveByReference(pos_cave) - # return cave - # return None def LinearLoad(self, survexblock, path, collatefilename): """Loads a single survex file. Usually used to import all the survex files which have been collated