diff --git a/parsers/survex.py b/parsers/survex.py index b55485a..d977296 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -67,12 +67,16 @@ def store_data_issues(): global dataissues print(f" - Storing {len(dataissues)} Data Issues into database") + # make a list of objects, but don't commit to database yet + di_list = [] for issue in dataissues: parser, message, url, sb = issue if url is None: if sb is not None: url = get_offending_filename(sb) - DataIssue.objects.create(parser=parser, message=message, url=url) + di_list.append(DataIssue(parser=parser, message=message, url=url)) + # Now commit to db + DataIssue.objects.bulk_create(di_list) dataissues = [] # in database now, so empty cache def get_offending_filename(path): @@ -105,17 +109,19 @@ def get_people_on_trip(survexblock): return list(set(people)) -trip_person_cache = {} # per survexblock, so robust wrt PUSH/POP begin/end +trip_person_record = {} # per survexblock, so robust wrt PUSH/POP begin/end +trip_team_cache = {} # per survexblock, so robust wrt PUSH/POP begin/end def put_person_on_trip(survexblock, personexpedition, tm): """Uses a cache to avoid a database query if it doesn't need to. Only used for a single person""" - global trip_person_cache + global trip_person_record + global trip_team_cache - if (survexblock, personexpedition) in trip_person_cache: + if (survexblock, personexpedition) in trip_person_record: return True try: - personrole = SurvexPersonRole.objects.create( + personrole = SurvexPersonRole( # does not commit to db yet survexblock=survexblock, person = personexpedition.person, personexpedition=personexpedition, @@ -128,9 +134,33 @@ def put_person_on_trip(survexblock, personexpedition, tm): parser="survex", message=message, url=None, sb=(survexblock.survexfile.path) ) - trip_person_cache[(survexblock, personexpedition)] = 1 + if survexblock not in trip_team_cache: + trip_team_cache[survexblock] = [] + trip_team_cache[survexblock].append(personrole) + + trip_person_record[(survexblock, personexpedition)] = 1 return False +def confirm_team_on_trip(survexblock): + global trip_team_cache + + if survexblock not in trip_team_cache: + return + # Now commit to db + SurvexPersonRole.objects.bulk_create(trip_team_cache[survexblock]) + trip_team_cache[survexblock] = [] # in database now, so empty cache + +def check_team_cache(): + global trip_team_cache + + message = f"! *team CACHEFAIL check_team_cache() called " + print(message) + + for block in trip_team_cache: + message = f"! *team CACHEFAIL, already created {block.survexfile.path} ({block}) " + print(message) + + person_pending_cache = {} # per survexblock, so robust wrt PUSH/POP begin/end def add_to_pending(survexblock, tm): """Collects team names before we have a date so cannot validate against @@ -493,7 +523,7 @@ class LoadingSurvex: """Either *date comes before any *team, in which case there are no prior PersonRoles attached, or *team came before this *date, in which case the names are only in 'pending'""" - global trip_person_cache + global trip_person_record expo = self.get_expo_from_year(year) survexblock.expedition = expo @@ -529,7 +559,7 @@ class LoadingSurvex: if len(line) > 10: message = "! DATE Warning LONG DATE '{}' ({}) {}".format(oline, survexblock, survexblock.survexfile.path) print(self.insp+message) - stash_data_issue(parser='survex', message=message, url=None, sb=(survexblock.survexfile.path)) + stash_data_issue(parser='svxdate', message=message, url=None, sb=(survexblock.survexfile.path)) if line[10] == "-": # ie a range, just look at first date line = line[0:10] if len(line) == 10: @@ -1412,7 +1442,6 @@ class LoadingSurvex: nlegstotal += self.legsnumber self.fix_anonymous(survexblock) - try: survexblock.parent.save() # django insists on this although it is already saved !? except: @@ -1423,6 +1452,7 @@ class LoadingSurvex: except: print(survexblock, file=sys.stderr) raise + confirm_team_on_trip(survexblock) # POP state ++++++++++++++ popblock() self.inheritteam = self.teaminheritstack.pop() @@ -1875,6 +1905,7 @@ def FindAndLoadSurvex(survexblockroot): f"\n - {len(unseens)} survex files found which were not included in main tree. ({len(svx_scan.svxfileslist)} in main tree)", file=sys.stderr, ) + check_team_cache() print(" -- Now loading the previously-omitted survex files.", file=sys.stderr) with open(Path(settings.SURVEX_DATA, "_unseens.svx"), "w") as u: @@ -1921,6 +1952,9 @@ def FindAndLoadSurvex(survexblockroot): flinear.write(f"{omit_scan.depthinclude:2} {indent} *edulcni _unseens\n") fcollate.write(";*edulcni _unseens.svx\n") + + check_team_cache() + mem1 = get_process_memory() flinear.write(f"\n - MEM:{mem1:.2f} MB STOP _unseens.svx OMIT\n") flinear.write(f" - MEM:{mem1 - mem0:.3f} MB ADDITIONALLY USED OMIT\n")