diff --git a/core/models/logbooks.py b/core/models/logbooks.py
index 4d35d6e..d7b164e 100644
--- a/core/models/logbooks.py
+++ b/core/models/logbooks.py
@@ -260,7 +260,15 @@ class QM(TroggleModel):
     )
     grade = models.CharField(max_length=1, blank=True, null=True, help_text="A/B/C/D/X")
     cave = models.ForeignKey("Cave", related_name="QMs", blank=True, null=True, on_delete=models.SET_NULL)
-    block = models.ForeignKey("SurvexBlock", null=True, on_delete=models.SET_NULL) # only for QMs from survex files
+
+    # only for QMs from survex files
+    block = models.ForeignKey(
+        "SurvexBlock",
+        to_field="_blockid",  # explicitly point to the UUID field
+        null=True,
+        on_delete=models.SET_NULL
+    )
+    # block = models.ForeignKey("SurvexBlock", null=True, on_delete=models.SET_NULL)
     blockname = models.TextField(blank=True, null=True) # NB truncated copy of survexblock name with last char added
     expoyear = models.CharField(max_length=4, blank=True, null=True)
     ticked = models.BooleanField(default=False)
diff --git a/core/models/survex.py b/core/models/survex.py
index 4ea8c98..ee99a37 100644
--- a/core/models/survex.py
+++ b/core/models/survex.py
@@ -15,7 +15,6 @@ from troggle.core.utils import height_from_utm, throw

 class SurvexFile(models.Model):
     path = models.CharField(max_length=200)
-    #survexdirectory = models.ForeignKey("SurvexDirectory", blank=True, null=True, on_delete=models.SET_NULL)
     primary = models.ForeignKey(
         "SurvexFile", related_name="primarysurvex", blank=True, null=True, on_delete=models.SET_NULL
     )
@@ -222,7 +221,8 @@ class SurvexBlock(models.Model):
     # This ID is generated as soon as you call SurvexBlock((). So we can use it while assembling the data
     # into the survexblock without having to keep doing a database transaction
     _blockid = models.UUIDField(
-        primary_key=False,
+        primary_key=True,
+        unique=True,
         default=uuid.uuid4,
         editable=False
     )
@@ -249,10 +249,10 @@ class SurvexBlock(models.Model):
     foreigners = models.BooleanField(default=False)

     class Meta:
-        ordering = ("id",)
+        ordering = ("_blockid",)

     def __str__(self):
-        return self.name and str(self.name) or "no_name-#" + str(self.id)
+        return self.name and str(self.name) or "no_name-#" + str(self.pk)  # pk is the primary key

     def isSurvexBlock(self): # Function used in templates
         return True
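# A minimal standalone sketch (illustrative, not part of the patch) of the idea
# behind the _blockid change above: with a database AUTOINCREMENT id, the
# primary key only exists after save(), so foreign keys between unsaved objects
# are impossible; with a client-side default=uuid.uuid4 the pk exists from
# __init__, so a whole object graph can be wired up in memory and committed
# later in bulk. The Node model is hypothetical (and assumes a configured
# Django app), not a troggle model.

import uuid
from django.db import models

class Node(models.Model):
    _blockid = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    parent = models.ForeignKey("self", null=True, on_delete=models.SET_NULL)

root = Node()              # root.pk is a UUID already: no DB round-trip needed
child = Node(parent=root)  # the FK can point at root before either row is saved
Node.objects.bulk_create([root, child])  # parent listed first, as in the patch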
diff --git a/parsers/survex.py b/parsers/survex.py
index f035f32..cec97ac 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -6,6 +6,7 @@ import re
 import subprocess
 import sys
 import time
+from collections import OrderedDict
 from datetime import date, datetime, timezone
 from pathlib import Path

@@ -57,6 +58,7 @@ survexblockroot = None
 ROOTBLOCK = "rootblock"
 METRESINFEET = 3.28084
 UNSEENS = "_unseens.svx"
+BATCH_SIZE = 900  # keep below sqlite's limit on bound variables per SQL statement
 IGNOREFILES = ["dummy_file", "_dummy_file"]
 IGNOREPREFIX = ["surface", "kataster", "gpx", "deprecated"] #"fixedpts",
@@ -135,35 +137,42 @@ def stash_data_issue(parser=None, message=None, url=None, sb=None):
     """Avoid hitting the database for error messages until the end of the import
     use a set, we do not want identically duplicate issues
+    BUT we have to use the sb UUID, not the sb object itself
     """
     global dataissues
-    # try:
-    #     if sb:
-    #         url2 = get_offending_filename(sb.survexfile.path)
-    # except Exception as e:
-    #     print(f" ! stash_data_issue() '{e}' '{sb=}' -- '{url=}'", file=sys.stderr)
-    #     raise
-    dataissues.add((parser, message, url, sb))
+
+    if sb:
+        dataissues.add((parser, message, url, sb._blockid))
+    else:
+        dataissues.add((parser, message, url, None))

-def store_data_issues():
+def store_data_issues(loadex=None):
     """Take the stash and store it permanently in the database instead
-    use BULK creation here !"""
+    use BULK creation here !
+
+    change to using the class instance, not a global stash
+    """
     global dataissues
     print(f" - Storing {len(dataissues)} Data Issues into database")
     # make a list of objects, but don't commit to database yet
     di_list = []
     for issue in dataissues:
-        parser, message, url, sb = issue
-        if url is None:
-            if sb is not None:
-                try:
-                    url = get_offending_filename(sb.survexfile.path)
-                except Exception as e:
-                    print(f" ! store_data_issues() '{e}' '{sb=}' -- '{url=}'", file=sys.stderr)
-                    url = get_offending_filename(sb) # assumed to be text
-        di_list.append(DataIssue(parser=parser, message=message, url=url))
+        sb = None  # sb is only resolved when we have a loader and a stashed block id
+        if not loadex:
+            parser, message, url, _ = issue
+        else:
+            parser, message, url, blkid = issue
+            if blkid:
+                sb = loadex._pending_block_saves[blkid]
+        if url is None:
+            if sb is not None:
+                try:
+                    url = get_offending_filename(sb.survexfile.path)
+                except Exception as e:
+                    print(f" ! store_data_issues() '{e}' '{sb=}' -- '{url=}'", file=sys.stderr)
+                    url = get_offending_filename(sb)  # assumed to be text
+        di_list.append(DataIssue(parser=parser, message=message, url=url))
     # Now commit to db
     DataIssue.objects.bulk_create(di_list)
     dataissues = set()
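# The stash/flush pattern above, reduced to a sketch (the helper names are
# illustrative; only DataIssue is a real troggle model): keep a set of plain
# hashable values while parsing, so duplicates collapse and no ORM object is
# held open, then resolve the keys back to objects and write everything with
# one bulk_create() at the end.

issues = set()

def stash(parser, message, key):
    issues.add((parser, message, key))  # a tuple of hashables dedupes itself

def flush(resolve):
    # resolve(key) maps a stored key (e.g. a _blockid) back to a rich object
    rows = [DataIssue(parser=p, message=m, url=resolve(k)) for (p, m, k) in issues]
    DataIssue.objects.bulk_create(rows)  # one bulk INSERT instead of thousands
    issues.clear()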
@@ -212,39 +221,6 @@ def get_people_on_trip(survexblock):
     return list(set(people))

-# THIS SHOULD NOT BE GLOBAL ! Should be per instance of file loader, even though they are globally unique
-trip_person_record = {} # a dict indexed by tuples (survexblock, personexpedition) = 1
-trip_team_cache = {} # a dict of lists indexed by survexblock._blockid
-def put_person_on_trip(survexblock, personexpedition, tm):
-    """Uses a cache to avoid a database query if it doesn't need to.
-    Only used for a single person"""
-    global trip_person_record
-    global trip_team_cache
-
-    if (survexblock._blockid, personexpedition) in trip_person_record:
-        return True
-
-    try:
-        personrole = SurvexPersonRole( # does not commit to db yet
-            survexblock=survexblock,
-            person = personexpedition.person,
-            personexpedition=personexpedition,
-            personname=tm
-        )
-    except:
-        message = f"! *team '{tm}' FAIL, already created {survexblock.survexfile.path} ({survexblock}) "
-        print(self.insp + message)
-        stash_data_issue(
-            parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
-        )
-
-    if survexblock._blockid not in trip_team_cache:
-        trip_team_cache[survexblock._blockid] = []
-    trip_team_cache[survexblock._blockid].append(personrole)
-    # print(f"-- trip_team_cache\n -- {survexblock=} - {survexblock._blockid}\n -- {trip_team_cache[survexblock._blockid]}\n -- {personrole}", file=sys.stderr)
-
-    trip_person_record[(survexblock._blockid, personexpedition)] = 1
-    return False

 def hack_save(survexblock):
     # #### Horrible hack to be properly written as a cache
@@ -482,6 +458,7 @@ class LoadingSurvex:
     pending = []
     adhocload = False
     person_pending_cache = {} # indexed per survexblock UUID, so robust wrt PUSH/POP begin/end
+    _pending_block_saves = OrderedDict()  # not {}, retain topological sort order

     def __init__(self):
@@ -511,60 +488,313 @@ class LoadingSurvex:
             parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
         )

-    def confirm_team_on_trip(self, survexblock):
-        """This is only called when processing a *end statement
+
+    def save_survexblocks_to_db(self):
+        """This saves the in-memory python objects into the database, at which point
+        the foreign keys are enabled and one can do queries on the database.
+
+        The sequence of survex blocks is constructed from the *include links,
+        depth first, so the list of survex blocks is topologically sorted.
+        HOWEVER what matters to the .parent links is the topological sorting
+        of the *begin/*end inclusions, which may or may not match the *include
+        sort sequence. Yuk.
         """
-        global trip_team_cache
-        if survexblock._blockid not in trip_team_cache:
-            return
-        #### STRIP THIS OUT and cache the SurvexPersonRole for the end of the survex block !
-        hack_save(survexblock)
+        def get_toposorted_blocks(blocks):
+            """A depth-first recursive topological sort that ensures that when a survexblock
+            has a parent, that parent always appears earlier in the list.
+            """
+            # 1. Map IDs to objects for quick lookup
+            id_map = {b._blockid: b for b in blocks}
+            topo_sorted_list = []
+            visited = set()
+
+            def visit(block):
+                # If we've already added this ID or it's None, skip
+                if block is None or block._blockid in visited:
+                    return
+
+                # 2. Get the parent id.
+                # If .parent is an object, we use its ID.
+                # If .parent is already an ID, we use it directly.
+                parent_val = block.parent
+
+                # This line is a "safety net": it ensures that no matter how
+                # the parent data is stored, we always end up with a UUID
+                # rather than a Python object.
+                # getattr(object, 'attribute_name', default_value)
+                parent_id = getattr(parent_val, '_blockid', parent_val)
+
+                # 3. Recursive step: visit the parent first
+                if parent_id in id_map:
+                    visit(id_map[parent_id])
+
+                # 4. Add current block to results
+                visited.add(block._blockid)
+                topo_sorted_list.append(block)
+
+            for b in blocks:
+                visit(b)
+
+            return topo_sorted_list
+
+ """ + chunks = [] + # Track which IDs are already "saved" (in a previous chunk) + saved_ids = set() + + # current_batch will hold blocks for the current "generation" + current_batch = [] + + for block in sorted_blocks: + parent_id = getattr(block.parent, '_blockid', block.parent) + + # If the parent is not yet 'saved', this block MUST + # go into a future batch. + if parent_id and parent_id not in saved_ids: + # Finish the current chunk and start a new one + if current_batch: + chunks.append(current_batch) + # Mark everything in the finished batch as 'saved' + saved_ids.update(b._blockid for b in current_batch) + current_batch = [] + + current_batch.append(block) + + # Safety: Even if there are no dependencies, respect the BATCH_SIZE + if len(current_batch) >= BATCH_SIZE: + chunks.append(current_batch) + saved_ids.update(b._blockid for b in current_batch) + current_batch = [] + + # Add the final trailing batch + if current_batch: + chunks.append(current_batch) + + return chunks + def get_generational_chunks_optimized(blocks): + """ + Splits a topologically sorted list into chunks where no child + exists in the same chunk as its parent. + Optimized for a shallow tree. + """ + + # 1. Map IDs to objects for quick lookup + id_map = {b._blockid: b for b in blocks} + + # 2. Dictionary to store the level (depth) of each block + # Level 0 = Root, Level 1 = Child of Root, etc. + levels = {} + + def get_level(block): + if block._blockid in levels: + return levels[block._blockid] + + parent_id = getattr(block.parent, '_blockid', block.parent) + + # If no parent OR parent is not in our current batch, it's a Root (Level 0) + if not parent_id or parent_id not in id_map: + levels[block._blockid] = 0 + return 0 + + # Otherwise, level is Parent's Level + 1 + level = get_level(id_map[parent_id]) + 1 + levels[block._blockid] = level + return level + + # Calculate levels for everyone + for b in blocks: + get_level(b) + + # 3. Group blocks by their level + from collections import defaultdict + generational_groups = defaultdict(list) + for b in blocks: + generational_groups[levels[b._blockid]].append(b) + + # 4. Final step: Split each level into batches of 900 + final_chunks = [] + for level in sorted(generational_groups.keys()): + level_blocks = generational_groups[level] + # Standard list slicing to split into BATCH_SIZE + for i in range(0, len(level_blocks), BATCH_SIZE): + final_chunks.append(level_blocks[i:i + BATCH_SIZE]) + + return final_chunks + + # construct the list. + already_saved_blocks = set(SurvexBlock.objects.values_list('_blockid', flat=True)) + blocks = [] + for blockid in self._pending_block_saves: + blocks.append(self._pending_block_saves[blockid]) + if blocks: + # valid_blocks = [] + # bad_parents = 0 + # for block in blocks: + # try: + # if block.parent: + # if block.parent not in already_saved_blocks: + # bad_parents += 1 + # # print(f" Invalid parent id: {block.survexfile}::{block} -> {block.parent}", file=sys.stderr) + # # block.full_clean() + # valid_blocks.append(block) + # except ValidationError as e: + # print(f" ! Block {block} is invalid: {e}", file=sys.stderr) + # print(f" ! Block {block} is invalid: {e}") + # print(f"\n !! {bad_parents} as-yet invalid parent ids out of {len(blocks)} blocks. {len(valid_blocks)} valid blocks", file=sys.stderr) + + topo_list = get_toposorted_blocks(blocks) + print(f"\n !! {len(topo_list)=} blocks. 
+        # construct the list.
+        already_saved_blocks = set(SurvexBlock.objects.values_list('_blockid', flat=True))
+        blocks = []
+        for blockid in self._pending_block_saves:
+            blocks.append(self._pending_block_saves[blockid])
+        if blocks:
+            # valid_blocks = []
+            # bad_parents = 0
+            # for block in blocks:
+            #     try:
+            #         if block.parent:
+            #             if block.parent not in already_saved_blocks:
+            #                 bad_parents += 1
+            #                 # print(f"  Invalid parent id: {block.survexfile}::{block} -> {block.parent}", file=sys.stderr)
+            #         # block.full_clean()
+            #         valid_blocks.append(block)
+            #     except ValidationError as e:
+            #         print(f" ! Block {block} is invalid: {e}", file=sys.stderr)
+            #         print(f" ! Block {block} is invalid: {e}")
+            # print(f"\n !! {bad_parents} as-yet invalid parent ids out of {len(blocks)} blocks. {len(valid_blocks)} valid blocks", file=sys.stderr)
+
+            topo_list = get_toposorted_blocks(blocks)
+            print(f"\n !! {len(topo_list)=} blocks. {len(blocks)=}", file=sys.stderr)
+
+            safe_chunks = get_generational_chunks_optimized(topo_list)
+
-        # Now commit to db
-        pr_list = trip_team_cache[survexblock._blockid]
-        # print(f" PR_LIST {pr_list} {survexblock._blockid }", file=sys.stderr)
-        valid_list = []
-        for pr in pr_list:
             try:
-                # print(f"___ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
-                pr.full_clean()
-                valid_list.append(pr)
-            except ValidationError as e:
-                print(f" ! PR is invalid: {e} {survexblock} {pr}", file=sys.stderr)
-                print(f" ! PR is invalid: {e} {survexblock} {pr}")
+                for i, chunk in enumerate(safe_chunks):
+                    print(f"Saving Chunk {i+1} ({len(chunk)} blocks)...", file=sys.stderr)
+                    SurvexBlock.objects.bulk_create(
+                        chunk,
+                        update_conflicts=True,  # the root item probably exists already
+                        # update_fields is needed if we allow conflict update
+                        update_fields=['name', 'title', 'parent', 'date',
+                                       'expedition', 'survexfile', 'scanswallet', 'legsall', 'legslength', 'foreigners',],
+                        unique_fields=['_blockid']
+                    )
+                print("Success: Entire tree saved.", file=sys.stderr)
+            except Exception as e:
+                print(f"Failed at chunk {i+1}: {e}", file=sys.stderr)
+
+        return
-
-        SurvexPersonRole.objects.bulk_create(valid_list)
-        # for pr in pr_list:
-        #     print(f"+++ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
-        #     SurvexPersonRole.objects.create(pr).save()
+        # Unreachable alternative strategies, kept for reference:
+        try:
+            for i in range(0, len(blocks), 1):
+                valid_blocks[i].save()
+        except Exception as e:
+            print(f" !! Error in SINGLE create for survexblocks at {i}: {e}", file=sys.stderr)
+        return
+        # Nope, even topo-sorted, we can't know what batch size is suitable
+        # without some of the items being invalid
+        try:
+            for i in range(0, len(topo_list), BATCH_SIZE):
+                SurvexBlock.objects.bulk_create(topo_list[i:i+BATCH_SIZE])
+        except Exception as e:
+            print(f" !! Error in bulk_create for survexblocks at {i}: {e}", file=sys.stderr)
+
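# Why BATCH_SIZE is 900: on SQLite every field of every row in a bulk INSERT
# is a bound parameter, and older SQLite builds cap a statement at 999
# parameters (SQLITE_MAX_VARIABLE_NUMBER), so chunks are kept safely below
# that. A back-of-envelope sketch; the numbers are illustrative, not measured:

SQLITE_MAX_VARS = 999  # default compile-time limit in older SQLite builds
FIELDS_PER_ROW = 11    # _blockid plus the ten update_fields named above

rows_per_statement = SQLITE_MAX_VARS // FIELDS_PER_ROW
print(rows_per_statement)  # ~90; Django slices a 900-block chunk further anyway

# Django's bulk_create() computes a similar per-backend batch size internally,
# so BATCH_SIZE here mainly bounds how much is held in each generation list.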
+    trip_team_cache = {}  # a dict of lists indexed by survexblock._blockid
+
+    def put_personrole_on_trip(self, survexblock, personexpedition, tm):
+        """Only used for a single person.
+        Creates a SurvexPersonRole object, but this is not committed to the database
+        until all the survexblocks have been saved.
+        """
+        try:
+            personrole = SurvexPersonRole( # does not commit to db yet
+                survexblock=survexblock, # survexblock is not in the db yet
+                person = personexpedition.person,
+                personexpedition=personexpedition,
+                personname=tm
+            )
+        except:
+            message = f"! *team '{tm}' FAIL, already created {survexblock.survexfile.path} ({survexblock}) "
+            print(self.insp + message)
+            stash_data_issue(
+                parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
+            )
+            return False  # personrole was never created, so there is nothing to cache
+
+        if survexblock._blockid not in self.trip_team_cache:
+            self.trip_team_cache[survexblock._blockid] = []
+        self.trip_team_cache[survexblock._blockid].append(personrole)
+        # print(f"-- trip_team_cache\n -- {survexblock=} - {survexblock._blockid}\n -- {trip_team_cache[survexblock._blockid]}\n -- {personrole}", file=sys.stderr)
+
+        return False
+
+    def process_pending_team(self, survexblock):
+        """This is only called when processing a *end statement
-    # Not working, so do not clear cache!
-    trip_team_cache[survexblock] = [] # in database now, so empty cache
-
-    def check_team_cache(self, label=None):
-        global trip_team_cache
-        message = f"! check_team_cache() called.. "
-        print(message)
-        print(message, file=sys.stderr)
-        for block in trip_team_cache:
-            message = f"! *team CACHEFAIL, trip_team_cache {block.survexfile.path} ({block}). label:{label}"
-            print(message)
-            print(message, file=sys.stderr)
+
+        It converts a set of names (strings) into valid PersonExpedition
+        objects for the current expo, creating SurvexPersonRoles for them.
+        """
+        # Many survex blocks have no *team members at all.
+        # flush_persons_pending() zeros the cache, so call it once only and keep the result.
+        if not (teamnames := self.flush_persons_pending(survexblock._blockid)):
+            return
+
+        if not (expo := self.get_expo_for_block(survexblock)):
+            print(f"   Buggeration FAIL {survexblock=}", file=sys.stderr)
+            return
+
+        # Sanitise the set of names, and validate them as known people
+        for tm in teamnames:
+            if known_foreigner(tm):
+                message = f"- *team '{tm}' known foreigner {survexblock.survexfile.path} ({survexblock})"
+                print(self.insp + message)
+                # stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
+            else:
+                pe = GetPersonExpeditionNameLookup(expo).get(tm.lower())
+                if pe:
+                    self.put_personrole_on_trip(survexblock, pe, tm)
+                else:
+                    message = f"! *team '{tm}' FAIL personexpedition {survexblock.survexfile.path} ({survexblock}) "
+                    print(self.insp + message)
+                    stash_data_issue(
+                        parser="survex",
+                        message=message,
+                        url=None, sb=survexblock,
+                    )
+
+    def save_personroles_to_db(self):
+        """This should be run only after all the survexblocks have been saved
+        to the database, so that they have primary keys usable as ForeignKey targets.
+        """
+        for blk in self.trip_team_cache:
+            # hack_save(survexblock)
+
+            # Now commit to db
+            pr_list = self.trip_team_cache[blk]
+            # print(f" PR_LIST {pr_list} {blk}", file=sys.stderr)
+            valid_list = []
+            for pr in pr_list:
+                try:
+                    # print(f"___ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
+                    pr.full_clean()
+                    valid_list.append(pr)
+                except ValidationError as e:
+                    print(f" ! PR is invalid: {e} {pr.survexblock} {pr}", file=sys.stderr)
+                    print(f" ! PR is invalid: {e} {pr.survexblock} {pr}")
+
+            SurvexPersonRole.objects.bulk_create(valid_list)
+            # for pr in pr_list:
+            #     print(f"+++ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
+            #     SurvexPersonRole.objects.create(pr).save()
+
+        self.trip_team_cache = {}  # in database now, so empty the cache
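# The intended call order for the two caches above, as a sketch (the driver
# lines are illustrative; the method names are the real ones from this patch).
# SurvexPersonRole rows hold a ForeignKey to SurvexBlock, so nothing enforces
# the ordering except calling the save methods in this sequence:

loader = LoadingSurvex()
# ... parse every .svx file, filling _pending_block_saves and trip_team_cache ...
loader.save_survexblocks_to_db()  # parents land in earlier generations than children
loader.save_personroles_to_db()   # safe now: every FK target row exists
store_data_issues(loader)         # resolves stashed _blockids via _pending_block_saves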
""" if blockid in self.person_pending_cache: teamnames = self.person_pending_cache[blockid] # a set of names @@ -583,11 +813,10 @@ class LoadingSurvex: def get_team_inherited(self, survexblock): # survexblock only used for debug mesgs """See get_team_pending(survexblock._blockid) which gets called at the same time, when we see a *date line""" - global person_pending_cache if self.inheritteam: message = ( - f"- no *team INHERITING ({survexblock.parent})>({survexblock}) {survexblock.survexfile.path} '{self.inheritteam}'" + f"- no *team on blcok so INHERITING ({survexblock.parent})>({survexblock}) {survexblock.survexfile.path} '{self.inheritteam}'" ) print(self.insp + message) # stash_data_issue( @@ -674,6 +903,17 @@ class LoadingSurvex: # expoyear = "1976" return + def get_expo_for_block(self, survexblock): + if expo := survexblock.expedition: # may be None if no *date yet + return expo + if survexblock.date: + expo = Expedition.objects.get(year=str(survexblock.date)[:4]) + return expo + if expo := survexblock.parent.expedition: # immediate parent works mostly + print(f" WARNING using parent block expo year {survexblock=}",file=sys.stderr) + return expo + return False + def fix_anonymous(self, survexblock): """Called when we reach *end of a block Checks to see if the block has no team attached, in which case it uses the @@ -689,24 +929,14 @@ class LoadingSurvex: if survexblock.parent.name == "troggle_unseens": # Bolluxed up if we try to inherit from this random junk, so don't. return - - expo = survexblock.expedition # may be None if no *date yet - if not expo: - expo = survexblock.parent.expedition # immediate parent works mostly - if not expo: - return if not self.currentteam: # i.e. if it is a dated block and has no team if teamnames := self.get_team_inherited(survexblock):# WALRUS - for tm in teamnames: - personexpedition = GetPersonExpeditionNameLookup(expo).get(tm.lower()) - if personexpedition: - put_person_on_trip(survexblock, personexpedition, tm) + self.person_pending_cache[survexblock._blockid] = teamnames return def cache_survexblock(self, survexblock): - # appends to list, creates an empty list to append to if it doen't exist yet - self._pending_block_saves.setdefault(survexblock._blockid, []).append(survexblock) + self._pending_block_saves[survexblock._blockid] = survexblock def LoadSurvexTeam(self, survexblock, line): """Interpeting the *team fields has been updated to current 2025 survex standard, @@ -731,41 +961,13 @@ class LoadingSurvex: # so we can't validate whether the person was on expo or not. # we will have to attach them to the survexblock anyway, and then do a # later check on whether they are valid when we get the date. - - # refactor this to collect names before and after a *date, and commit them as - # a bulk update only at the END of the survexblock - if not tm: # i.e. null person inthe *team + if not tm: # i.e. null person in the *team return # ignore: troggle does not need to know. Survex want to though. 
         self.currentteam.add(tm) # used in push/pop block code
-        expo = survexblock.expedition # may be None if no *date yet
+        self.add_to_pending(survexblock, tm)

-        if expo:
-            personexpedition = GetPersonExpeditionNameLookup(expo).get(tm.lower())
-            if personexpedition:
-                put_person_on_trip(survexblock, personexpedition, tm)
-            elif known_foreigner(tm): # note, not using .lower()
-                message = f"- *team {expo.year} '{tm}' known foreigner on *team {survexblock.survexfile.path} ({survexblock}) in '{line=}'"
-                print(self.insp + message)
-                # stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
-            else:
-                # we know the date and expo, but can't find the person
-                message = f"! *team {expo.year} '{tm}' FAIL personexpedition lookup on *team {survexblock.survexfile.path} ({survexblock}) in '{line=}' {tm=}"
-                print(self.insp + message)
-                stash_data_issue(
-                    parser="survex", message=message, url=None, sb=survexblock
-                )
-        else:
-            self.add_to_pending(survexblock, tm)
-            # don't know the date yet, so cannot query the table about validity.
-            # assume the person is valid. It will get picked up with the *date appears
-            # There are hundreds of these..
-            message = (
-                f"- Team before Date: {line} ({survexblock}) {survexblock.survexfile.path}"
-            )

         # teamfix = r"(?i)(.*?)\s+" + roles + r"?(?:es|s)?$" -- (.*?) means a non-greedy capture
         if fixstyle := self.rx_teamfix.match(line): # matches the optional role at the the end of the string WALRUS
             tmlist = fixstyle.group(1).strip('\"') # remove quotes, if present
@@ -973,27 +1175,7 @@ class LoadingSurvex:
             stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
         return expo

-    def process_pending_team(self, survexblock):
-        expo = survexblock.expedition
-        if teamnames := self.get_team_pending(survexblock._blockid):
-            for tm in teamnames:
-                if known_foreigner(tm):
-                    message = f"- *team {expo.year} '{tm}' known foreigner *date (misordered) {survexblock.survexfile.path} ({survexblock})"
-                    print(self.insp + message)
-                    # stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
-                else:
-                    pe = GetPersonExpeditionNameLookup(expo).get(tm.lower())
-                    if pe:
-                        put_person_on_trip(survexblock, pe, tm)
-                    else:
-                        message = f"! *team {expo.year} '{tm}' FAIL personexpedition lookup on *date {survexblock.survexfile.path} ({survexblock}) "
-                        print(self.insp + message)
-                        stash_data_issue(
-                            parser="survex",
-                            message=message,
-                            url=None, sb=survexblock,
-                        )
+

     def LoadSurvexDate(self, survexblock, line):
         """We now have a valid date for this survexblock, so we now know the expo
@@ -1027,12 +1209,7 @@ class LoadingSurvex:
             if len(team) > 0:
                 message = f"! *team {expo.year} Multiple *date in one block? Already someone on team when *date seen. {survexblock.survexfile.path} ({survexblock}) in '{line}'"
                 print(self.insp + message)
-                stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
-
-        #self.process_pending_team(survexblock)
-
-
-
+                stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
         oline = line
         perps = get_people_on_trip(survexblock) # perps used for diagnostic error messages only: they are to blame
@@ -1665,7 +1842,7 @@ class LoadingSurvex:
                 self.currentsurvexfile = newfile
                 return # abort as everything already done for object creation

-        newfile.save() # until we do this there is no internal id so no foreign key works
+        newfile.save() # until we do this there is no internal .id so no foreign key works
         self.currentsurvexfile = newfile
         newfile.primary = self.set_primary(headpath)
@@ -1924,7 +2101,7 @@ class LoadingSurvex:
         nlegstotal = 0
         self.relativefilename = path
-        self._pending_block_saves = {} # Cache for survex blocks to save at the end
+        # self._pending_block_saves = {} # Cache for survex blocks to save at the end

         #self.IdentifyCave(path, svxid, depth) # this will produce null for survex files which are geographic collections
         self.currentsurvexfile = survexblock.survexfile
@@ -2024,7 +2201,6 @@ class LoadingSurvex:

         def starstatement(star, fullline):
             import time
-            # ...existing code...
             """Interprets a survex comamnd where * is the first character on the line, e.g. *begin"""
             nonlocal survexblock
             nonlocal blk_name
@@ -2075,8 +2251,9 @@ class LoadingSurvex:
                     newsurvexblock.title = (
                         "(" + survexblock.title + ")"
                     ) # copy parent inititally, overwrite if it has its own
+                    self.cache_survexblock(newsurvexblock) # note for later saving in db
                     survexblock = newsurvexblock
-                    survexblock.save() # Only save once, after all fields are set, or try to delay until *end using caches
+                    # Only save to db once, after all fields are set
                     tickle()

             # ---------------------------END
@@ -2090,10 +2267,8 @@ class LoadingSurvex:
                 self.fix_undated(survexblock)
                 self.fix_anonymous(survexblock)
-                self.confirm_team_on_trip(survexblock)
                 self.process_pending_team(survexblock)
-                self.cache_survexblock(survexblock)
-                # POP state ++++++++++++++
+                # POP state ++++++++++++++
                 popblock()
                 self.inheritteam = self.teaminheritstack.pop()
                 self.currentteam = self.teamcurrentstack.pop()
@@ -2200,24 +2375,9 @@ class LoadingSurvex:

         # At the end of the whole (concatenated) file, save all cached survexblocks using bulk_update
-        blocks = []
-        for blockid in self._pending_block_saves:
-            blocks.append(self._pending_block_saves[blockid])
-        if blocks:
-            # valid_blocks = []
-            # for block in blocks:
-            #     try:
-            #         block.full_clean()
-            #         valid_blocks.append(block)
-            #     except ValidationError as e:
-            #         print(f" ! Block {block} is invalid: {e}", file=sys.stderr)
-            #         print(f" ! Block {block} is invalid: {e}")
-            try:
-                BATCH_SIZE = 900
-                for i in range(0, len(blocks), BATCH_SIZE):
-                    SurvexBlock.objects.bulk_update(blocks[i:i+BATCH_SIZE], ["legsall", "legslength", "parent"])
-            except Exception as e:
-                print(f"\n !! Error in bulk_update for survexblocks: {e}", file=sys.stderr)
+
+        self.save_survexblocks_to_db()
+        self.save_personroles_to_db()

     def PushdownStackScan(self, survexblock, path, finname, flinear, io_collate):
         """Follows the *include links in all the survex files from the root file (usually 1623.svx)
@@ -2604,6 +2764,7 @@ def FindAndLoadSurvex():
     io_collate.write(f";*edulcni {survexfileroot.path}\n")

     svx_scan.check_cache_clean()
+    store_data_issues(svx_scan)

     mem1 = get_process_memory()
     flinear.write(f"\n - MEM:{mem1:.2f} MB STOP {survexfileroot.path}\n")
@@ -2721,6 +2882,7 @@ def FindAndLoadSurvex():
     io_collate.write(f";*edulcni {UNSEENS}\n")

     omit_scan.check_cache_clean()
+    store_data_issues(omit_scan)

     mem1 = get_process_memory()
     flinear.write(f"\n - MEM:{mem1:.2f} MB STOP {UNSEENS} Unseen Oddments\n")
@@ -3010,6 +3172,7 @@ def parse_one_file(fpath):
 # --------------------------------------in progress--
     if len(sbs)<1:
         print(f" ! No survex blocks found. Parser failure...")
     for sb in sbs:
+        print(f" - {sb.pk} re-setting survex block parent {sb=}", file=sys.stderr)
         print(f" - {sb.id} re-setting survex block parent {sb=}")
         sb.parent = existingparent # should be all the same
         sb.save()
@@ -3227,7 +3390,7 @@ def LoadSurvexBlocks():
     # duration = time.time() - start
     # print(f" - TIME: {duration:7.2f} s", file=sys.stderr)
-    store_data_issues()
+
     # duration = time.time() - start
     # print(f" - TIME: {duration:7.2f} s", file=sys.stderr)
     if dup_includes > 0: