From bbaf52b8b77f62432264f66014192df4e762db97 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Fri, 30 Jan 2026 23:32:09 +0000 Subject: [PATCH] tidied diagnostics --- parsers/survex.py | 300 +++++++++++++++++++++++++--------------------- 1 file changed, 164 insertions(+), 136 deletions(-) diff --git a/parsers/survex.py b/parsers/survex.py index c36bc8a..bf8c5a2 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -576,6 +576,48 @@ class LoadingSurvex: return final_chunks + def sb_save_sqlite(): + try: + SurvexBlock.objects.bulk_create( + chunk, + update_conflicts=True, # root item probably exists already + # update_fields needed if we allow conflict update + update_fields=['name', 'title', 'parent', 'date', + 'expedition', 'survexfile', 'scanswallet', 'legsall', 'legslength', 'foreigners',], + unique_fields=['_blockid'] + ) + print(f" - {len(chunk)} SurvexBlocks bulk-created in db", file=sys.stderr) + except Exception as e: + + message = f"\n ! - EXCEPTION '{e}' - in SB bulk update. Falling back onto sequential updates for this chunk" + print(message) + print(message, file=sys.stderr) + stash_data_issue(parser="survex", message=message) + + sb_save_mysql() + + + def sb_save_mysql(): + nc = 0 + ns = 0 + for sb in chunk: + got_obj, created = SurvexBlock.objects.get_or_create( + _blockid=sb._blockid, + ) + if created: + nc += 1 + else: + ns += 1 + # update the block if it changed + got_obj._blockid = sb._blockid + got_obj.save() + if (nc + ns == len(chunk)): + result = "OK" + else: + result = "OK" + print(f" - {ns}/{nc} SurvexBlocks saved/created to db {result}", file=sys.stderr) + + # construct the list. #already_saved_blocks = set(SurvexBlock.objects.values_list('_blockid', flat=True)) blocks = [] @@ -588,37 +630,11 @@ class LoadingSurvex: safe_chunks = get_generational_chunks_optimized(topo_list) try: for i, chunk in enumerate(safe_chunks): - print(f" - Saving chunk {i+1} ({len(chunk)} blocks)...", file=sys.stderr) - try: - SurvexBlock.objects.bulk_create( - chunk, - update_conflicts=True, # root item probably exists already - # update_fields needed if we allow conflict update - update_fields=['name', 'title', 'parent', 'date', - 'expedition', 'survexfile', 'scanswallet', 'legsall', 'legslength', 'foreigners',], - unique_fields=['_blockid'] - ) - except Exception as e: - - message = f"\n ! - EXCEPTION '{e}' - in SB bulk update. Falling back onto sequential updates for this chunk" - print(message) - print(message, file=sys.stderr) - stash_data_issue(parser="survex", message=message) - - n = 0 - for sb in chunk: - got_obj, created = SurvexBlock.objects.get_or_create( - _blockid=sb._blockid, - ) - if created: - n += 1 - #print(f" - {sb} Created", file=sys.stderr) - else: - # update the block if it changed - got_obj._blockid = sb._blockid - got_obj.save() - # print(f" - {sb} SAVED", file=sys.stderr) - print(f" - {len(chunk)} SBs saved to db, {n} created", file=sys.stderr) + print(f" - Saving chunk {i+1} ({len(chunk)} blocks)... ({connection.vendor})", file=sys.stderr) + if connection.vendor == 'mysql': + sb_save_mysql() + else: + sb_save_sqlite() print(" - Success: Entire tree of survexblocks saved to db.", file=sys.stderr) @@ -626,92 +642,51 @@ class LoadingSurvex: print(f"Failed at chunk {i+1}: {e}", file=sys.stderr) return - - def put_personrole_on_trip(self, survexblock, personexpedition, tm): - """ - Only used for a single person. - Creates a SurvexPersonRole object, but this is not committed to the database until - all the survexblocks have been saved. - """ - - try: - personrole = SurvexPersonRole( # does not commit to db yet - survexblock=survexblock, # survexblock has no _id yet - person = personexpedition.person, - personexpedition=personexpedition, - personname=tm - ) - except: - message = f"! *team '{tm}' FAIL, already created. {survexblock.survexfile.path} ({survexblock}) " - print(self.insp + message) - print(self.insp + message, file=sys.stderr) - stash_data_issue( - parser="survex", message=message, url=None, sb=(survexblock.survexfile.path) - ) - if survexblock._blockid not in self._pending_pr_saves: - self._pending_pr_saves[survexblock._blockid] = [] - self._pending_pr_saves[survexblock._blockid].append(personrole) - # print(f"-- _pending_pr_saves\n -- {survexblock=} - {survexblock._blockid}\n -- {self._pending_pr_saves[survexblock._blockid]}\n -- {personrole}", file=sys.stderr) - - return False - - def process_pending_team(self, survexblock): - """This is only called when processing a *end statement - - It converts a list of names as strings into a list of valid - PersonExpedition objects for the current expo. - """ - - # Many survex blocks have no *team members at all - if survexblock._blockid not in self.person_pending_cache: - return - - if not (expo := self.get_expo_for_block(survexblock)): - message = f"! Buggeration FAIL, undated, but people exist.. {survexblock=}" - print(self.insp + message) - print(message,file=sys.stderr) - stash_data_issue( - parser="survex", - message=message, - url=None, sb=survexblock, - ) - return - - # Sanitise the set of names, and validate as valid people - if teamnames := self.flush_persons_pending(survexblock._blockid): - for tm in teamnames: - if known_foreigner(tm): - message = f"- *team '{tm}' known foreigner {survexblock.survexfile.path} ({survexblock})" - print(self.insp + message) - # stash_data_issue(parser='survex', message=message, url=None, sb=survexblock) - survexblock.foreigners = True - else: - pe = GetPersonExpeditionNameLookup(expo).get(tm.lower()) - if pe: - self.put_personrole_on_trip(survexblock, pe, tm) - - else: - message = f"! *team '{tm}' FAIL personexpedition {survexblock.survexfile.path} ({survexblock}) " - print(self.insp + message) - stash_data_issue( - parser="survex", - message=message, - url=None, sb=survexblock, - ) - def save_personroles_to_db(self): """This should be run only after all the survexblocks have been saved to the database and so have _id that can be used as a ForeignKey - - survexblock = models.ForeignKey("SurvexBlock", on_delete=models.CASCADE, db_index=True) - # increasing levels of precision, Surely we only need survexblock and (either person or personexpedition)? - personname = models.CharField(max_length=100) - person = models.ForeignKey("Person", blank=True, null=True, on_delete=models.CASCADE, db_index=True) # not needed - personexpedition = models.ForeignKey("PersonExpedition", blank=True, null=True, on_delete=models.SET_NULL, db_index=True) + Fields: + survexblock = models.ForeignKey("SurvexBlock", .. + personname = models.CharField(max_length=100) + person = models.ForeignKey("Person", .. + personexpedition = models.ForeignKey("PersonExpedition", .. """ - print(f" - Saving {len(self._pending_pr_saves):,} SurvexPersonRoles to db", file=sys.stderr) + def pr_save_sqlite(): + try: + SurvexPersonRole.objects.bulk_create(valid_list) + print(f" - all SurvexPersonRoles bulk-created to db", file=sys.stderr) + except Exception as e: + message = f"\n ! - EXCEPTION '{e}' - in PR bulk update. Falling back onto sequential updates method" + print(message) + print(message, file=sys.stderr) + stash_data_issue(parser="survex", message=message) + + pr_save_mysql() + + def pr_save_mysql(): + nc = 0 + ns = 0 + for pr in valid_list: + got_obj, created = SurvexPersonRole.objects.get_or_create( + survexblock=pr.survexblock, + personname=pr.personname, + person=pr.person, + personexpedition=pr.personexpedition, + defaults={'survexblock': pr.survexblock} # Fields to set only if creating + ) + if created: + nc += 1 + print(f" - {pr} Created", file=sys.stderr) + else: + ns += 1 + # update the block if it changed + got_obj.survexblock = pr.survexblock + got_obj.save() + print(f" - {ns}/{nc} SurvexPersonRoles saved/created to db", file=sys.stderr) + + print(f" - Saving {len(self._pending_pr_saves):,} SurvexPersonRoles to db.. ({connection.vendor})", file=sys.stderr) pr_list = [] for blk in self._pending_pr_saves: pr_list + self._pending_pr_saves[blk] @@ -730,30 +705,10 @@ class LoadingSurvex: parser="survex", message=message ) - try: - SurvexPersonRole.objects.bulk_create(valid_list) - except Exception as e: - message = f"\n ! - EXCEPTION '{e}' - in PR bulk update. Falling back onto sequential updates" - print(message) - print(message, file=sys.stderr) - stash_data_issue(parser="survex", message=message) - - for pr in valid_list: - got_obj, created = SurvexPersonRole.objects.get_or_create( - survexblock=pr.survexblock, - personname=pr.personname, - person=pr.person, - personexpedition=pr.personexpedition, - defaults={'survexblockblock': pr.survexblock} # Fields to set only if creating - ) - if created: - print(f" - {pr} Created", file=sys.stderr) - else: - # update the block if it changed - got_obj.survexblock = pr.survexblock - got_obj.save() - # print(f" - {qm} SAVED", file=sys.stderr) - print(f" - PRs saved to db", file=sys.stderr) + if connection.vendor == 'mysql': + pr_save_mysql() + else: + pr_save_sqlite() _pending_pr_saves = {} # in database now, so empty cache @@ -832,7 +787,80 @@ class LoadingSurvex: got_obj.save() # print(f" - {qm} SAVED", file=sys.stderr) print(f" - QMs saved to db", file=sys.stderr) - + + def put_personrole_on_trip(self, survexblock, personexpedition, tm): + """ + Only used for a single person. + Creates a SurvexPersonRole object, but this is not committed to the database until + all the survexblocks have been saved. + """ + + try: + personrole = SurvexPersonRole( # does not commit to db yet + survexblock=survexblock, # survexblock has no _id yet + person = personexpedition.person, + personexpedition=personexpedition, + personname=tm + ) + except: + message = f"! *team '{tm}' FAIL, already created. {survexblock.survexfile.path} ({survexblock}) " + print(self.insp + message) + print(self.insp + message, file=sys.stderr) + stash_data_issue( + parser="survex", message=message, url=None, sb=(survexblock.survexfile.path) + ) + + if survexblock._blockid not in self._pending_pr_saves: + self._pending_pr_saves[survexblock._blockid] = [] + self._pending_pr_saves[survexblock._blockid].append(personrole) + # print(f"-- _pending_pr_saves\n -- {survexblock=} - {survexblock._blockid}\n -- {self._pending_pr_saves[survexblock._blockid]}\n -- {personrole}", file=sys.stderr) + + return False + + def process_pending_team(self, survexblock): + """This is only called when processing a *end statement + + It converts a list of names as strings into a list of valid + PersonExpedition objects for the current expo. + """ + + # Many survex blocks have no *team members at all + if survexblock._blockid not in self.person_pending_cache: + return + + if not (expo := self.get_expo_for_block(survexblock)): + message = f"! Buggeration FAIL, undated, but people exist.. {survexblock=}" + print(self.insp + message) + print(message,file=sys.stderr) + stash_data_issue( + parser="survex", + message=message, + url=None, sb=survexblock, + ) + return + + # Sanitise the set of names, and validate as valid people + if teamnames := self.flush_persons_pending(survexblock._blockid): + for tm in teamnames: + if known_foreigner(tm): + message = f"- *team '{tm}' known foreigner {survexblock.survexfile.path} ({survexblock})" + print(self.insp + message) + # stash_data_issue(parser='survex', message=message, url=None, sb=survexblock) + survexblock.foreigners = True + else: + pe = GetPersonExpeditionNameLookup(expo).get(tm.lower()) + if pe: + self.put_personrole_on_trip(survexblock, pe, tm) + + else: + message = f"! *team '{tm}' FAIL personexpedition {survexblock.survexfile.path} ({survexblock}) " + print(self.insp + message) + stash_data_issue( + parser="survex", + message=message, + url=None, sb=survexblock, + ) + def add_to_pending(self, survexblock, tm): """Collects team names. We might not have a date so cannot validate against expo attendance yet