From e5e960ac3ff54a1c68aa5f3ee27848fc25d7bfd9 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Fri, 30 Jan 2026 00:29:37 +0000 Subject: [PATCH] Working. survex parsing now 2x as fast --- core/models/survex.py | 2 +- parsers/survex.py | 51 ++++++++++++++++++++++++------------------- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/core/models/survex.py b/core/models/survex.py index ee99a37..47e0ef5 100644 --- a/core/models/survex.py +++ b/core/models/survex.py @@ -252,7 +252,7 @@ class SurvexBlock(models.Model): ordering = ("_blockid",) def __str__(self): - return self.name and str(self.name) or "no_name-#" + str(self.pk) #pk is primary key + return self.name and str(self.name) or "no_name-#" + str(self.pk)[:5] #pk is primary key def isSurvexBlock(self): # Function used in templates return True diff --git a/parsers/survex.py b/parsers/survex.py index c5a4c3b..bb41438 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -233,7 +233,8 @@ def hack_save(survexblock): print_list.append(sb) sb = sb.parent if len(print_list) > 0: - print(f" ## Horrible QM pre-save hack: {len(print_list)} survexblocks up from {survexblock}", file=sys.stderr) + pass + # print(f" ## Horrible QM pre-save hack: {len(print_list)} survexblocks up from {survexblock}", file=sys.stderr) sb_list.reverse() for sbo in sb_list: @@ -618,13 +619,13 @@ class LoadingSurvex: # print(f"\n !! {bad_parents} as-yet invalid parent ids out of {len(blocks)} blocks. {len(valid_blocks)} valid blocks", file=sys.stderr) topo_list = get_toposorted_blocks(blocks) - print(f"\n !! {len(topo_list)=} blocks. {len(blocks)=}", file=sys.stderr) + print(f"\n - {len(topo_list):,} survexblocks to save to db (topologically sorted)", file=sys.stderr) safe_chunks = get_generational_chunks_optimized(topo_list) try: for i, chunk in enumerate(safe_chunks): - print(f"Saving Chunk {i+1} ({len(chunk)} blocks)...", file=sys.stderr) + print(f" - Saving chunk {i+1} ({len(chunk)} blocks)...", file=sys.stderr) SurvexBlock.objects.bulk_create( chunk, update_conflicts=True, # root item probably exists already @@ -633,7 +634,7 @@ class LoadingSurvex: 'expedition', 'survexfile', 'scanswallet', 'legsall', 'legslength', 'foreigners',], unique_fields=['_blockid'] ) - print("Success: Entire tree saved.", file=sys.stderr) + print(" - Success: Entire tree of survexblocks saved to db.", file=sys.stderr) except Exception as e: print(f"Failed at chunk {i+1}: {e}", file=sys.stderr) @@ -668,8 +669,9 @@ class LoadingSurvex: personname=tm ) except: - message = f"! *team '{tm}' FAIL, already created {survexblock.survexfile.path} ({survexblock}) " + message = f"! *team '{tm}' FAIL, already created. {survexblock.survexfile.path} ({survexblock}) " print(self.insp + message) + print(self.insp + message, file=sys.stderr) stash_data_issue( parser="survex", message=message, url=None, sb=(survexblock.survexfile.path) ) @@ -677,7 +679,7 @@ class LoadingSurvex: if survexblock._blockid not in self._pending_pr_saves: self._pending_pr_saves[survexblock._blockid] = [] self._pending_pr_saves[survexblock._blockid].append(personrole) - # print(f"-- _pending_pr_saves\n -- {survexblock=} - {survexblock._blockid}\n -- {_pending_pr_saves[survexblock._blockid]}\n -- {personrole}", file=sys.stderr) + # print(f"-- _pending_pr_saves\n -- {survexblock=} - {survexblock._blockid}\n -- {self._pending_pr_saves[survexblock._blockid]}\n -- {personrole}", file=sys.stderr) return False @@ -686,17 +688,22 @@ class LoadingSurvex: It converts a list of names as strings into a list of valid PersonExpedition objects for the current expo. - - SurvexPersonRoles """ # Many survex blocks have no *team members at all - if not self.flush_persons_pending(survexblock._blockid): + if survexblock._blockid not in self.person_pending_cache: return if not (expo := self.get_expo_for_block(survexblock)): - print(f" Buggeration FAIL, undated but people exist {survexblock=}",file=sys.stderr) - return + message = f"! Buggeration FAIL, undated, but people exist.. {survexblock=}" + print(self.insp + message) + print(message,file=sys.stderr) + stash_data_issue( + parser="survex", + message=message, + url=None, sb=survexblock, + ) + return # Sanitise the set of names, and validate as valid people if teamnames := self.flush_persons_pending(survexblock._blockid): @@ -724,28 +731,28 @@ class LoadingSurvex: """This should be run only after all the survexblocks have been saved to the database and so have _id that can be used as a ForeignKey """ - print(f" - Saving {len(self._pending_pr_saves)} SurvexPersonRoles to db", file=sys.stderr) + # print(f" - Saving {len(self._pending_pr_saves)} SurvexPersonRoles to db", file=sys.stderr) for blk in self._pending_pr_saves: # Now commit to db pr_list = self._pending_pr_saves[blk] - print(f" PR_LIST {pr_list} {blk}", file=sys.stderr) + # print(f" PR_LIST {pr_list} {blk}", file=sys.stderr) valid_list = [] for pr in pr_list: try: - # print(f"___ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr) pr.full_clean() valid_list.append(pr) except ValidationError as e: - print(f" ! PR is invalid: {e} {survexblock} {pr}", file=sys.stderr) - print(f" ! PR is invalid: {e} {survexblock} {pr}") - - + message = f" ! PR is invalid: {e} {survexblock} {pr}" + print(message, file=sys.stderr) + stash_data_issue( + parser="survex", + message=message, + url=None, sb=survexblock, + ) + SurvexPersonRole.objects.bulk_create(valid_list) - # for pr in pr_list: - # print(f"+++ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr) - # SurvexPersonRole.objects.create(pr).save() - + _pending_pr_saves = {} # in database now, so empty cache def add_to_pending(self, survexblock, tm):