2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2026-02-08 10:38:25 +00:00

Working. survex parsing now 2x as fast

This commit is contained in:
2026-01-30 00:29:37 +00:00
parent 89bc27b1f8
commit e5e960ac3f
2 changed files with 30 additions and 23 deletions

View File

@@ -233,7 +233,8 @@ def hack_save(survexblock):
print_list.append(sb)
sb = sb.parent
if len(print_list) > 0:
print(f" ## Horrible QM pre-save hack: {len(print_list)} survexblocks up from {survexblock}", file=sys.stderr)
pass
# print(f" ## Horrible QM pre-save hack: {len(print_list)} survexblocks up from {survexblock}", file=sys.stderr)
sb_list.reverse()
for sbo in sb_list:
@@ -618,13 +619,13 @@ class LoadingSurvex:
# print(f"\n !! {bad_parents} as-yet invalid parent ids out of {len(blocks)} blocks. {len(valid_blocks)} valid blocks", file=sys.stderr)
topo_list = get_toposorted_blocks(blocks)
print(f"\n !! {len(topo_list)=} blocks. {len(blocks)=}", file=sys.stderr)
print(f"\n - {len(topo_list):,} survexblocks to save to db (topologically sorted)", file=sys.stderr)
safe_chunks = get_generational_chunks_optimized(topo_list)
try:
for i, chunk in enumerate(safe_chunks):
print(f"Saving Chunk {i+1} ({len(chunk)} blocks)...", file=sys.stderr)
print(f" - Saving chunk {i+1} ({len(chunk)} blocks)...", file=sys.stderr)
SurvexBlock.objects.bulk_create(
chunk,
update_conflicts=True, # root item probably exists already
@@ -633,7 +634,7 @@ class LoadingSurvex:
'expedition', 'survexfile', 'scanswallet', 'legsall', 'legslength', 'foreigners',],
unique_fields=['_blockid']
)
print("Success: Entire tree saved.", file=sys.stderr)
print(" - Success: Entire tree of survexblocks saved to db.", file=sys.stderr)
except Exception as e:
print(f"Failed at chunk {i+1}: {e}", file=sys.stderr)
@@ -668,8 +669,9 @@ class LoadingSurvex:
personname=tm
)
except:
message = f"! *team '{tm}' FAIL, already created {survexblock.survexfile.path} ({survexblock}) "
message = f"! *team '{tm}' FAIL, already created. {survexblock.survexfile.path} ({survexblock}) "
print(self.insp + message)
print(self.insp + message, file=sys.stderr)
stash_data_issue(
parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
)
@@ -677,7 +679,7 @@ class LoadingSurvex:
if survexblock._blockid not in self._pending_pr_saves:
self._pending_pr_saves[survexblock._blockid] = []
self._pending_pr_saves[survexblock._blockid].append(personrole)
# print(f"-- _pending_pr_saves\n -- {survexblock=} - {survexblock._blockid}\n -- {_pending_pr_saves[survexblock._blockid]}\n -- {personrole}", file=sys.stderr)
# print(f"-- _pending_pr_saves\n -- {survexblock=} - {survexblock._blockid}\n -- {self._pending_pr_saves[survexblock._blockid]}\n -- {personrole}", file=sys.stderr)
return False
@@ -686,17 +688,22 @@ class LoadingSurvex:
It converts a list of names as strings into a list of valid
PersonExpedition objects for the current expo.
SurvexPersonRoles
"""
# Many survex blocks have no *team members at all
if not self.flush_persons_pending(survexblock._blockid):
if survexblock._blockid not in self.person_pending_cache:
return
if not (expo := self.get_expo_for_block(survexblock)):
print(f" Buggeration FAIL, undated but people exist {survexblock=}",file=sys.stderr)
return
message = f"! Buggeration FAIL, undated, but people exist.. {survexblock=}"
print(self.insp + message)
print(message,file=sys.stderr)
stash_data_issue(
parser="survex",
message=message,
url=None, sb=survexblock,
)
return
# Sanitise the set of names, and validate as valid people
if teamnames := self.flush_persons_pending(survexblock._blockid):
@@ -724,28 +731,28 @@ class LoadingSurvex:
"""This should be run only after all the survexblocks have
been saved to the database and so have _id that can be used as a ForeignKey
"""
print(f" - Saving {len(self._pending_pr_saves)} SurvexPersonRoles to db", file=sys.stderr)
# print(f" - Saving {len(self._pending_pr_saves)} SurvexPersonRoles to db", file=sys.stderr)
for blk in self._pending_pr_saves:
# Now commit to db
pr_list = self._pending_pr_saves[blk]
print(f" PR_LIST {pr_list} {blk}", file=sys.stderr)
# print(f" PR_LIST {pr_list} {blk}", file=sys.stderr)
valid_list = []
for pr in pr_list:
try:
# print(f"___ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
pr.full_clean()
valid_list.append(pr)
except ValidationError as e:
print(f" ! PR is invalid: {e} {survexblock} {pr}", file=sys.stderr)
print(f" ! PR is invalid: {e} {survexblock} {pr}")
message = f" ! PR is invalid: {e} {survexblock} {pr}"
print(message, file=sys.stderr)
stash_data_issue(
parser="survex",
message=message,
url=None, sb=survexblock,
)
SurvexPersonRole.objects.bulk_create(valid_list)
# for pr in pr_list:
# print(f"+++ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
# SurvexPersonRole.objects.create(pr).save()
_pending_pr_saves = {} # in database now, so empty cache
def add_to_pending(self, survexblock, tm):