From 97c6b5459aa317fab1e8a30b6a6ff24f54ba568e Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Wed, 28 Jan 2026 15:39:10 +0000 Subject: [PATCH] bulk add mod. AI --- parsers/survex.py | 58 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/parsers/survex.py b/parsers/survex.py index cb6f2ed..4f97ff5 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -2945,14 +2945,25 @@ def survexifywallets(): start = time.time() # if there is a wallet for a block, add the people to the wallet + # Batch add people to wallets to minimize DB hits + from collections import defaultdict + wallet_to_people = defaultdict(set) sprsall = SurvexPersonRole.objects.all().select_related("person").select_related("survexblock") for spr in sprsall: w = spr.survexblock.scanswallet - if w: - w.persons.add(spr.person) - + if w and spr.person: + wallet_to_people[w.id].add(spr.person) + # Fetch all wallets in one go + wallets = Wallet.objects.in_bulk(wallet_to_people.keys()) + total_added = 0 + for wid, people in wallet_to_people.items(): + w = wallets[wid] + w.persons.add(*people) + total_added += len(people) + if total_added % 1000 == 0 and total_added > 0: + print(f" - Batched {total_added} people to wallets", file=sys.stderr) duration = time.time() - start - print(f" - {duration:7.2f} s to add people to wallets ", file=sys.stderr) + print(f" - {duration:7.2f} s to batch add people to wallets (total {total_added})", file=sys.stderr) start = time.time() @@ -2962,25 +2973,41 @@ def survexifywallets(): print(f" - {duration:7.2f} s to set survexblock:wallet using JSON survexfiles ", file=sys.stderr) start = time.time() - for w in Wallet.objects.all(): - blocks = SurvexBlock.objects.filter(scanswallet=w).select_related("survexfile") - for b in blocks: - if b.survexfile.cave: - w.caves.add(b.survexfile.cave) - w.save() - + # Batch add caves to wallets + from collections import defaultdict + wallet_to_caves = defaultdict(set) + # Prefetch all SurvexBlocks and SurvexFiles in one go + blocks = SurvexBlock.objects.select_related("survexfile", "scanswallet").all() + for b in blocks: + w = b.scanswallet + if w and b.survexfile and b.survexfile.cave: + wallet_to_caves[w.id].add(b.survexfile.cave) + wallets = Wallet.objects.in_bulk(wallet_to_caves.keys()) + total_wallets = 0 + for wid, caves in wallet_to_caves.items(): + w = wallets[wid] + w.caves.add(*caves) + total_wallets += 1 + if total_wallets % 100 == 0: + print(f" - Batched {total_wallets} wallets for caves", file=sys.stderr) duration = time.time() - start - print(f" - {duration:7.2f} s to add caves to wallets ", file=sys.stderr) + print(f" - {duration:7.2f} s to batch add caves to wallets (total {total_wallets})", file=sys.stderr) start = time.time() # Find the survex blocks which are 'ours' i.e. ignore all those (ARGE etc) without expo people attached. cuccblocks = set() - for spr in SurvexPersonRole.objects.all(): + sprs = SurvexPersonRole.objects.all() + cuccblocks_count = 0 + for spr in sprs: cuccblocks.add(spr.survexblock) + cuccblocks_count += 1 + if cuccblocks_count % 1000 == 0: + print(f" - Processed {cuccblocks_count} SurvexPersonRole for cuccblocks in {time.time() - start:.2f}s", file=sys.stderr) # Because we have just run set_survexblocks(w), this should only complain if there is no *ref and no wallet that links to its parent file sentinelbad = Wallet.objects.get(walletname="1983#00") + cuccblock_count = 0 for b in cuccblocks: if b.date > date(2001, 1, 1): # do we care about older ones? 1999 certainly has different wallet system if not b.scanswallet: @@ -2993,6 +3020,9 @@ def survexifywallets(): print(message, file=sys.stderr) url = get_offending_filename(b.survexfile.path) DataIssue.objects.update_or_create(parser="ref", message=message, url=url) + cuccblock_count += 1 + if cuccblock_count % 1000 == 0: + print(f" - Checked {cuccblock_count} cuccblocks for missing *ref in {time.time() - start:.2f}s", file=sys.stderr) duration = time.time() - start print(f" - {duration:7.2f} s to check missing *ref on survexblocks ", file=sys.stderr) @@ -3034,7 +3064,9 @@ def LoadSurvexBlocks(): print(" - Loading Survex Blocks...") memstart = get_process_memory() # ---------------------------------------------------------------- + block_start = time.time() FindAndLoadSurvex() + print(f" - FindAndLoadSurvex() took {time.time() - block_start:.2f}s", file=sys.stderr) # ---------------------------------------------------------------- memend = get_process_memory() print(f" - MEMORY start:{memstart:.3f} MB end:{memend:.3f} MB increase={memend - memstart:.3f} MB")