
caching works for survex blocks on *end pop-up

2026-01-28 21:38:33 +00:00
parent 1a7e8c94cb
commit b31c404d2b
2 changed files with 45 additions and 30 deletions


@@ -9,6 +9,8 @@ import time
from datetime import date, datetime, timezone
from pathlib import Path
from django.core.exceptions import ValidationError
import troggle.settings as settings
from troggle.core.models.caves import Cave, Entrance, GetCaveLookup
from troggle.core.models.logbooks import QM
@@ -47,6 +49,9 @@ todo = """
repeated readings from distox etc. Not actually useful for pre-2022 survey data,
but good future-proofing.
Also it will be a tiny bit more accurate as these leg lengths are after loop closure fixup.
- in models.survex only ONE wallet per block. The most recent one seen overwrites the others, ugh. Should fix this sometime.
"""
survexblockroot = None
ROOTBLOCK = "rootblock"
@@ -91,6 +96,8 @@ def datewallet(w, earliest):
global survexblock_cache
if survexblock_cache is None:
# Build cache: {scanswallet_id: [SurvexBlock, ...]}
# This assumes that all the survexblocks have been parsed by now,
# which should probably be enforced more explicitly at a higher level
survexblock_cache = {}
for b in SurvexBlock.objects.all().select_related("survexfile", "scanswallet"):
if b.scanswallet_id not in survexblock_cache:
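The hunk above is cut off at the dictionary-membership test, so for clarity here is a minimal standalone sketch of the complete cache-building pattern. The field names come from the diff; the troggle.core.models.survex import path is an assumption.

    from collections import defaultdict

    from troggle.core.models.survex import SurvexBlock  # assumed import path

    # One pass with select_related() fetches the related rows in the same query,
    # so datewallet() can then do O(1) dict lookups instead of one query per wallet.
    survexblock_cache = defaultdict(list)
    for b in SurvexBlock.objects.all().select_related("survexfile", "scanswallet"):
        survexblock_cache[b.scanswallet_id].append(b)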
@@ -110,11 +117,19 @@ def datewallet(w, earliest):
return w.date
def set_walletdate(w):
    '''A valid wallet may not necessarily have a valid date, though of course
    in an ideal world it should.
    '''
    earliest = datetime.now().date()
    if not w.date():  # .date() sets .walletdate as a side-effect if it gets the date from JSON, or returns None
        d = datewallet(w, earliest)  # Not in JSON, so check all the survex blocks
        if d:
            w.walletdate = d
            w.save(update_fields=["walletdate"])
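datewallet() itself (only its tail is visible above) folds over the cached blocks for wallet w to find the earliest dated block. A hedged sketch of that fold; earliest_block_date, its blocks argument, and the .date attribute are illustrative assumptions, not the real signature:

    from datetime import datetime

    def earliest_block_date(blocks):
        # Hypothetical helper mirroring the fold datewallet() performs over
        # survexblock_cache[w.id]; returns None when no block carries a date.
        earliest = datetime.now().date()
        found = None
        for b in blocks:
            if b.date and b.date < earliest:
                earliest = b.date
                found = earliest
        return found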
def stash_data_issue(parser=None, message=None, url=None, sb=None):
"""Avoid hitting the database for error messages until the end of the import
@@ -1321,12 +1336,13 @@ class LoadingSurvex:
else:
check_reused_wallet()
else:
if manywallets[0]:
    survexblock.scanswallet = manywallets[0]  # this is a ForeignKey field
    # Only save if changed
    survexblock.save(update_fields=["scanswallet"])
    # This is where we should check that the wallet JSON contains a link to the survexfile
    # and that the JSON date and walletdate are set correctly to the survexblock date.
    set_walletdate(survexblock.scanswallet)
else:
perps = get_people_on_trip(survexblock)
message = f" ! Wallet *REF bad in '{survexblock.survexfile.path}' '{refscan}' NOT in database i.e. wallet does not exist {perps}."
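The "# Only save if changed" comment above records the intent, but the save is still unconditional. A sketch of an actual change check, assuming only the names visible in the diff (comparing scanswallet_id avoids fetching the related wallet row):

    wallet = manywallets[0]
    if wallet and survexblock.scanswallet_id != wallet.pk:
        survexblock.scanswallet = wallet  # ForeignKey assignment
        survexblock.save(update_fields=["scanswallet"])  # single-column UPDATE
        set_walletdate(survexblock.scanswallet)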
@@ -1821,9 +1837,8 @@ class LoadingSurvex:
nlegstotal = 0
self.relativefilename = path
# Cache for parent survex blocks to save at the end
self._pending_parent_saves = set()
# Cache for survex blocks to save at the end (legsall/legslength)
self._pending_block_saves = set()
#self.IdentifyCave(path, svxid, depth) # this will produce null for survex files which are geographic collections
@@ -1975,7 +1990,6 @@ class LoadingSurvex:
survexblock = newsurvexblock
survexblock.save() # Only save once, after all fields are set
tickle()
# ...timing removed...
# ---------------------------END
elif self.rx_end.match(cmd):
@@ -1991,16 +2005,16 @@ class LoadingSurvex:
# This was the most time-consuming step within *end processing: 47%.
# Instead of saving the parent here, cache it for a bulk_update at the end.
if hasattr(survexblock, 'parent') and survexblock.parent:
    self._pending_parent_saves.add(survexblock.parent)
# This was the second most time-consuming step within *end processing: 35%.
# Instead of survexblock.save(update_fields=["legsall", "legslength"]),
# cache the block too and bulk_update everything at the end.
self._pending_block_saves.add(survexblock)
confirm_team_on_trip(survexblock)
# POP state ++++++++++++++
# ...timing removed...
popblock()
self.inheritteam = self.teaminheritstack.pop()
self.currentteam = self.teamcurrentstack.pop()
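The two pending sets are what make *end cheap: sets deduplicate, so a parent reached from many child blocks is queued exactly once, and every database write is postponed to one bulk_update pass. A minimal sketch of the defer-and-flush shape, with the attribute names taken from the diff:

    class DeferredSaver:
        # Illustrative skeleton only; the real logic lives in LoadingSurvex.
        def __init__(self):
            self._pending_parent_saves = set()  # parents touched while popping *end
            self._pending_block_saves = set()   # blocks with changed legsall/legslength

        def on_end(self, survexblock):
            # set.add() is idempotent, so repeat visits cost nothing extra
            if getattr(survexblock, "parent", None):
                self._pending_parent_saves.add(survexblock.parent)
            self._pending_block_saves.add(survexblock)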
@@ -2112,19 +2126,20 @@ class LoadingSurvex:
# At the end, save all cached survexblocks using bulk_update
blocks = list(getattr(self, '_pending_block_saves', set()))
if blocks:
    # valid_blocks = []
    # for block in blocks:
    #     try:
    #         block.full_clean()
    #         valid_blocks.append(block)
    #     except ValidationError as e:
    #         print(f" ! Block {block} is invalid: {e}", file=sys.stderr)
    try:
        BATCH_SIZE = 900  # batching avoids hitting database parameter limits
        for i in range(0, len(blocks), BATCH_SIZE):
            SurvexBlock.objects.bulk_update(blocks[i:i + BATCH_SIZE], ["legsall", "legslength", "parent"])
    except Exception as e:
        print(f"\n !! Error in bulk_update for survexblocks: {e}", file=sys.stderr)
# Then save all cached parent survexblocks using bulk_update
parents = list(getattr(self, '_pending_parent_saves', set()))
if parents:
    try:
        # bulk_update() needs an explicit field list; update everything except the pk
        fields = [f.name for f in SurvexBlock._meta.fields if f.name != 'id']
        SurvexBlock.objects.bulk_update(parents, fields)
    except Exception as e:
        print(f"\n !! Error in bulk_update for parent blocks: {e}", file=sys.stderr)
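As an aside, Django's bulk_update() can do the batching itself via its batch_size argument, which would replace the manual slice loop; the model and field names below are taken from the diff:

    # Mirrors the manual BATCH_SIZE loop: Django slices the UPDATE statements
    # itself, which keeps each statement under the database's parameter limits.
    SurvexBlock.objects.bulk_update(
        blocks, ["legsall", "legslength", "parent"], batch_size=900
    )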
def PushdownStackScan(self, survexblock, path, finname, flinear, io_collate):
"""Follows the *include links in all the survex files from the root file (usually 1623.svx)