2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2026-02-08 13:55:24 +00:00

profiling

This commit is contained in:
2026-01-28 16:58:56 +00:00
parent 5c451610ba
commit 86ba377bd8

View File

@@ -73,6 +73,12 @@ debugprinttrigger = "!"
dataissues = set() dataissues = set()
# Caches for ORM minimization
survexblock_cache = None # {scanswallet_id: [SurvexBlock, ...]}
personrole_cache = None # {survexblock_id: [SurvexPersonRole, ...]}
wallet_cache = None # {walletname: [Wallet, ...]}
trip_people_cache = {} # indexed by survexblock, so never needs cleaning out
class SurvexLeg: class SurvexLeg:
"""No longer a models.Model subclass, so no longer a database table""" """No longer a models.Model subclass, so no longer a database table"""
@@ -80,16 +86,18 @@ class SurvexLeg:
compass = 0.0 compass = 0.0
clino = 0.0 clino = 0.0
def datewallet(w, earliest): def datewallet(w, earliest):
"""Gets the date of the youngest survexblock associated with the wallet """Gets the date of the youngest survexblock associated with the wallet, using a cache."""
REFACTOR this to do the whole date-getting task global survexblock_cache
if survexblock_cache is None:
Currently there is only one SurvexBlock, but this is in anticipation of # Build cache: {scanswallet_id: [SurvexBlock, ...]}
changing the schema to allow many. survexblock_cache = {}
""" for b in SurvexBlock.objects.all().select_related("survexfile", "scanswallet"):
if b.scanswallet_id not in survexblock_cache:
survexblock_cache[b.scanswallet_id] = []
survexblock_cache[b.scanswallet_id].append(b)
first = earliest first = earliest
blocks = SurvexBlock.objects.filter(scanswallet=w).select_related("survexfile", "scanswallet") blocks = survexblock_cache.get(w.id, [])
for b in blocks: for b in blocks:
if b.date: if b.date:
if b.date < first: if b.date < first:
@@ -142,18 +150,21 @@ def get_offending_filename(path):
return "/survexfile/" + path + ".svx" return "/survexfile/" + path + ".svx"
trip_people_cache = {} # indexed by survexblock, so never needs cleaning out
def get_team_on_trip(survexblock): def get_team_on_trip(survexblock):
"""Uses a cache to avoid a database query if it doesn't need to. """Uses a cache to avoid a database query if it doesn't need to. Only used for complete team."""
Only used for complete team.""" global trip_people_cache, personrole_cache
global trip_people_cache if personrole_cache is None:
# Build cache: {survexblock_id: [SurvexPersonRole, ...]}
personrole_cache = {}
for pr in SurvexPersonRole.objects.all().select_related("person", "personexpedition"):
if pr.survexblock_id not in personrole_cache:
personrole_cache[pr.survexblock_id] = []
personrole_cache[pr.survexblock_id].append(pr)
if survexblock in trip_people_cache: if survexblock in trip_people_cache:
if len(trip_people_cache[survexblock]) > 0: if len(trip_people_cache[survexblock]) > 0:
return trip_people_cache[survexblock] return trip_people_cache[survexblock]
qpeople = personrole_cache.get(survexblock.id, [])
qpeople = SurvexPersonRole.objects.filter(survexblock=survexblock).select_related("person", "personexpedition") trip_people_cache[survexblock] = qpeople
trip_people_cache[survexblock] = qpeople # this is a query list
return qpeople return qpeople
def get_people_on_trip(survexblock): def get_people_on_trip(survexblock):
@@ -849,10 +860,23 @@ class LoadingSurvex:
if year in self._expedition_cache: if year in self._expedition_cache:
expo = self._expedition_cache[year] expo = self._expedition_cache[year]
else: else:
message = f"! DATE INCORRECT. There is no expedition for the year {year}. {survexblock.survexfile.path} ({survexblock}) - set to 1976." expeditions = Expedition.objects.filter(year=year)
print(self.insp + message) if len(expeditions) > 1:
stash_data_issue(parser='survex', message=message, url=None, sb=(survexblock.survexfile.path)) message = (
expo = self._expedition_cache.get("1976") f"! More than one expedition in year {year} '{line}' ({survexblock}) {survexblock.survexfile.path}"
)
print(self.insp + message)
stash_data_issue(
parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
)
if expeditions:
expo = expeditions[0]
self.expos[year] = expo
else:
expo = Expedition.objects.get(year="1976")
message = f"! DATE INCORRECT. There is no expedition for the year {year}. {survexblock.survexfile.path} ({survexblock}) - set to 1976."
print(self.insp + message)
stash_data_issue(parser='survex', message=message, url=None, sb=(survexblock.survexfile.path))
return expo return expo
def LoadSurvexDate(self, survexblock, line): def LoadSurvexDate(self, survexblock, line):
@@ -1275,9 +1299,15 @@ class LoadingSurvex:
stash_data_issue(parser="ref", message=message, url=url) stash_data_issue(parser="ref", message=message, url=url)
# Look to see if we have a record of this wallet already - which would be unexpected # Look to see if we have a record of this wallet already - which would be unexpected
manywallets = Wallet.objects.filter( global wallet_cache
walletname=refscan if wallet_cache is None:
) # assumes all wallets found in earlier pass of data import # Build cache: {walletname: [Wallet, ...]}
wallet_cache = {}
for w in Wallet.objects.all():
if w.walletname not in wallet_cache:
wallet_cache[w.walletname] = []
wallet_cache[w.walletname].append(w)
manywallets = wallet_cache.get(refscan, [])
if manywallets: if manywallets:
if len(manywallets) > 1: if len(manywallets) > 1:
message = f" ! Wallet *REF {refscan} - more than one found {len(manywallets)} wallets in db with same id {survexblock.survexfile.path}" message = f" ! Wallet *REF {refscan} - more than one found {len(manywallets)} wallets in db with same id {survexblock.survexfile.path}"
@@ -1290,7 +1320,6 @@ class LoadingSurvex:
pass pass
else: else:
check_reused_wallet() check_reused_wallet()
else: else:
survexblock.scanswallet = manywallets[0] # this is a ForeignKey field survexblock.scanswallet = manywallets[0] # this is a ForeignKey field
# Only save if changed # Only save if changed
@@ -2427,9 +2456,9 @@ def FindAndLoadSurvex():
io_collate.write(f";*include {survexfileroot.path}\n") io_collate.write(f";*include {survexfileroot.path}\n")
flinear.write(f"{svx_scan.depthinclude:2} {indent} *include {survexfileroot.path}\n") flinear.write(f"{svx_scan.depthinclude:2} {indent} *include {survexfileroot.path}\n")
# import cProfile import cProfile
# import pstats import pstats
# from pstats import SortKey from pstats import SortKey
# pr = cProfile.Profile() # pr = cProfile.Profile()
# pr.enable() # pr.enable()
@@ -2594,17 +2623,17 @@ def FindAndLoadSurvex():
print("\n - Loading All Survex Blocks (LinearLoad)", file=sys.stderr) print("\n - Loading All Survex Blocks (LinearLoad)", file=sys.stderr)
svx_load = LoadingSurvex() svx_load = LoadingSurvex()
# pr2 = cProfile.Profile() pr2 = cProfile.Profile()
# pr2.enable() pr2.enable()
print(" ", file=sys.stderr, end="") print(" ", file=sys.stderr, end="")
# ---------------------------------------------------------------- # ----------------------------------------------------------------
svx_load.LinearLoad(survexblockroot, survexfileroot.path, collatefilename) svx_load.LinearLoad(survexblockroot, survexfileroot.path, collatefilename)
# ---------------------------------------------------------------- # ----------------------------------------------------------------
# pr2.disable() pr2.disable()
# with open('LinearLoad.prof', 'w') as f: with open('LinearLoad.prof', 'w') as f:
# ps = pstats.Stats(pr2, stream=f) ps = pstats.Stats(pr2, stream=f)
# ps.sort_stats(SortKey.CUMULATIVE) ps.sort_stats(SortKey.CUMULATIVE)
# ps.print_stats() ps.print_stats()
mem1 = get_process_memory() mem1 = get_process_memory()
print(f"\n - MEM:{mem1:7.2f} MB STOP", file=sys.stderr) print(f"\n - MEM:{mem1:7.2f} MB STOP", file=sys.stderr)