mirror of https://expo.survex.com/repositories/troggle/.git synced 2026-02-08 13:55:24 +00:00

primary key now UUID on SurvexBlock

2026-01-29 23:06:30 +00:00
parent 7a779555ac
commit 1b7798e2fc
3 changed files with 366 additions and 195 deletions


@@ -6,6 +6,7 @@ import re
import subprocess
import sys
import time
from collections import OrderedDict
from datetime import date, datetime, timezone
from pathlib import Path
@@ -57,6 +58,7 @@ survexblockroot = None
ROOTBLOCK = "rootblock"
METRESINFEET = 3.28084
UNSEENS = "_unseens.svx"
BATCH_SIZE = 900 # limit for terms in SQL expressions for sqlite
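# 900 stays safely below SQLite's historical default of 999 bound
# parameters per statement (SQLITE_MAX_VARIABLE_NUMBER).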
IGNOREFILES = ["dummy_file", "_dummy_file"]
IGNOREPREFIX = ["surface", "kataster", "gpx", "deprecated"] #"fixedpts",
@@ -135,35 +137,42 @@ def stash_data_issue(parser=None, message=None, url=None, sb=None):
"""Avoid hitting the database for error messages until the end of the import
use a set, since we do not want identical duplicate issues
BUT we have to use the sb UUID not the sb object itself
"""
global dataissues
# try:
# if sb:
# url2 = get_offending_filename(sb.survexfile.path)
# except Exception as e:
# print(f" ! stash_data_issue() '{e}' '{sb=}' -- '{url=}'", file=sys.stderr)
# raise
dataissues.add((parser, message, url, sb))
if sb:
dataissues.add((parser, message, url, sb._blockid))
else:
dataissues.add((parser, message, url, None))
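# A minimal sketch of the dedupe behaviour (hypothetical messages):
# because dataissues is a set of tuples, stashing the same issue twice
# stores it only once.
#
#   stash_data_issue("survex", "bad *date", None, None)
#   stash_data_issue("survex", "bad *date", None, None)
#   assert len(dataissues) == 1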
def store_data_issues():
def store_data_issues(loadex = None):
"""Take the stash and store it permanently in the database instead
use BULK creation here !"""
use BULK creation here !
change to using a class attribute, not a global stash
"""
global dataissues
print(f" - Storing {len(dataissues)} Data Issues into database")
# make a list of objects, but don't commit to database yet
di_list = []
for issue in dataissues:
parser, message, url, sb = issue
if url is None:
if sb is not None:
try:
url = get_offending_filename(sb.survexfile.path)
except Exception as e:
print(f" ! store_data_issues() '{e}' '{sb=}' -- '{url=}'", file=sys.stderr)
url = get_offending_filename(sb) # assumed to be text
di_list.append(DataIssue(parser=parser, message=message, url=url))
if not loadex:
parser, message, url, _ = issue
else:
parser, message, url, blkid = issue
if blkid:
sb = loadex._pending_block_saves[blkid]
if url is None:
if sb is not None:
try:
url = get_offending_filename(sb.survexfile.path)
except Exception as e:
print(f" ! store_data_issues() '{e}' '{sb=}' -- '{url=}'", file=sys.stderr)
url = get_offending_filename(sb) # assumed to be text
di_list.append(DataIssue(parser=parser, message=message, url=url))
# Now commit to db
DataIssue.objects.bulk_create(di_list)
dataissues = set()
@@ -212,39 +221,6 @@ def get_people_on_trip(survexblock):
return list(set(people))
# THIS SHOULD NOT BE GLOBAL ! Should be per instance of file loader, even though they are globally unique
trip_person_record = {} # a dict indexed by tuples (survexblock, personexpedition) = 1
trip_team_cache = {} # a dict of lists indexed by survexblock._blockid
def put_person_on_trip(survexblock, personexpedition, tm):
"""Uses a cache to avoid a database query if it doesn't need to.
Only used for a single person"""
global trip_person_record
global trip_team_cache
if (survexblock._blockid, personexpedition) in trip_person_record:
return True
try:
personrole = SurvexPersonRole( # does not commit to db yet
survexblock=survexblock,
person = personexpedition.person,
personexpedition=personexpedition,
personname=tm
)
except:
message = f"! *team '{tm}' FAIL, already created {survexblock.survexfile.path} ({survexblock}) "
print(message)  # no self here: this is a module-level function
stash_data_issue(
parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
)
if survexblock._blockid not in trip_team_cache:
trip_team_cache[survexblock._blockid] = []
trip_team_cache[survexblock._blockid].append(personrole)
# print(f"-- trip_team_cache\n -- {survexblock=} - {survexblock._blockid}\n -- {trip_team_cache[survexblock._blockid]}\n -- {personrole}", file=sys.stderr)
trip_person_record[(survexblock._blockid, personexpedition)] = 1
return False
def hack_save(survexblock):
# #### Horrible hack to be properly written as a cache
@@ -482,6 +458,7 @@ class LoadingSurvex:
pending = []
adhocload = False
person_pending_cache = {} # indexed per survexblock UUID, so robust wrt PUSH/POP begin/end
_pending_block_saves = OrderedDict() # not {}, retain topological sort order
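# (Plain dicts preserve insertion order on Python 3.7+ anyway;
# OrderedDict makes the reliance on that ordering explicit.)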
def __init__(self):
@@ -511,60 +488,313 @@ class LoadingSurvex:
parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
)
def confirm_team_on_trip(self, survexblock):
"""This is only called when processing a *end statement
def save_survexblocks_to_db(self):
"""This saves the in-memory python objects into the database, at which point
the foreign keys are enabled and one can do queries on the database.
The sequence of survex blocks is constructed from the *include links,
depth first, so the list of survex blocks is topologically sorted.
HOWEVER what matters to the .parent links is the topological sorting
of the *begin/*end inclusions, which may or may not match the *include sort
sequence. Yuk.
"""
global trip_team_cache
if survexblock._blockid not in trip_team_cache:
return
#### STRIP THIS OUT and cache the SurvexPersonRole for the end of the survex block !
hack_save(survexblock)
def get_toposorted_blocks(blocks):
"""This is a depth-first recursive topological sort that ensures that when a survexblock
has a parent, that parent always appears earlier in the list.
"""
# 1. Map IDs to objects for quick lookup
id_map = {b._blockid: b for b in blocks}
topo_sorted_list = []
visited = set()
def visit(block):
# If we've already added this ID or it's None, skip
if block is None or block._blockid in visited:
return
# 2. Get the parent object
# If .parent is an object, we use its ID.
# If .parent is already an ID, we use it directly.
parent_val = block.parent
# This line of code is a "safety net". It ensures that no matter how
# the parent data is stored, we always end up with a UUID string
# rather than a Python object.
# getattr(object, 'attribute_name', default_value).
parent_id = getattr(parent_val, '_blockid', parent_val)
# 3. Recursive step: Visit the parent first
if parent_id in id_map:
visit(id_map[parent_id])
# 4. Add current block to results
visited.add(block._blockid)
topo_sorted_list.append(block)
for b in blocks:
visit(b)
return topo_sorted_list
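# A minimal sketch of the guarantee (hypothetical block ids): for a
# root "a", child "b" (parent=a) and grandchild "c" (parent=b)
# supplied as [c, a, b], visit() recurses to each parent before
# appending the child, so the result is [a, b, c] -- every parent
# precedes its descendants whatever the input order.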
def get_generational_chunks(sorted_blocks):
"""
Splits a topologically sorted list into chunks where no child
exists in the same chunk as its parent.
"""
chunks = []
# Track which IDs are already "saved" (in a previous chunk)
saved_ids = set()
# current_batch will hold blocks for the current "generation"
current_batch = []
for block in sorted_blocks:
parent_id = getattr(block.parent, '_blockid', block.parent)
# If the parent is not yet 'saved', this block MUST
# go into a future batch.
if parent_id and parent_id not in saved_ids:
# Finish the current chunk and start a new one
if current_batch:
chunks.append(current_batch)
# Mark everything in the finished batch as 'saved'
saved_ids.update(b._blockid for b in current_batch)
current_batch = []
current_batch.append(block)
# Safety: Even if there are no dependencies, respect the BATCH_SIZE
if len(current_batch) >= BATCH_SIZE:
chunks.append(current_batch)
saved_ids.update(b._blockid for b in current_batch)
current_batch = []
# Add the final trailing batch
if current_batch:
chunks.append(current_batch)
return chunks
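# Worked sketch (hypothetical blocks): for a topo-sorted input
# [root, childA, childB, grandchild] this yields the chunks
# [root], [childA, childB], [grandchild] -- each flush marks the
# finished batch as 'saved' before any block that depends on it is
# queued, so bulk_create() never inserts a child before its parent row.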
def get_generational_chunks_optimized(blocks):
"""
Splits a topologically sorted list into chunks where no child
exists in the same chunk as its parent.
Optimized for a shallow tree.
"""
# 1. Map IDs to objects for quick lookup
id_map = {b._blockid: b for b in blocks}
# 2. Dictionary to store the level (depth) of each block
# Level 0 = Root, Level 1 = Child of Root, etc.
levels = {}
def get_level(block):
if block._blockid in levels:
return levels[block._blockid]
parent_id = getattr(block.parent, '_blockid', block.parent)
# If no parent OR parent is not in our current batch, it's a Root (Level 0)
if not parent_id or parent_id not in id_map:
levels[block._blockid] = 0
return 0
# Otherwise, level is Parent's Level + 1
level = get_level(id_map[parent_id]) + 1
levels[block._blockid] = level
return level
# Calculate levels for everyone
for b in blocks:
get_level(b)
# 3. Group blocks by their level
from collections import defaultdict
generational_groups = defaultdict(list)
for b in blocks:
generational_groups[levels[b._blockid]].append(b)
# 4. Final step: Split each level into batches of 900
final_chunks = []
for level in sorted(generational_groups.keys()):
level_blocks = generational_groups[level]
# Standard list slicing to split into BATCH_SIZE
for i in range(0, len(level_blocks), BATCH_SIZE):
final_chunks.append(level_blocks[i:i + BATCH_SIZE])
return final_chunks
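# Design note: grouping by depth yields roughly one chunk per tree
# level (plus BATCH_SIZE splits), whereas the sequential scan in
# get_generational_chunks() flushes every time a child follows its
# own parent in the list, which in depth-first order can fragment
# the saves into many small bulk_create() calls.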
# construct the list.
already_saved_blocks = set(SurvexBlock.objects.values_list('_blockid', flat=True))
blocks = []
for blockid in self._pending_block_saves:
blocks.append(self._pending_block_saves[blockid])
if blocks:
# valid_blocks = []
# bad_parents = 0
# for block in blocks:
# try:
# if block.parent:
# if block.parent not in already_saved_blocks:
# bad_parents += 1
# # print(f" Invalid parent id: {block.survexfile}::{block} -> {block.parent}", file=sys.stderr)
# # block.full_clean()
# valid_blocks.append(block)
# except ValidationError as e:
# print(f" ! Block {block} is invalid: {e}", file=sys.stderr)
# print(f" ! Block {block} is invalid: {e}")
# print(f"\n !! {bad_parents} as-yet invalid parent ids out of {len(blocks)} blocks. {len(valid_blocks)} valid blocks", file=sys.stderr)
topo_list = get_toposorted_blocks(blocks)
print(f"\n !! {len(topo_list)=} blocks. {len(blocks)=}", file=sys.stderr)
safe_chunks = get_generational_chunks_optimized(topo_list)
# Now commit to db
pr_list = trip_team_cache[survexblock._blockid]
# print(f" PR_LIST {pr_list} {survexblock._blockid }", file=sys.stderr)
valid_list = []
for pr in pr_list:
try:
# print(f"___ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
pr.full_clean()
valid_list.append(pr)
except ValidationError as e:
print(f" ! PR is invalid: {e} {survexblock} {pr}", file=sys.stderr)
print(f" ! PR is invalid: {e} {survexblock} {pr}")
for i, chunk in enumerate(safe_chunks):
print(f"Saving Chunk {i+1} ({len(chunk)} blocks)...", file=sys.stderr)
SurvexBlock.objects.bulk_create(
chunk,
update_conflicts=True, # root item probably exists already
# update_fields needed if we allow conflict update
update_fields=['name', 'title', 'parent', 'date',
'expedition', 'survexfile', 'scanswallet', 'legsall', 'legslength', 'foreigners',],
unique_fields=['_blockid']
)
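# update_conflicts=True makes this an upsert (INSERT ... ON CONFLICT
# (_blockid) DO UPDATE on backends that support it), so re-importing
# over an already-saved root block updates it rather than raising.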
print("Success: Entire tree saved.", file=sys.stderr)
except Exception as e:
print(f"Failed at chunk {i+1}: {e}", file=sys.stderr)
return
SurvexPersonRole.objects.bulk_create(valid_list)
# for pr in pr_list:
# print(f"+++ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
# SurvexPersonRole.objects.create(pr).save()
try:
for i in range(0, len(blocks), 1):
valid_blocks[i].save()
except Exception as e:
print(f" !! Error in SINGLE create for survexblocks at {i}: {e}", file=sys.stderr)
return
# Nope, even topo-sorted, we can't know what batch size is suitable
# without some of the items being invalid
try:
for i in range(0, len(topo_list), BATCH_SIZE):
SurvexBlock.objects.bulk_create(topo_list[i:i+BATCH_SIZE])
except Exception as e:
print(f" !! Error in bulk_create for survexblocks at {i}: {e}", file=sys.stderr)
trip_team_cache = {} # a dict of lists indexed by survexblock._blockid
def put_personrole_on_trip(self, survexblock, personexpedition, tm):
"""
Only used for a single person.
Creates a SurvexPersonRole object, but this is not committed to the database until
all the survexblocks have been saved.
"""
try:
personrole = SurvexPersonRole( # does not commit to db yet
survexblock=survexblock, # survexblock has no _id yet
person = personexpedition.person,
personexpedition=personexpedition,
personname=tm
)
except:
message = f"! *team '{tm}' FAIL, already created {survexblock.survexfile.path} ({survexblock}) "
print(self.insp + message)
stash_data_issue(
parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
)
if survexblock._blockid not in self.trip_team_cache:
self.trip_team_cache[survexblock._blockid] = []
self.trip_team_cache[survexblock._blockid].append(personrole)
# print(f"-- trip_team_cache\n -- {survexblock=} - {survexblock._blockid}\n -- {trip_team_cache[survexblock._blockid]}\n -- {personrole}", file=sys.stderr)
return False
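# Note: personrole already references its survexblock even though that
# block row is not in the database yet. This is what the UUID primary
# key buys us: _blockid is assigned at object creation, so the FK value
# is known now and the roles can be bulk-created later, after
# save_survexblocks_to_db() has run.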
def process_pending_team(self, survexblock):
"""This is only called when processing a *end statement
# Not working, so do not clear cache!
trip_team_cache[survexblock] = [] # in database now, so empty cache
def check_team_cache(self, label=None):
global trip_team_cache
message = f"! check_team_cache() called.. "
print(message)
print(message, file=sys.stderr)
for block in trip_team_cache:
message = f"! *team CACHEFAIL, trip_team_cache {block.survexfile.path} ({block}). label:{label}"
print(message)
print(message, file=sys.stderr)
It converts a list of names as strings into a list of valid
PersonExpedition objects for the current expo.
These are then attached to the block as SurvexPersonRoles.
"""
# Many survex blocks have no *team members at all
if not self.flush_persons_pending(survexblock._blockid):
return
if not (expo := self.get_expo_for_block(survexblock)):
print(f" Buggeration fAIL {survexblock=}",file=sys.stderr)
return
# Sanitise the set of names, and validate as valid people
if teamnames := self.flush_persons_pending(survexblock._blockid):
for tm in teamnames:
if known_foreigner(tm):
message = f"- *team '{tm}' known foreigner {survexblock.survexfile.path} ({survexblock})"
print(self.insp + message)
# stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
else:
pe = GetPersonExpeditionNameLookup(expo).get(tm.lower())
if pe:
self.put_personrole_on_trip(survexblock, pe, tm)
else:
message = f"! *team '{tm}' FAIL personexpedition {survexblock.survexfile.path} ({survexblock}) "
print(self.insp + message)
stash_data_issue(
parser="survex",
message=message,
url=None, sb=survexblock,
)
def save_personroles_to_db(self):
"""This should be run only after all the survexblocks have
been saved to the database and so have _id that can be used as a ForeignKey
"""
for blk in self.trip_team_cache:
# hack_save(survexblock)
# Now commit to db
pr_list = self.trip_team_cache[blk]
# print(f" PR_LIST {pr_list} {blk}", file=sys.stderr)
valid_list = []
for pr in pr_list:
try:
# print(f"___ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
pr.full_clean()
valid_list.append(pr)
except ValidationError as e:
print(f" ! PR is invalid: {e} {survexblock} {pr}", file=sys.stderr)
print(f" ! PR is invalid: {e} {survexblock} {pr}")
SurvexPersonRole.objects.bulk_create(valid_list)
# for pr in pr_list:
# print(f"+++ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
# SurvexPersonRole.objects.create(pr).save()
trip_team_cache = {} # in database now, so empty cache
def add_to_pending(self, survexblock, tm):
"""Collects team names before we have a date so cannot validate against
expo attendance yet"""
global person_pending_cache
"""Collects team names. We might not have a date so cannot validate
against expo attendance yet
"""
if survexblock._blockid not in self.person_pending_cache:
self.person_pending_cache[survexblock._blockid] = set()
self.person_pending_cache[survexblock._blockid].add(tm)
print(f"-- person_pending_cache {survexblock}, {self.person_pending_cache[survexblock._blockid]}, {tm}")
if tm not in self.person_pending_cache[survexblock._blockid]:
self.person_pending_cache[survexblock._blockid].add(tm)
# print(f"-- person_pending_cache '{survexblock}' {self.person_pending_cache[survexblock._blockid]} (added {tm})")
def get_team_pending(self, blockid):
"""A set of *team names added at the end of the survex block
def flush_persons_pending(self, blockid):
"""A set of *team names added at the end of the survex block.
Zeros the pending cache as it returns the (unvalidated) names.
"""
if blockid in self.person_pending_cache:
teamnames = self.person_pending_cache[blockid] # a set of names
@@ -583,11 +813,10 @@ class LoadingSurvex:
def get_team_inherited(self, survexblock): # survexblock only used for debug messages
"""See get_team_pending(survexblock._blockid) which gets called at the same time,
when we see a *date line"""
global person_pending_cache
if self.inheritteam:
message = (
f"- no *team INHERITING ({survexblock.parent})>({survexblock}) {survexblock.survexfile.path} '{self.inheritteam}'"
f"- no *team on blcok so INHERITING ({survexblock.parent})>({survexblock}) {survexblock.survexfile.path} '{self.inheritteam}'"
)
print(self.insp + message)
# stash_data_issue(
@@ -674,6 +903,17 @@ class LoadingSurvex:
# expoyear = "1976"
return
def get_expo_for_block(self, survexblock):
if expo := survexblock.expedition: # may be None if no *date yet
return expo
if survexblock.date:
expo = Expedition.objects.get(year=str(survexblock.date)[:4])
return expo
if expo := survexblock.parent.expedition: # immediate parent works mostly
print(f" WARNING using parent block expo year {survexblock=}",file=sys.stderr)
return expo
return False
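# Fallback order: the block's own expedition, then the expedition for
# the year of its *date, then the immediate parent's expedition;
# a False return means no expo could be determined.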
def fix_anonymous(self, survexblock):
"""Called when we reach *end of a block
Checks to see if the block has no team attached, in which case it uses the
@@ -689,24 +929,14 @@ class LoadingSurvex:
if survexblock.parent.name == "troggle_unseens":
# Bolluxed up if we try to inherit from this random junk, so don't.
return
expo = survexblock.expedition # may be None if no *date yet
if not expo:
expo = survexblock.parent.expedition # immediate parent works mostly
if not expo:
return
if not self.currentteam: # i.e. if it is a dated block and has no team
if teamnames := self.get_team_inherited(survexblock):# WALRUS
for tm in teamnames:
personexpedition = GetPersonExpeditionNameLookup(expo).get(tm.lower())
if personexpedition:
put_person_on_trip(survexblock, personexpedition, tm)
self.person_pending_cache[survexblock._blockid] = teamnames
return
def cache_survexblock(self, survexblock):
# appends to list, creates an empty list to append to if it doesn't exist yet
self._pending_block_saves.setdefault(survexblock._blockid, []).append(survexblock)
self._pending_block_saves[survexblock._blockid] = survexblock
def LoadSurvexTeam(self, survexblock, line):
"""Interpeting the *team fields has been updated to current 2025 survex standard,
@@ -731,41 +961,13 @@ class LoadingSurvex:
# so we can't validate whether the person was on expo or not.
# we will have to attach them to the survexblock anyway, and then do a
# later check on whether they are valid when we get the date.
# refactor this to collect names before and after a *date, and commit them as
# a bulk update only at the END of the survexblock
if not tm: # i.e. null person inthe *team
if not tm: # i.e. null person in the *team
return # ignore: troggle does not need to know. Survex want to though.
self.currentteam.add(tm) # used in push/pop block code
expo = survexblock.expedition # may be None if no *date yet
self.add_to_pending(survexblock, tm)
if expo:
personexpedition = GetPersonExpeditionNameLookup(expo).get(tm.lower())
if personexpedition:
put_person_on_trip(survexblock, personexpedition, tm)
elif known_foreigner(tm): # note, not using .lower()
message = f"- *team {expo.year} '{tm}' known foreigner on *team {survexblock.survexfile.path} ({survexblock}) in '{line=}'"
print(self.insp + message)
# stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
else:
# we know the date and expo, but can't find the person
message = f"! *team {expo.year} '{tm}' FAIL personexpedition lookup on *team {survexblock.survexfile.path} ({survexblock}) in '{line=}' {tm=}"
print(self.insp + message)
stash_data_issue(
parser="survex", message=message, url=None, sb=survexblock
)
else:
self.add_to_pending(survexblock, tm)
# don't know the date yet, so cannot query the table about validity.
# assume the person is valid. It will get picked up when the *date appears
# There are hundreds of these..
message = (
f"- Team before Date: {line} ({survexblock}) {survexblock.survexfile.path}"
)
# teamfix = r"(?i)(.*?)\s+" + roles + r"?(?:es|s)?$" -- (.*?) means a non-greedy capture
if fixstyle := self.rx_teamfix.match(line): # matches the optional role at the end of the string WALRUS
tmlist = fixstyle.group(1).strip('\"') # remove quotes, if present
@@ -973,27 +1175,7 @@ class LoadingSurvex:
stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
return expo
def process_pending_team(self, survexblock):
expo = survexblock.expedition
if teamnames := self.get_team_pending(survexblock._blockid):
for tm in teamnames:
if known_foreigner(tm):
message = f"- *team {expo.year} '{tm}' known foreigner *date (misordered) {survexblock.survexfile.path} ({survexblock})"
print(self.insp + message)
# stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
else:
pe = GetPersonExpeditionNameLookup(expo).get(tm.lower())
if pe:
put_person_on_trip(survexblock, pe, tm)
else:
message = f"! *team {expo.year} '{tm}' FAIL personexpedition lookup on *date {survexblock.survexfile.path} ({survexblock}) "
print(self.insp + message)
stash_data_issue(
parser="survex",
message=message,
url=None, sb=survexblock,
)
def LoadSurvexDate(self, survexblock, line):
"""We now have a valid date for this survexblock, so we now know the expo
@@ -1027,12 +1209,7 @@ class LoadingSurvex:
if len(team) > 0:
message = f"! *team {expo.year} Multiple *date in one block? Already someone on team when *date seen. {survexblock.survexfile.path} ({survexblock}) in '{line}'"
print(self.insp + message)
stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
#self.process_pending_team(survexblock)
stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
oline = line
perps = get_people_on_trip(survexblock) # perps used for diagnostic error messages only: they are to blame
@@ -1665,7 +1842,7 @@ class LoadingSurvex:
self.currentsurvexfile = newfile
return # abort as everything already done for object creation
newfile.save() # until we do this there is no internal id so no foreign key works
newfile.save() # until we do this there is no internal .id so no foreign key works
self.currentsurvexfile = newfile
newfile.primary = self.set_primary(headpath)
@@ -1924,7 +2101,7 @@ class LoadingSurvex:
nlegstotal = 0
self.relativefilename = path
self._pending_block_saves = {} # Cache for survex blocks to save at the end
# self._pending_block_saves = {} # Cache for survex blocks to save at the end
#self.IdentifyCave(path, svxid, depth) # this will produce null for survex files which are geographic collections
self.currentsurvexfile = survexblock.survexfile
@@ -2024,7 +2201,6 @@ class LoadingSurvex:
def starstatement(star, fullline):
import time
"""Interprets a survex comamnd where * is the first character on the line, e.g. *begin"""
nonlocal survexblock
nonlocal blk_name
@@ -2075,8 +2251,9 @@ class LoadingSurvex:
newsurvexblock.title = (
"(" + survexblock.title + ")"
) # copy parent initially, overwrite if it has its own
self.cache_survexblock(newsurvexblock) # note for later saving in db
survexblock = newsurvexblock
survexblock.save() # Only save once, after all fields are set, or try to delay until *end using caches
# Only save to db once, after all fields are set
tickle()
# ---------------------------END
@@ -2090,10 +2267,8 @@ class LoadingSurvex:
self.fix_undated(survexblock)
self.fix_anonymous(survexblock)
self.confirm_team_on_trip(survexblock)
self.process_pending_team(survexblock)
self.cache_survexblock(survexblock)
# POP state ++++++++++++++
popblock()
self.inheritteam = self.teaminheritstack.pop()
self.currentteam = self.teamcurrentstack.pop()
@@ -2200,24 +2375,9 @@ class LoadingSurvex:
# At the end of the whole (concatenated) file, save all cached survexblocks using bulk_update
blocks = []
for blockid in self._pending_block_saves:
blocks.append(self._pending_block_saves[blockid])
if blocks:
# valid_blocks = []
# for block in blocks:
# try:
# block.full_clean()
# valid_blocks.append(block)
# except ValidationError as e:
# print(f" ! Block {block} is invalid: {e}", file=sys.stderr)
# print(f" ! Block {block} is invalid: {e}")
try:
BATCH_SIZE = 900
for i in range(0, len(blocks), BATCH_SIZE):
SurvexBlock.objects.bulk_update(blocks[i:i+BATCH_SIZE], ["legsall", "legslength", "parent"])
except Exception as e:
print(f"\n !! Error in bulk_update for survexblocks: {e}", file=sys.stderr)
self.save_survexblocks_to_db()
self.save_personroles_to_db()
def PushdownStackScan(self, survexblock, path, finname, flinear, io_collate):
"""Follows the *include links in all the survex files from the root file (usually 1623.svx)
@@ -2604,6 +2764,7 @@ def FindAndLoadSurvex():
io_collate.write(f";*edulcni {survexfileroot.path}\n")
svx_scan.check_cache_clean()
store_data_issues(svx_scan)
mem1 = get_process_memory()
flinear.write(f"\n - MEM:{mem1:.2f} MB STOP {survexfileroot.path}\n")
@@ -2721,6 +2882,7 @@ def FindAndLoadSurvex():
io_collate.write(f";*edulcni {UNSEENS}\n")
omit_scan.check_cache_clean()
store_data_issues(omit_scan)
mem1 = get_process_memory()
flinear.write(f"\n - MEM:{mem1:.2f} MB STOP {UNSEENS} Unseen Oddments\n")
@@ -3010,6 +3172,7 @@ def parse_one_file(fpath): # --------------------------------------in progress--
if len(sbs)<1:
print(f" ! No survex blocks found. Parser failure...")
for sb in sbs:
print(f" - {sb.id} re-setting survex block parent {sb=}", file=sys.stderr)
print(f" - {sb.id} re-setting survex block parent {sb=}")
sb.parent = existingparent # should be all the same
sb.save()
@@ -3227,7 +3390,7 @@ def LoadSurvexBlocks():
# duration = time.time() - start
# print(f" - TIME: {duration:7.2f} s", file=sys.stderr)
store_data_issues()
# duration = time.time() - start
# print(f" - TIME: {duration:7.2f} s", file=sys.stderr)
if dup_includes > 0: