mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2026-02-08 04:27:45 +00:00
primary key now UUID on SurvexBlock
This commit is contained in:
@@ -260,7 +260,15 @@ class QM(TroggleModel):
|
||||
)
|
||||
grade = models.CharField(max_length=1, blank=True, null=True, help_text="A/B/C/D/X")
|
||||
cave = models.ForeignKey("Cave", related_name="QMs", blank=True, null=True, on_delete=models.SET_NULL)
|
||||
block = models.ForeignKey("SurvexBlock", null=True, on_delete=models.SET_NULL) # only for QMs from survex files
|
||||
|
||||
# only for QMs from survex files
|
||||
block = models.ForeignKey(
|
||||
"SurvexBlock",
|
||||
to_field="_blockid", # Explicitly point to the UUID field
|
||||
null=True,
|
||||
on_delete=models.SET_NULL
|
||||
)
|
||||
# block = models.ForeignKey("SurvexBlock", null=True, on_delete=models.SET_NULL)
|
||||
blockname = models.TextField(blank=True, null=True) # NB truncated copy of survexblock name with last char added
|
||||
expoyear = models.CharField(max_length=4, blank=True, null=True)
|
||||
ticked = models.BooleanField(default=False)
|
||||
|
||||
@@ -15,7 +15,6 @@ from troggle.core.utils import height_from_utm, throw
|
||||
|
||||
class SurvexFile(models.Model):
|
||||
path = models.CharField(max_length=200)
|
||||
#survexdirectory = models.ForeignKey("SurvexDirectory", blank=True, null=True, on_delete=models.SET_NULL)
|
||||
primary = models.ForeignKey(
|
||||
"SurvexFile", related_name="primarysurvex", blank=True, null=True, on_delete=models.SET_NULL
|
||||
)
|
||||
@@ -222,7 +221,8 @@ class SurvexBlock(models.Model):
|
||||
# This ID is generated as soon as you call SurvexBlock(). So we can use it while assembling the data
|
||||
# into the survexblock without having to keep doing a database transaction
|
||||
_blockid = models.UUIDField(
|
||||
primary_key=False,
|
||||
primary_key=True,
|
||||
unique=True,
|
||||
default=uuid.uuid4,
|
||||
editable=False
|
||||
)
|
||||
@@ -249,10 +249,10 @@ class SurvexBlock(models.Model):
|
||||
foreigners = models.BooleanField(default=False)
|
||||
|
||||
class Meta:
|
||||
ordering = ("id",)
|
||||
ordering = ("_blockid",)
|
||||
|
||||
def __str__(self):
|
||||
return self.name and str(self.name) or "no_name-#" + str(self.id)
|
||||
return self.name and str(self.name) or "no_name-#" + str(self.pk) #pk is primary key
|
||||
|
||||
def isSurvexBlock(self): # Function used in templates
|
||||
return True
|
||||
|
||||
@@ -6,6 +6,7 @@ import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from collections import OrderedDict
|
||||
from datetime import date, datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
@@ -57,6 +58,7 @@ survexblockroot = None
|
||||
ROOTBLOCK = "rootblock"
|
||||
METRESINFEET = 3.28084
|
||||
UNSEENS = "_unseens.svx"
|
||||
BATCH_SIZE = 900 # limit for terms in SQL expressions for sqlite
|
||||
|
||||
IGNOREFILES = ["dummy_file", "_dummy_file"]
|
||||
IGNOREPREFIX = ["surface", "kataster", "gpx", "deprecated"] #"fixedpts",
|
||||
@@ -135,35 +137,42 @@ def stash_data_issue(parser=None, message=None, url=None, sb=None):
|
||||
"""Avoid hitting the database for error messages until the end of the import
|
||||
|
||||
use a set, we do not want identically duplicate issues
|
||||
BUT we have to use the sb UUID not the sb object itself
|
||||
"""
|
||||
global dataissues
|
||||
# try:
|
||||
# if sb:
|
||||
# url2 = get_offending_filename(sb.survexfile.path)
|
||||
# except Exception as e:
|
||||
# print(f" ! stash_data_issue() '{e}' '{sb=}' -- '{url=}'", file=sys.stderr)
|
||||
# raise
|
||||
dataissues.add((parser, message, url, sb))
|
||||
|
||||
def store_data_issues():
|
||||
if sb:
|
||||
dataissues.add((parser, message, url, sb._blockid))
|
||||
else:
|
||||
dataissues.add((parser, message, url, None))
|
||||
|
||||
def store_data_issues(loadex = None):
|
||||
"""Take the stash and store it permanently in the database instead
|
||||
|
||||
use BULK creation here !"""
|
||||
use BULK creation here !
|
||||
|
||||
change to using Class not global stash
|
||||
"""
|
||||
global dataissues
|
||||
print(f" - Storing {len(dataissues)} Data Issues into database")
|
||||
|
||||
# make a list of objects, but don't commit to database yet
|
||||
di_list = []
|
||||
for issue in dataissues:
|
||||
parser, message, url, sb = issue
|
||||
if url is None:
|
||||
if sb is not None:
|
||||
try:
|
||||
url = get_offending_filename(sb.survexfile.path)
|
||||
except Exception as e:
|
||||
print(f" ! store_data_issues() '{e}' '{sb=}' -- '{url=}'", file=sys.stderr)
|
||||
url = get_offending_filename(sb) # assumed to be text
|
||||
di_list.append(DataIssue(parser=parser, message=message, url=url))
|
||||
if not loadex:
|
||||
parser, message, url, _ = issue
|
||||
else:
|
||||
parser, message, url, blkid = issue
|
||||
if blkid:
|
||||
sb = loadex._pending_block_saves[blkid]
|
||||
if url is None:
|
||||
if sb is not None:
|
||||
try:
|
||||
url = get_offending_filename(sb.survexfile.path)
|
||||
except Exception as e:
|
||||
print(f" ! store_data_issues() '{e}' '{sb=}' -- '{url=}'", file=sys.stderr)
|
||||
url = get_offending_filename(sb) # assumed to be text
|
||||
di_list.append(DataIssue(parser=parser, message=message, url=url))
|
||||
# Now commit to db
|
||||
DataIssue.objects.bulk_create(di_list)
|
||||
dataissues = set()
|
||||
@@ -212,39 +221,6 @@ def get_people_on_trip(survexblock):
|
||||
|
||||
return list(set(people))
|
||||
|
||||
# THIS SHOULD NOT BE GLOBAL ! Should be per instance of file loader, even though they are globally unique
|
||||
trip_person_record = {} # a dict indexed by tuples (survexblock, personexpedition) = 1
|
||||
trip_team_cache = {} # a dict of lists indexed by survexblock._blockid
|
||||
def put_person_on_trip(survexblock, personexpedition, tm):
|
||||
"""Uses a cache to avoid a database query if it doesn't need to.
|
||||
Only used for a single person"""
|
||||
global trip_person_record
|
||||
global trip_team_cache
|
||||
|
||||
if (survexblock._blockid, personexpedition) in trip_person_record:
|
||||
return True
|
||||
|
||||
try:
|
||||
personrole = SurvexPersonRole( # does not commit to db yet
|
||||
survexblock=survexblock,
|
||||
person = personexpedition.person,
|
||||
personexpedition=personexpedition,
|
||||
personname=tm
|
||||
)
|
||||
except:
|
||||
message = f"! *team '{tm}' FAIL, already created {survexblock.survexfile.path} ({survexblock}) "
|
||||
print(self.insp + message)
|
||||
stash_data_issue(
|
||||
parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
|
||||
)
|
||||
|
||||
if survexblock._blockid not in trip_team_cache:
|
||||
trip_team_cache[survexblock._blockid] = []
|
||||
trip_team_cache[survexblock._blockid].append(personrole)
|
||||
# print(f"-- trip_team_cache\n -- {survexblock=} - {survexblock._blockid}\n -- {trip_team_cache[survexblock._blockid]}\n -- {personrole}", file=sys.stderr)
|
||||
|
||||
trip_person_record[(survexblock._blockid, personexpedition)] = 1
|
||||
return False
|
||||
|
||||
def hack_save(survexblock):
|
||||
# #### Horrible hack to be properly written as a cache
|
||||
@@ -482,6 +458,7 @@ class LoadingSurvex:
|
||||
pending = []
|
||||
adhocload = False
|
||||
person_pending_cache = {} # indexed per survexblock UUID, so robust wrt PUSH/POP begin/end
|
||||
_pending_block_saves = OrderedDict() # not {}, retain topological sort order
|
||||
|
||||
|
||||
def __init__(self):
|
||||
@@ -511,60 +488,313 @@ class LoadingSurvex:
|
||||
parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
|
||||
)
|
||||
|
||||
def confirm_team_on_trip(self, survexblock):
|
||||
"""This is only called when processing a *end statement
|
||||
|
||||
|
||||
def save_survexblocks_to_db(self):
|
||||
"""This saves the in-memory python objects into the database, at which point
|
||||
the foreign keys are enabled and one can do queries on the database.
|
||||
|
||||
The sequence of survex blocks is constructed from the *include links,
|
||||
depth first, so the list of survex blocks is topologically sorted.
|
||||
HOWEVER what matters to the .parent links is the topological sorting
|
||||
of the *begin/*end inclusions, which may or may not match the *include sort
|
||||
sequence. Yuk.
|
||||
"""
|
||||
global trip_team_cache
|
||||
|
||||
if survexblock._blockid not in trip_team_cache:
|
||||
return
|
||||
#### STRIP THIS OUT and cache the SurvexPersonRole for the end of the survex block !
|
||||
hack_save(survexblock)
|
||||
def get_toposorted_blocks(blocks):
    """Return `blocks` reordered so that every parent precedes its children.

    Each block must expose `_blockid` and `parent`. `parent` may be another
    block object, a bare block id, or None; it is normalised to an id with
    getattr(parent, "_blockid", parent). A parent that is not in `blocks`
    is ignored for ordering purposes.

    The ancestor chain is walked iteratively rather than recursively, so a
    very deep *begin/*end nesting cannot hit the interpreter recursion limit,
    and a corrupt cyclic parent link cannot loop for ever: the walk stops as
    soon as it meets a block it is already collecting.

    Returns a new list; the input sequence is not modified.
    """
    # Map ids to objects for quick parent lookup
    id_map = {b._blockid: b for b in blocks}
    topo_sorted_list = []
    visited = set()

    for start in blocks:
        # Collect this block and its not-yet-emitted ancestors, child first.
        chain = []
        on_chain = set()  # ids collected in this walk: guards against parent cycles
        block = start
        while block is not None:
            blockid = block._blockid
            if blockid in visited or blockid in on_chain:
                break
            chain.append(block)
            on_chain.add(blockid)
            parent_val = block.parent
            # Works whether .parent holds an object or already holds an id
            parent_id = getattr(parent_val, "_blockid", parent_val)
            block = id_map.get(parent_id)  # None when the parent is outside this list

        # Emit root-most ancestor first so parents always come before children
        for block in reversed(chain):
            visited.add(block._blockid)
            topo_sorted_list.append(block)

    return topo_sorted_list
|
||||
|
||||
def get_generational_chunks(sorted_blocks, batch_size=None):
    """Split a parent-first list of blocks into chunks that can be saved in order.

    No block is placed in the same chunk as its own parent, and no chunk
    holds more than `batch_size` blocks. The relative order of the blocks is
    preserved.

    sorted_blocks: blocks ordered so that any parent present in the list
        comes before its children. Each block needs `_blockid` and `parent`
        (`parent` may be a block object, a bare id, or None).
    batch_size: maximum chunk length. Defaults to the module-level BATCH_SIZE.

    A parent that is not in `sorted_blocks` at all is not treated as a
    dependency: it can never be satisfied by flushing a chunk of this list,
    so waiting for it would only degrade the result into one-block chunks.

    Returns a list of lists of blocks.
    """
    limit = BATCH_SIZE if batch_size is None else batch_size
    sorted_blocks = list(sorted_blocks)
    pending_ids = {b._blockid for b in sorted_blocks}

    chunks = []
    saved_ids = set()  # ids already placed in a finished chunk
    current_batch = []

    for block in sorted_blocks:
        parent_id = getattr(block.parent, "_blockid", block.parent)

        # The parent is in this list but not yet in a finished chunk, so it
        # must be sitting in the current batch: close that batch first.
        if parent_id and parent_id in pending_ids and parent_id not in saved_ids:
            if current_batch:
                chunks.append(current_batch)
                saved_ids.update(b._blockid for b in current_batch)
                current_batch = []

        current_batch.append(block)

        # Even with no dependencies, respect the size limit
        if len(current_batch) >= limit:
            chunks.append(current_batch)
            saved_ids.update(b._blockid for b in current_batch)
            current_batch = []

    # Add the final trailing batch
    if current_batch:
        chunks.append(current_batch)

    return chunks
|
||||
def get_generational_chunks_optimized(blocks, batch_size=None):
    """Group blocks by depth in the parent tree, then split each depth into batches.

    Level 0 holds blocks with no parent, or whose parent is not in `blocks`;
    level 1 holds their children, and so on. Chunks are returned level by
    level, so a block never shares a chunk with its parent and every parent
    in the list lands in an earlier chunk than its children. Within a level
    the input order is kept.

    blocks: objects with `_blockid` and `parent` (`parent` may be a block
        object, a bare id, or None). Need not be sorted.
    batch_size: maximum chunk length. Defaults to the module-level BATCH_SIZE.

    Depths are computed iteratively, so a deep tree cannot exhaust the
    recursion limit. A cyclic parent link (corrupt data) is broken at the
    point where the walk meets itself instead of looping for ever.

    Returns a list of lists of blocks.
    Raises ValueError if `batch_size` is not positive.
    """
    limit = BATCH_SIZE if batch_size is None else batch_size
    if limit <= 0:
        raise ValueError(f"batch_size must be positive, got {limit}")

    blocks = list(blocks)
    id_map = {b._blockid: b for b in blocks}
    levels = {}  # _blockid -> depth, 0 for roots

    for start in blocks:
        # Walk up until we reach a block of known depth, a root, or a cycle.
        chain = []  # blocks of unknown depth, child first
        on_chain = set()
        base = -1  # depth of the block just above chain[-1]; -1 means "none"
        block = start
        while True:
            blockid = block._blockid
            if blockid in levels:
                base = levels[blockid]
                break
            if blockid in on_chain:  # cycle guard
                break
            chain.append(block)
            on_chain.add(blockid)
            parent_id = getattr(block.parent, "_blockid", block.parent)
            if not parent_id or parent_id not in id_map:
                break  # root of this batch
            block = id_map[parent_id]

        for depth, blk in enumerate(reversed(chain), start=base + 1):
            levels[blk._blockid] = depth

    # Group blocks by their level, keeping input order inside each level
    generational_groups = {}
    for b in blocks:
        generational_groups.setdefault(levels[b._blockid], []).append(b)

    # Split each level into batches no longer than the limit
    final_chunks = []
    for level in sorted(generational_groups):
        level_blocks = generational_groups[level]
        for i in range(0, len(level_blocks), limit):
            final_chunks.append(level_blocks[i:i + limit])

    return final_chunks
|
||||
|
||||
# construct the list.
|
||||
already_saved_blocks = set(SurvexBlock.objects.values_list('_blockid', flat=True))
|
||||
blocks = []
|
||||
for blockid in self._pending_block_saves:
|
||||
blocks.append(self._pending_block_saves[blockid])
|
||||
if blocks:
|
||||
# valid_blocks = []
|
||||
# bad_parents = 0
|
||||
# for block in blocks:
|
||||
# try:
|
||||
# if block.parent:
|
||||
# if block.parent not in already_saved_blocks:
|
||||
# bad_parents += 1
|
||||
# # print(f" Invalid parent id: {block.survexfile}::{block} -> {block.parent}", file=sys.stderr)
|
||||
# # block.full_clean()
|
||||
# valid_blocks.append(block)
|
||||
# except ValidationError as e:
|
||||
# print(f" ! Block {block} is invalid: {e}", file=sys.stderr)
|
||||
# print(f" ! Block {block} is invalid: {e}")
|
||||
# print(f"\n !! {bad_parents} as-yet invalid parent ids out of {len(blocks)} blocks. {len(valid_blocks)} valid blocks", file=sys.stderr)
|
||||
|
||||
topo_list = get_toposorted_blocks(blocks)
|
||||
print(f"\n !! {len(topo_list)=} blocks. {len(blocks)=}", file=sys.stderr)
|
||||
|
||||
safe_chunks = get_generational_chunks_optimized(topo_list)
|
||||
|
||||
|
||||
# Now commit to db
|
||||
pr_list = trip_team_cache[survexblock._blockid]
|
||||
# print(f" PR_LIST {pr_list} {survexblock._blockid }", file=sys.stderr)
|
||||
valid_list = []
|
||||
for pr in pr_list:
|
||||
try:
|
||||
# print(f"___ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
|
||||
pr.full_clean()
|
||||
valid_list.append(pr)
|
||||
except ValidationError as e:
|
||||
print(f" ! PR is invalid: {e} {survexblock} {pr}", file=sys.stderr)
|
||||
print(f" ! PR is invalid: {e} {survexblock} {pr}")
|
||||
for i, chunk in enumerate(safe_chunks):
|
||||
print(f"Saving Chunk {i+1} ({len(chunk)} blocks)...", file=sys.stderr)
|
||||
SurvexBlock.objects.bulk_create(
|
||||
chunk,
|
||||
update_conflicts=True, # root item probably exists already
|
||||
# update_fields needed if we allow conflict update
|
||||
update_fields=['name', 'title', 'parent', 'date',
|
||||
'expedition', 'survexfile', 'scanswallet', 'legsall', 'legslength', 'foreigners',],
|
||||
unique_fields=['_blockid']
|
||||
)
|
||||
print("Success: Entire tree saved.", file=sys.stderr)
|
||||
except Exception as e:
|
||||
print(f"Failed at chunk {i+1}: {e}", file=sys.stderr)
|
||||
|
||||
return
|
||||
|
||||
try:
|
||||
for i in range(0, len(blocks), 1):
|
||||
valid_blocks[i].save()
|
||||
except Exception as e:
|
||||
print(f" !! Error in SINGLE create for survexblocks at {i}: {e}", file=sys.stderr)
|
||||
return
|
||||
# Nope, even topo-sorted, we can't know what a batch size is suitable
|
||||
# without some of the items being invalid
|
||||
try:
|
||||
for i in range(0, len(topo_list), BATCH_SIZE):
|
||||
SurvexBlock.objects.bulk_create(topo_list[i:i+BATCH_SIZE])
|
||||
except Exception as e:
|
||||
print(f" !! Error in bulk_create for survexblocks at {i}: {e}", file=sys.stderr)
|
||||
|
||||
trip_team_cache = {} # a dict of lists indexed by survexblock._blockid
|
||||
def put_personrole_on_trip(self, survexblock, personexpedition, tm):
    """Queue one SurvexPersonRole for `survexblock` without touching the database.

    Only used for a single person. The SurvexPersonRole object is built in
    memory and appended to self.trip_team_cache under the block's _blockid;
    it is not committed to the database until all the survexblocks have
    been saved.

    survexblock: the block the person surveyed on.
    personexpedition: supplies both `.person` and the personexpedition link.
    tm: the name as written on the *team line, stored as `personname`.

    If the role object cannot be constructed, the problem is printed and
    stashed as a data issue and nothing is cached.

    Always returns False.
    """
    try:
        personrole = SurvexPersonRole(  # does not commit to db yet
            survexblock=survexblock,
            person=personexpedition.person,
            personexpedition=personexpedition,
            personname=tm,
        )
    except Exception:
        message = f"! *team '{tm}' FAIL, already created {survexblock.survexfile.path} ({survexblock}) "
        print(self.insp + message)
        # Pass the block itself: stash_data_issue() reads sb._blockid, which a
        # bare path string does not have.
        stash_data_issue(parser="survex", message=message, url=None, sb=survexblock)
        # There is no role object to cache, so stop here.
        return False

    self.trip_team_cache.setdefault(survexblock._blockid, []).append(personrole)
    return False
|
||||
|
||||
def process_pending_team(self, survexblock):
    """Turn the *team names collected for `survexblock` into queued SurvexPersonRoles.

    This is only called when processing a *end statement.

    The pending names for the block are taken from the pending cache once,
    then each name is looked up among the people on the block's expedition.
    Known foreigners are reported on stdout only; names that cannot be
    matched are printed and stashed as data issues; matched people are
    handed to put_personrole_on_trip().

    The names are flushed exactly once: flush_persons_pending() is
    documented as zeroing the cache as it returns, so a second call for the
    same block would come back empty and no team member would be processed.

    Returns None.
    """
    # Many survex blocks have no *team members at all
    teamnames = self.flush_persons_pending(survexblock._blockid)
    if not teamnames:
        return

    expo = self.get_expo_for_block(survexblock)
    if not expo:
        print(f" Buggeration fAIL {survexblock=}",file=sys.stderr)
        return

    # Sanitise the set of names, and validate as valid people
    for tm in teamnames:
        if known_foreigner(tm):
            # Reported on stdout only; deliberately not stashed as a data issue
            message = f"- *team '{tm}' known foreigner {survexblock.survexfile.path} ({survexblock})"
            print(self.insp + message)
            continue

        pe = GetPersonExpeditionNameLookup(expo).get(tm.lower())
        if pe:
            self.put_personrole_on_trip(survexblock, pe, tm)
        else:
            message = f"! *team '{tm}' FAIL personexpedition {survexblock.survexfile.path} ({survexblock}) "
            print(self.insp + message)
            stash_data_issue(
                parser="survex",
                message=message,
                url=None, sb=survexblock,
            )
|
||||
|
||||
def save_personroles_to_db(self):
|
||||
"""This should be run only after all the survexblocks have
|
||||
been saved to the database and so have _id that can be used as a ForeignKey
|
||||
"""
|
||||
for blk in self.trip_team_cache:
|
||||
# hack_save(survexblock)
|
||||
|
||||
# Now commit to db
|
||||
pr_list = self.trip_team_cache[blk]
|
||||
# print(f" PR_LIST {pr_list} {blk}", file=sys.stderr)
|
||||
valid_list = []
|
||||
for pr in pr_list:
|
||||
try:
|
||||
# print(f"___ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
|
||||
pr.full_clean()
|
||||
valid_list.append(pr)
|
||||
except ValidationError as e:
|
||||
print(f" ! PR is invalid: {e} {survexblock} {pr}", file=sys.stderr)
|
||||
print(f" ! PR is invalid: {e} {survexblock} {pr}")
|
||||
|
||||
|
||||
SurvexPersonRole.objects.bulk_create(valid_list)
|
||||
# for pr in pr_list:
|
||||
# print(f"+++ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
|
||||
# SurvexPersonRole.objects.create(pr).save()
|
||||
SurvexPersonRole.objects.bulk_create(valid_list)
|
||||
# for pr in pr_list:
|
||||
# print(f"+++ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
|
||||
# SurvexPersonRole.objects.create(pr).save()
|
||||
|
||||
# Not working, so do not clear cache!
|
||||
trip_team_cache[survexblock] = [] # in database now, so empty cache
|
||||
|
||||
def check_team_cache(self, label=None):
|
||||
global trip_team_cache
|
||||
message = f"! check_team_cache() called.. "
|
||||
print(message)
|
||||
print(message, file=sys.stderr)
|
||||
for block in trip_team_cache:
|
||||
message = f"! *team CACHEFAIL, trip_team_cache {block.survexfile.path} ({block}). label:{label}"
|
||||
print(message)
|
||||
print(message, file=sys.stderr)
|
||||
trip_team_cache = {} # in database now, so empty cache
|
||||
|
||||
def add_to_pending(self, survexblock, tm):
|
||||
"""Collects team names before we have a date so cannot validate against
|
||||
expo attendance yet"""
|
||||
global person_pending_cache
|
||||
"""Collects team names. We might not have a date so cannot validate
|
||||
against expo attendance yet
|
||||
"""
|
||||
|
||||
if survexblock._blockid not in self.person_pending_cache:
|
||||
self.person_pending_cache[survexblock._blockid] = set()
|
||||
self.person_pending_cache[survexblock._blockid].add(tm)
|
||||
print(f"-- person_pending_cache {survexblock}, {self.person_pending_cache[survexblock._blockid]}, {tm}")
|
||||
if tm not in self.person_pending_cache[survexblock._blockid]:
|
||||
self.person_pending_cache[survexblock._blockid].add(tm)
|
||||
# print(f"-- person_pending_cache '{survexblock}' {self.person_pending_cache[survexblock._blockid]} (added {tm})")
|
||||
|
||||
def get_team_pending(self, blockid):
|
||||
"""A set of *team names added at the end of the survex block
|
||||
def flush_persons_pending(self, blockid):
|
||||
"""A set of *team names added at the end of the survex block.
|
||||
Zeros the pending cache as it returns the (unvalidated) names.
|
||||
"""
|
||||
if blockid in self.person_pending_cache:
|
||||
teamnames = self.person_pending_cache[blockid] # a set of names
|
||||
@@ -583,11 +813,10 @@ class LoadingSurvex:
|
||||
def get_team_inherited(self, survexblock): # survexblock only used for debug mesgs
|
||||
"""See get_team_pending(survexblock._blockid) which gets called at the same time,
|
||||
when we see a *date line"""
|
||||
global person_pending_cache
|
||||
|
||||
if self.inheritteam:
|
||||
message = (
|
||||
f"- no *team INHERITING ({survexblock.parent})>({survexblock}) {survexblock.survexfile.path} '{self.inheritteam}'"
|
||||
f"- no *team on blcok so INHERITING ({survexblock.parent})>({survexblock}) {survexblock.survexfile.path} '{self.inheritteam}'"
|
||||
)
|
||||
print(self.insp + message)
|
||||
# stash_data_issue(
|
||||
@@ -674,6 +903,17 @@ class LoadingSurvex:
|
||||
# expoyear = "1976"
|
||||
return
|
||||
|
||||
def get_expo_for_block(self, survexblock):
    """Return the expedition to use for `survexblock`, or False if none can be found.

    Tried in order:
      1. the block's own `expedition` (may be None if no *date seen yet);
      2. the Expedition whose year is the first four characters of
         str(survexblock.date), when the block has a date;
      3. the immediate parent block's `expedition`, with a warning on stderr.

    A block with no parent simply skips step 3 instead of failing on
    `None.expedition`.

    NOTE(review): step 2 uses Expedition.objects.get(), so a date in a year
    with no Expedition record will still raise -- confirm that is intended.
    """
    if expo := survexblock.expedition:  # may be None if no *date yet
        return expo

    if survexblock.date:
        return Expedition.objects.get(year=str(survexblock.date)[:4])

    parent = survexblock.parent
    if parent is not None and (expo := parent.expedition):  # immediate parent works mostly
        print(f" WARNING using parent block expo year {survexblock=}",file=sys.stderr)
        return expo

    return False
|
||||
|
||||
def fix_anonymous(self, survexblock):
|
||||
"""Called when we reach *end of a block
|
||||
Checks to see if the block has no team attached, in which case it uses the
|
||||
@@ -690,23 +930,13 @@ class LoadingSurvex:
|
||||
# Bolluxed up if we try to inherit from this random junk, so don't.
|
||||
return
|
||||
|
||||
expo = survexblock.expedition # may be None if no *date yet
|
||||
if not expo:
|
||||
expo = survexblock.parent.expedition # immediate parent works mostly
|
||||
if not expo:
|
||||
return
|
||||
|
||||
if not self.currentteam: # i.e. if it is a dated block and has no team
|
||||
if teamnames := self.get_team_inherited(survexblock):# WALRUS
|
||||
for tm in teamnames:
|
||||
personexpedition = GetPersonExpeditionNameLookup(expo).get(tm.lower())
|
||||
if personexpedition:
|
||||
put_person_on_trip(survexblock, personexpedition, tm)
|
||||
self.person_pending_cache[survexblock._blockid] = teamnames
|
||||
return
|
||||
|
||||
def cache_survexblock(self, survexblock):
|
||||
# appends to list, creates an empty list to append to if it doesn't exist yet
|
||||
self._pending_block_saves.setdefault(survexblock._blockid, []).append(survexblock)
|
||||
self._pending_block_saves[survexblock._blockid] = survexblock
|
||||
|
||||
def LoadSurvexTeam(self, survexblock, line):
|
||||
"""Interpreting the *team fields has been updated to current 2025 survex standard,
|
||||
@@ -732,39 +962,11 @@ class LoadingSurvex:
|
||||
# we will have to attach them to the survexblock anyway, and then do a
|
||||
# later check on whether they are valid when we get the date.
|
||||
|
||||
# refactor this to collect names before and after a *date, and commit them as
|
||||
# a bulk update only at the END of the survexblock
|
||||
|
||||
if not tm: # i.e. null person inthe *team
|
||||
if not tm: # i.e. null person in the *team
|
||||
return # ignore: troggle does not need to know. Survex want to though.
|
||||
|
||||
self.currentteam.add(tm) # used in push/pop block code
|
||||
expo = survexblock.expedition # may be None if no *date yet
|
||||
|
||||
if expo:
|
||||
personexpedition = GetPersonExpeditionNameLookup(expo).get(tm.lower())
|
||||
if personexpedition:
|
||||
put_person_on_trip(survexblock, personexpedition, tm)
|
||||
|
||||
elif known_foreigner(tm): # note, not using .lower()
|
||||
message = f"- *team {expo.year} '{tm}' known foreigner on *team {survexblock.survexfile.path} ({survexblock}) in '{line=}'"
|
||||
print(self.insp + message)
|
||||
# stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
|
||||
else:
|
||||
# we know the date and expo, but can't find the person
|
||||
message = f"! *team {expo.year} '{tm}' FAIL personexpedition lookup on *team {survexblock.survexfile.path} ({survexblock}) in '{line=}' {tm=}"
|
||||
print(self.insp + message)
|
||||
stash_data_issue(
|
||||
parser="survex", message=message, url=None, sb=survexblock
|
||||
)
|
||||
else:
|
||||
self.add_to_pending(survexblock, tm)
|
||||
# don't know the date yet, so cannot query the table about validity.
|
||||
# assume the person is valid. It will get picked up with the *date appears
|
||||
# There are hundreds of these..
|
||||
message = (
|
||||
f"- Team before Date: {line} ({survexblock}) {survexblock.survexfile.path}"
|
||||
)
|
||||
self.add_to_pending(survexblock, tm)
|
||||
|
||||
# teamfix = r"(?i)(.*?)\s+" + roles + r"?(?:es|s)?$" -- (.*?) means a non-greedy capture
|
||||
if fixstyle := self.rx_teamfix.match(line): # matches the optional role at the the end of the string WALRUS
|
||||
@@ -973,27 +1175,7 @@ class LoadingSurvex:
|
||||
stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
|
||||
return expo
|
||||
|
||||
def process_pending_team(self, survexblock):
|
||||
expo = survexblock.expedition
|
||||
if teamnames := self.get_team_pending(survexblock._blockid):
|
||||
for tm in teamnames:
|
||||
if known_foreigner(tm):
|
||||
message = f"- *team {expo.year} '{tm}' known foreigner *date (misordered) {survexblock.survexfile.path} ({survexblock})"
|
||||
print(self.insp + message)
|
||||
# stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
|
||||
else:
|
||||
pe = GetPersonExpeditionNameLookup(expo).get(tm.lower())
|
||||
if pe:
|
||||
put_person_on_trip(survexblock, pe, tm)
|
||||
|
||||
else:
|
||||
message = f"! *team {expo.year} '{tm}' FAIL personexpedition lookup on *date {survexblock.survexfile.path} ({survexblock}) "
|
||||
print(self.insp + message)
|
||||
stash_data_issue(
|
||||
parser="survex",
|
||||
message=message,
|
||||
url=None, sb=survexblock,
|
||||
)
|
||||
|
||||
def LoadSurvexDate(self, survexblock, line):
|
||||
"""We now have a valid date for this survexblock, so we now know the expo
|
||||
@@ -1028,11 +1210,6 @@ class LoadingSurvex:
|
||||
message = f"! *team {expo.year} Multiple *date in one block? Already someone on team when *date seen. {survexblock.survexfile.path} ({survexblock}) in '{line}'"
|
||||
print(self.insp + message)
|
||||
stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
|
||||
|
||||
#self.process_pending_team(survexblock)
|
||||
|
||||
|
||||
|
||||
oline = line
|
||||
perps = get_people_on_trip(survexblock) # perps used for diagnostic error messages only: they are to blame
|
||||
|
||||
@@ -1665,7 +1842,7 @@ class LoadingSurvex:
|
||||
self.currentsurvexfile = newfile
|
||||
return # abort as everything already done for object creation
|
||||
|
||||
newfile.save() # until we do this there is no internal id so no foreign key works
|
||||
newfile.save() # until we do this there is no internal .id so no foreign key works
|
||||
self.currentsurvexfile = newfile
|
||||
newfile.primary = self.set_primary(headpath)
|
||||
|
||||
@@ -1924,7 +2101,7 @@ class LoadingSurvex:
|
||||
nlegstotal = 0
|
||||
self.relativefilename = path
|
||||
|
||||
self._pending_block_saves = {} # Cache for survex blocks to save at the end
|
||||
# self._pending_block_saves = {} # Cache for survex blocks to save at the end
|
||||
#self.IdentifyCave(path, svxid, depth) # this will produce null for survex files which are geographic collections
|
||||
|
||||
self.currentsurvexfile = survexblock.survexfile
|
||||
@@ -2024,7 +2201,6 @@ class LoadingSurvex:
|
||||
|
||||
def starstatement(star, fullline):
|
||||
import time
|
||||
# ...existing code...
|
||||
"""Interprets a survex command where * is the first character on the line, e.g. *begin"""
|
||||
nonlocal survexblock
|
||||
nonlocal blk_name
|
||||
@@ -2075,8 +2251,9 @@ class LoadingSurvex:
|
||||
newsurvexblock.title = (
|
||||
"(" + survexblock.title + ")"
|
||||
) # copy parent inititally, overwrite if it has its own
|
||||
self.cache_survexblock(newsurvexblock) # note for later saving in db
|
||||
survexblock = newsurvexblock
|
||||
survexblock.save() # Only save once, after all fields are set, or try to delay until *end using caches
|
||||
# Only save to db once, after all fields are set
|
||||
tickle()
|
||||
|
||||
# ---------------------------END
|
||||
@@ -2090,10 +2267,8 @@ class LoadingSurvex:
|
||||
|
||||
self.fix_undated(survexblock)
|
||||
self.fix_anonymous(survexblock)
|
||||
self.confirm_team_on_trip(survexblock)
|
||||
self.process_pending_team(survexblock)
|
||||
self.cache_survexblock(survexblock)
|
||||
# POP state ++++++++++++++
|
||||
# POP state ++++++++++++++
|
||||
popblock()
|
||||
self.inheritteam = self.teaminheritstack.pop()
|
||||
self.currentteam = self.teamcurrentstack.pop()
|
||||
@@ -2200,24 +2375,9 @@ class LoadingSurvex:
|
||||
|
||||
|
||||
# At the end of the whole (concatenated) file, save all cached survexblocks using bulk_update
|
||||
blocks = []
|
||||
for blockid in self._pending_block_saves:
|
||||
blocks.append(self._pending_block_saves[blockid])
|
||||
if blocks:
|
||||
# valid_blocks = []
|
||||
# for block in blocks:
|
||||
# try:
|
||||
# block.full_clean()
|
||||
# valid_blocks.append(block)
|
||||
# except ValidationError as e:
|
||||
# print(f" ! Block {block} is invalid: {e}", file=sys.stderr)
|
||||
# print(f" ! Block {block} is invalid: {e}")
|
||||
try:
|
||||
BATCH_SIZE = 900
|
||||
for i in range(0, len(blocks), BATCH_SIZE):
|
||||
SurvexBlock.objects.bulk_update(blocks[i:i+BATCH_SIZE], ["legsall", "legslength", "parent"])
|
||||
except Exception as e:
|
||||
print(f"\n !! Error in bulk_update for survexblocks: {e}", file=sys.stderr)
|
||||
|
||||
self.save_survexblocks_to_db()
|
||||
self.save_personroles_to_db()
|
||||
|
||||
def PushdownStackScan(self, survexblock, path, finname, flinear, io_collate):
|
||||
"""Follows the *include links in all the survex files from the root file (usually 1623.svx)
|
||||
@@ -2604,6 +2764,7 @@ def FindAndLoadSurvex():
|
||||
io_collate.write(f";*edulcni {survexfileroot.path}\n")
|
||||
|
||||
svx_scan.check_cache_clean()
|
||||
store_data_issues(svx_scan)
|
||||
|
||||
mem1 = get_process_memory()
|
||||
flinear.write(f"\n - MEM:{mem1:.2f} MB STOP {survexfileroot.path}\n")
|
||||
@@ -2721,6 +2882,7 @@ def FindAndLoadSurvex():
|
||||
io_collate.write(f";*edulcni {UNSEENS}\n")
|
||||
|
||||
omit_scan.check_cache_clean()
|
||||
store_data_issues(omit_scan)
|
||||
|
||||
mem1 = get_process_memory()
|
||||
flinear.write(f"\n - MEM:{mem1:.2f} MB STOP {UNSEENS} Unseen Oddments\n")
|
||||
@@ -3010,6 +3172,7 @@ def parse_one_file(fpath): # --------------------------------------in progress--
|
||||
if len(sbs)<1:
|
||||
print(f" ! No survex blocks found. Parser failure...")
|
||||
for sb in sbs:
|
||||
print(f" - {sb.id} re-setting survex block parent {sb=}", file=sys.stderr)
|
||||
print(f" - {sb.id} re-setting survex block parent {sb=}")
|
||||
sb.parent = existingparent # should be all the same
|
||||
sb.save()
|
||||
@@ -3227,7 +3390,7 @@ def LoadSurvexBlocks():
|
||||
|
||||
# duration = time.time() - start
|
||||
# print(f" - TIME: {duration:7.2f} s", file=sys.stderr)
|
||||
store_data_issues()
|
||||
|
||||
# duration = time.time() - start
|
||||
# print(f" - TIME: {duration:7.2f} s", file=sys.stderr)
|
||||
if dup_includes > 0:
|
||||
|
||||
Reference in New Issue
Block a user