Mirror of https://expo.survex.com/repositories/troggle/.git (synced 2026-02-08 05:58:45 +00:00)
primary key now UUID on SurvexBlock

@@ -260,7 +260,15 @@ class QM(TroggleModel):
     )
     grade = models.CharField(max_length=1, blank=True, null=True, help_text="A/B/C/D/X")
     cave = models.ForeignKey("Cave", related_name="QMs", blank=True, null=True, on_delete=models.SET_NULL)
-    block = models.ForeignKey("SurvexBlock", null=True, on_delete=models.SET_NULL)  # only for QMs from survex files
+    # only for QMs from survex files
+    block = models.ForeignKey(
+        "SurvexBlock",
+        to_field="_blockid",  # Explicitly point to the UUID field
+        null=True,
+        on_delete=models.SET_NULL
+    )
+    # block = models.ForeignKey("SurvexBlock", null=True, on_delete=models.SET_NULL)
     blockname = models.TextField(blank=True, null=True)  # NB truncated copy of survexblock name with last char added
     expoyear = models.CharField(max_length=4, blank=True, null=True)
     ticked = models.BooleanField(default=False)
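
With to_field="_blockid", the QM-to-SurvexBlock relation now stores the block's UUID rather than the integer row id. A minimal sketch of what that changes at the ORM level (model names are from this diff; the query itself is illustrative, not troggle code):

    # Django generates the attname "block_id" for the FK column; with
    # to_field="_blockid" it holds the related block's UUID, not an integer.
    qm = QM.objects.select_related("block").first()
    if qm and qm.block_id:
        blk = SurvexBlock.objects.get(_blockid=qm.block_id)
        assert blk == qm.block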

@@ -15,7 +15,6 @@ from troggle.core.utils import height_from_utm, throw


 class SurvexFile(models.Model):
     path = models.CharField(max_length=200)
-    #survexdirectory = models.ForeignKey("SurvexDirectory", blank=True, null=True, on_delete=models.SET_NULL)
     primary = models.ForeignKey(
         "SurvexFile", related_name="primarysurvex", blank=True, null=True, on_delete=models.SET_NULL
     )
@@ -222,7 +221,8 @@ class SurvexBlock(models.Model):
     # This ID is generated as soon as you call SurvexBlock(). So we can use it while assembling the data
     # into the survexblock without having to keep doing a database transaction
     _blockid = models.UUIDField(
-        primary_key=False,
+        primary_key=True,
+        unique=True,
         default=uuid.uuid4,
         editable=False
     )
@@ -249,10 +249,10 @@ class SurvexBlock(models.Model):
     foreigners = models.BooleanField(default=False)

     class Meta:
-        ordering = ("id",)
+        ordering = ("_blockid",)

     def __str__(self):
-        return self.name and str(self.name) or "no_name-#" + str(self.id)
+        return self.name and str(self.name) or "no_name-#" + str(self.pk)  # pk is primary key

     def isSurvexBlock(self):  # Function used in templates
         return True
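
The point of the UUID key is spelled out in the comment above: with default=uuid.uuid4 the primary key exists the moment the object is constructed, before any database save, so blocks can be cached and cross-referenced while parsing. A toy illustration (plain Python; FakeBlock is a stand-in, not a troggle class):

    import uuid

    class FakeBlock:
        def __init__(self):
            self._blockid = uuid.uuid4()  # usable immediately, no save() needed

    b = FakeBlock()
    pending = {b._blockid: b}  # cache keyed by primary key, pre-save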

@@ -6,6 +6,7 @@ import re
 import subprocess
 import sys
 import time
+from collections import OrderedDict
 from datetime import date, datetime, timezone
 from pathlib import Path

@@ -57,6 +58,7 @@ survexblockroot = None
 ROOTBLOCK = "rootblock"
 METRESINFEET = 3.28084
 UNSEENS = "_unseens.svx"
+BATCH_SIZE = 900  # limit for terms in SQL expressions for sqlite

 IGNOREFILES = ["dummy_file", "_dummy_file"]
 IGNOREPREFIX = ["surface", "kataster", "gpx", "deprecated"]  # "fixedpts",
@@ -135,35 +137,42 @@ def stash_data_issue(parser=None, message=None, url=None, sb=None):
     """Avoid hitting the database for error messages until the end of the import

     use a set, we do not want identically duplicate issues
+    BUT we have to use the sb UUID not the sb object itself
     """
     global dataissues
-    # try:
-    # if sb:
-    # url2 = get_offending_filename(sb.survexfile.path)
-    # except Exception as e:
-    # print(f" ! stash_data_issue() '{e}' '{sb=}' -- '{url=}'", file=sys.stderr)
-    # raise
-    dataissues.add((parser, message, url, sb))
+    if sb:
+        dataissues.add((parser, message, url, sb._blockid))
+    else:
+        dataissues.add((parser, message, url, None))


-def store_data_issues():
+def store_data_issues(loadex = None):
     """Take the stash and store it permanently in the database instead

-    use BULK creation here !"""
+    use BULK creation here !
+
+    change to using Class not global stash
+    """
     global dataissues
     print(f" - Storing {len(dataissues)} Data Issues into database")

     # make a list of objects, but don't commit to database yet
     di_list = []
     for issue in dataissues:
-        parser, message, url, sb = issue
-        if url is None:
-            if sb is not None:
-                try:
-                    url = get_offending_filename(sb.survexfile.path)
-                except Exception as e:
-                    print(f" ! store_data_issues() '{e}' '{sb=}' -- '{url=}'", file=sys.stderr)
-                    url = get_offending_filename(sb)  # assumed to be text
-        di_list.append(DataIssue(parser=parser, message=message, url=url))
+        if not loadex:
+            parser, message, url, _ = issue
+        else:
+            parser, message, url, blkid = issue
+            if blkid:
+                sb = loadex._pending_block_saves[blkid]
+        if url is None:
+            if sb is not None:
+                try:
+                    url = get_offending_filename(sb.survexfile.path)
+                except Exception as e:
+                    print(f" ! store_data_issues() '{e}' '{sb=}' -- '{url=}'", file=sys.stderr)
+                    url = get_offending_filename(sb)  # assumed to be text
+        di_list.append(DataIssue(parser=parser, message=message, url=url))
     # Now commit to db
     DataIssue.objects.bulk_create(di_list)
     dataissues = set()
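
Because the stash is a set of plain tuples, exact duplicates are dropped for free, and stashing the hashable UUID rather than the unsaved block object keeps the tuples stable. A toy demonstration of both properties (plain Python, no troggle imports; the uuid value is a stand-in):

    import uuid

    dataissues = set()
    blkid = uuid.uuid4()  # stand-in for a SurvexBlock._blockid
    dataissues.add(("survex", "no *date in block", None, blkid))
    dataissues.add(("survex", "no *date in block", None, blkid))  # identical: silently ignored
    assert len(dataissues) == 1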
@@ -212,39 +221,6 @@ def get_people_on_trip(survexblock):

     return list(set(people))

-# THIS SHOULD NOT BE GLOBAL ! Should be per instance of file loader, even though they are globally unique
-trip_person_record = {}  # a dict indexed by tuples (survexblock, personexpedition) = 1
-trip_team_cache = {}  # a dict of lists indexed by survexblock._blockid
-def put_person_on_trip(survexblock, personexpedition, tm):
-    """Uses a cache to avoid a database query if it doesn't need to.
-    Only used for a single person"""
-    global trip_person_record
-    global trip_team_cache
-
-    if (survexblock._blockid, personexpedition) in trip_person_record:
-        return True
-
-    try:
-        personrole = SurvexPersonRole(  # does not commit to db yet
-            survexblock=survexblock,
-            person = personexpedition.person,
-            personexpedition=personexpedition,
-            personname=tm
-        )
-    except:
-        message = f"! *team '{tm}' FAIL, already created {survexblock.survexfile.path} ({survexblock}) "
-        print(self.insp + message)
-        stash_data_issue(
-            parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
-        )
-
-    if survexblock._blockid not in trip_team_cache:
-        trip_team_cache[survexblock._blockid] = []
-    trip_team_cache[survexblock._blockid].append(personrole)
-    # print(f"-- trip_team_cache\n -- {survexblock=} - {survexblock._blockid}\n -- {trip_team_cache[survexblock._blockid]}\n -- {personrole}", file=sys.stderr)
-
-    trip_person_record[(survexblock._blockid, personexpedition)] = 1
-    return False
-
 def hack_save(survexblock):
     # #### Horrible hack to be properly written as a cache
@@ -482,6 +458,7 @@ class LoadingSurvex:
     pending = []
     adhocload = False
     person_pending_cache = {}  # indexed per survexblock UUID, so robust wrt PUSH/POP begin/end
+    _pending_block_saves = OrderedDict()  # not {}, retain topological sort order


     def __init__(self):
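
One note on the OrderedDict: since Python 3.7 a plain dict also preserves insertion order, so the choice here mainly documents the intent that _pending_block_saves keeps blocks in parse order:

    from collections import OrderedDict

    d = OrderedDict()
    d["root"] = 1
    d["child"] = 2
    assert list(d) == ["root", "child"]  # also true of a plain dict on Python 3.7+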
@@ -511,60 +488,313 @@ class LoadingSurvex:
             parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
         )

-    def confirm_team_on_trip(self, survexblock):
-        """This is only called when processing a *end statement
-        """
-        global trip_team_cache
-
-        if survexblock._blockid not in trip_team_cache:
-            return
-        #### STRIP THIS OUT and cache the SurvexPersonRole for the end of the survex block !
-        hack_save(survexblock)
-
-        # Now commit to db
-        pr_list = trip_team_cache[survexblock._blockid]
-        # print(f" PR_LIST {pr_list} {survexblock._blockid }", file=sys.stderr)
-        valid_list = []
-        for pr in pr_list:
-            try:
-                # print(f"___ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
-                pr.full_clean()
-                valid_list.append(pr)
-            except ValidationError as e:
-                print(f" ! PR is invalid: {e} {survexblock} {pr}", file=sys.stderr)
-                print(f" ! PR is invalid: {e} {survexblock} {pr}")
-
-        SurvexPersonRole.objects.bulk_create(valid_list)
-        # for pr in pr_list:
-        #     print(f"+++ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
-        #     SurvexPersonRole.objects.create(pr).save()
-
-        # Not working, so do not clear cache!
-        trip_team_cache[survexblock] = []  # in database now, so empty cache
-
-    def check_team_cache(self, label=None):
-        global trip_team_cache
-        message = f"! check_team_cache() called.. "
-        print(message)
-        print(message, file=sys.stderr)
-        for block in trip_team_cache:
-            message = f"! *team CACHEFAIL, trip_team_cache {block.survexfile.path} ({block}). label:{label}"
-            print(message)
-            print(message, file=sys.stderr)
+    def save_survexblocks_to_db(self):
+        """This saves the in-memory python objects into the database, at which point
+        the foreign keys are enabled and one can do queries on the database.
+
+        The sequence of survex blocks is constructed from the *include links,
+        depth first, so the list of survex blocks is topologically sorted.
+        HOWEVER what matters to the .parent links is the topological sorting
+        of the *begin/*end inclusions, which may or may not match the *include sort
+        sequence. Yuk.
+        """
+        def get_toposorted_blocks(blocks):
+            """This is a depth-first recursive topological sort that ensures that when a survexblock
+            has a parent, that parent always appears earlier in the list.
+            """
+            # 1. Map IDs to objects for quick lookup
+            id_map = {b._blockid: b for b in blocks}
+            topo_sorted_list = []
+            visited = set()
+
+            def visit(block):
+                # If we've already added this ID or it's None, skip
+                if block is None or block._blockid in visited:
+                    return
+
+                # 2. Get the parent object
+                # If .parent is an object, we use its ID.
+                # If .parent is already an ID, we use it directly.
+                parent_val = block.parent
+
+                # This line is a "safety net". It ensures that no matter how
+                # the parent data is stored, we always end up with a UUID key
+                # rather than a Python object.
+                # getattr(object, 'attribute_name', default_value).
+                parent_id = getattr(parent_val, '_blockid', parent_val)
+
+                # 3. Recursive step: Visit the parent first
+                if parent_id in id_map:
+                    visit(id_map[parent_id])
+
+                # 4. Add current block to results
+                visited.add(block._blockid)
+                topo_sorted_list.append(block)
+
+            for b in blocks:
+                visit(b)
+
+            return topo_sorted_list
+
+        def get_generational_chunks(sorted_blocks):
+            """Splits a topologically sorted list into chunks where no child
+            exists in the same chunk as its parent.
+            """
+            chunks = []
+            # Track which IDs are already "saved" (in a previous chunk)
+            saved_ids = set()
+
+            # current_batch will hold blocks for the current "generation"
+            current_batch = []
+
+            for block in sorted_blocks:
+                parent_id = getattr(block.parent, '_blockid', block.parent)
+
+                # If the parent is not yet 'saved', this block MUST
+                # go into a future batch.
+                if parent_id and parent_id not in saved_ids:
+                    # Finish the current chunk and start a new one
+                    if current_batch:
+                        chunks.append(current_batch)
+                        # Mark everything in the finished batch as 'saved'
+                        saved_ids.update(b._blockid for b in current_batch)
+                        current_batch = []
+
+                current_batch.append(block)
+
+                # Safety: Even if there are no dependencies, respect the BATCH_SIZE
+                if len(current_batch) >= BATCH_SIZE:
+                    chunks.append(current_batch)
+                    saved_ids.update(b._blockid for b in current_batch)
+                    current_batch = []
+
+            # Add the final trailing batch
+            if current_batch:
+                chunks.append(current_batch)
+
+            return chunks
+
+        def get_generational_chunks_optimized(blocks):
+            """Splits a topologically sorted list into chunks where no child
+            exists in the same chunk as its parent.
+            Optimized for a shallow tree.
+            """
+            # 1. Map IDs to objects for quick lookup
+            id_map = {b._blockid: b for b in blocks}
+
+            # 2. Dictionary to store the level (depth) of each block
+            # Level 0 = Root, Level 1 = Child of Root, etc.
+            levels = {}
+
+            def get_level(block):
+                if block._blockid in levels:
+                    return levels[block._blockid]
+
+                parent_id = getattr(block.parent, '_blockid', block.parent)
+
+                # If no parent OR parent is not in our current batch, it's a Root (Level 0)
+                if not parent_id or parent_id not in id_map:
+                    levels[block._blockid] = 0
+                    return 0
+
+                # Otherwise, level is Parent's Level + 1
+                level = get_level(id_map[parent_id]) + 1
+                levels[block._blockid] = level
+                return level
+
+            # Calculate levels for everyone
+            for b in blocks:
+                get_level(b)
+
+            # 3. Group blocks by their level
+            from collections import defaultdict
+            generational_groups = defaultdict(list)
+            for b in blocks:
+                generational_groups[levels[b._blockid]].append(b)
+
+            # 4. Final step: Split each level into batches of 900
+            final_chunks = []
+            for level in sorted(generational_groups.keys()):
+                level_blocks = generational_groups[level]
+                # Standard list slicing to split into BATCH_SIZE
+                for i in range(0, len(level_blocks), BATCH_SIZE):
+                    final_chunks.append(level_blocks[i:i + BATCH_SIZE])
+
+            return final_chunks
+
+        # construct the list.
+        already_saved_blocks = set(SurvexBlock.objects.values_list('_blockid', flat=True))
+        blocks = []
+        for blockid in self._pending_block_saves:
+            blocks.append(self._pending_block_saves[blockid])
+        if blocks:
+            # valid_blocks = []
+            # bad_parents = 0
+            # for block in blocks:
+            #     try:
+            #         if block.parent:
+            #             if block.parent not in already_saved_blocks:
+            #                 bad_parents += 1
+            #                 # print(f" Invalid parent id: {block.survexfile}::{block} -> {block.parent}", file=sys.stderr)
+            #         # block.full_clean()
+            #         valid_blocks.append(block)
+            #     except ValidationError as e:
+            #         print(f" ! Block {block} is invalid: {e}", file=sys.stderr)
+            #         print(f" ! Block {block} is invalid: {e}")
+            # print(f"\n !! {bad_parents} as-yet invalid parent ids out of {len(blocks)} blocks. {len(valid_blocks)} valid blocks", file=sys.stderr)

+            topo_list = get_toposorted_blocks(blocks)
+            print(f"\n !! {len(topo_list)=} blocks. {len(blocks)=}", file=sys.stderr)
+
+            safe_chunks = get_generational_chunks_optimized(topo_list)
+
+            try:
+                for i, chunk in enumerate(safe_chunks):
+                    print(f"Saving Chunk {i+1} ({len(chunk)} blocks)...", file=sys.stderr)
+                    SurvexBlock.objects.bulk_create(
+                        chunk,
+                        update_conflicts=True,  # root item probably exists already
+                        # update_fields needed if we allow conflict update
+                        update_fields=['name', 'title', 'parent', 'date',
+                                       'expedition', 'survexfile', 'scanswallet', 'legsall', 'legslength', 'foreigners',],
+                        unique_fields=['_blockid']
+                    )
+                print("Success: Entire tree saved.", file=sys.stderr)
+            except Exception as e:
+                print(f"Failed at chunk {i+1}: {e}", file=sys.stderr)
+
+            return
+
+            try:
+                for i in range(0, len(blocks), 1):
+                    valid_blocks[i].save()
+            except Exception as e:
+                print(f" !! Error in SINGLE create for survexblocks at {i}: {e}", file=sys.stderr)
+                return
+            # Nope, even topo-sorted, we can't know what batch size is suitable
+            # without some of the items being invalid
+            try:
+                for i in range(0, len(topo_list), BATCH_SIZE):
+                    SurvexBlock.objects.bulk_create(topo_list[i:i+BATCH_SIZE])
+            except Exception as e:
+                print(f" !! Error in bulk_create for survexblocks at {i}: {e}", file=sys.stderr)
+
+    trip_team_cache = {}  # a dict of lists indexed by survexblock._blockid
+
+    def put_personrole_on_trip(self, survexblock, personexpedition, tm):
+        """Only used for a single person.
+        Creates a SurvexPersonRole object, but this is not committed to the database until
+        all the survexblocks have been saved.
+        """
+        try:
+            personrole = SurvexPersonRole(  # does not commit to db yet
+                survexblock=survexblock,  # survexblock has no _id yet
+                person = personexpedition.person,
+                personexpedition=personexpedition,
+                personname=tm
+            )
+        except:
+            message = f"! *team '{tm}' FAIL, already created {survexblock.survexfile.path} ({survexblock}) "
+            print(self.insp + message)
+            stash_data_issue(
+                parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
+            )
+
+        if survexblock._blockid not in self.trip_team_cache:
+            self.trip_team_cache[survexblock._blockid] = []
+        self.trip_team_cache[survexblock._blockid].append(personrole)
+        # print(f"-- trip_team_cache\n -- {survexblock=} - {survexblock._blockid}\n -- {trip_team_cache[survexblock._blockid]}\n -- {personrole}", file=sys.stderr)
+
+        return False
+
+    def process_pending_team(self, survexblock):
+        """This is only called when processing a *end statement
+
+        It converts a list of names as strings into a list of valid
+        PersonExpedition objects for the current expo.
+
+        SurvexPersonRoles
+        """
+        # Many survex blocks have no *team members at all
+        if not self.flush_persons_pending(survexblock._blockid):
+            return
+
+        if not (expo := self.get_expo_for_block(survexblock)):
+            print(f" Buggeration FAIL {survexblock=}", file=sys.stderr)
+            return
+
+        # Sanitise the set of names, and validate as valid people
+        if teamnames := self.flush_persons_pending(survexblock._blockid):
+            for tm in teamnames:
+                if known_foreigner(tm):
+                    message = f"- *team '{tm}' known foreigner {survexblock.survexfile.path} ({survexblock})"
+                    print(self.insp + message)
+                    # stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
+                else:
+                    pe = GetPersonExpeditionNameLookup(expo).get(tm.lower())
+                    if pe:
+                        self.put_personrole_on_trip(survexblock, pe, tm)
+
+                    else:
+                        message = f"! *team '{tm}' FAIL personexpedition {survexblock.survexfile.path} ({survexblock}) "
+                        print(self.insp + message)
+                        stash_data_issue(
+                            parser="survex",
+                            message=message,
+                            url=None, sb=survexblock,
+                        )
+
+    def save_personroles_to_db(self):
+        """This should be run only after all the survexblocks have
+        been saved to the database and so have _id that can be used as a ForeignKey
+        """
+        for blk in self.trip_team_cache:
+            # hack_save(survexblock)
+
+            # Now commit to db
+            pr_list = self.trip_team_cache[blk]
+            # print(f" PR_LIST {pr_list} {blk}", file=sys.stderr)
+            valid_list = []
+            for pr in pr_list:
+                try:
+                    # print(f"___ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
+                    pr.full_clean()
+                    valid_list.append(pr)
+                except ValidationError as e:
+                    print(f" ! PR is invalid: {e} {survexblock} {pr}", file=sys.stderr)
+                    print(f" ! PR is invalid: {e} {survexblock} {pr}")

+            SurvexPersonRole.objects.bulk_create(valid_list)
+            # for pr in pr_list:
+            #     print(f"+++ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
+            #     SurvexPersonRole.objects.create(pr).save()
+
+        trip_team_cache = {}  # in database now, so empty cache
+
     def add_to_pending(self, survexblock, tm):
-        """Collects team names before we have a date so cannot validate against
-        expo attendance yet"""
-        global person_pending_cache
+        """Collects team names. We might not have a date so cannot validate
+        against expo attendance yet
+        """

         if survexblock._blockid not in self.person_pending_cache:
             self.person_pending_cache[survexblock._blockid] = set()
-        self.person_pending_cache[survexblock._blockid].add(tm)
-        print(f"-- person_pending_cache {survexblock}, {self.person_pending_cache[survexblock._blockid]}, {tm}")
+        if tm not in self.person_pending_cache[survexblock._blockid]:
+            self.person_pending_cache[survexblock._blockid].add(tm)
+            # print(f"-- person_pending_cache '{survexblock}' {self.person_pending_cache[survexblock._blockid]} (added {tm})")

-    def get_team_pending(self, blockid):
-        """A set of *team names added at the end of the survex block
-        """
+    def flush_persons_pending(self, blockid):
+        """A set of *team names added at the end of the survex block.
+        Zeros the pending cache as it returns the (unvalidated) names.
+        """
         if blockid in self.person_pending_cache:
             teamnames = self.person_pending_cache[blockid]  # a set of names
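
A toy run of the sort-then-chunk pipeline defined inside save_survexblocks_to_db() above, using stand-in objects (SimpleNamespace is stdlib; the helpers are nested functions, so the calls below only work where they are in scope):

    from types import SimpleNamespace
    import uuid

    root = SimpleNamespace(_blockid=uuid.uuid4(), parent=None)
    child = SimpleNamespace(_blockid=uuid.uuid4(), parent=root)
    grandchild = SimpleNamespace(_blockid=uuid.uuid4(), parent=child)

    # Whatever the input order, parents precede children in the sorted list,
    # and no chunk holds a block together with its own parent:
    # topo = get_toposorted_blocks([grandchild, root, child])   # [root, child, grandchild]
    # chunks = get_generational_chunks_optimized(topo)          # [[root], [child], [grandchild]]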
@@ -583,11 +813,10 @@
     def get_team_inherited(self, survexblock):  # survexblock only used for debug msgs
         """See get_team_pending(survexblock._blockid) which gets called at the same time,
         when we see a *date line"""
-        global person_pending_cache

         if self.inheritteam:
             message = (
-                f"- no *team INHERITING ({survexblock.parent})>({survexblock}) {survexblock.survexfile.path} '{self.inheritteam}'"
+                f"- no *team on block so INHERITING ({survexblock.parent})>({survexblock}) {survexblock.survexfile.path} '{self.inheritteam}'"
             )
             print(self.insp + message)
             # stash_data_issue(
@@ -674,6 +903,17 @@
             # expoyear = "1976"
             return

+    def get_expo_for_block(self, survexblock):
+        if expo := survexblock.expedition:  # may be None if no *date yet
+            return expo
+        if survexblock.date:
+            expo = Expedition.objects.get(year=str(survexblock.date)[:4])
+            return expo
+        if expo := survexblock.parent.expedition:  # immediate parent works mostly
+            print(f" WARNING using parent block expo year {survexblock=}", file=sys.stderr)
+            return expo
+        return False
+
     def fix_anonymous(self, survexblock):
         """Called when we reach *end of a block
         Checks to see if the block has no team attached, in which case it uses the
@@ -689,24 +929,14 @@
         if survexblock.parent.name == "troggle_unseens":
             # Bolluxed up if we try to inherit from this random junk, so don't.
             return

-        expo = survexblock.expedition  # may be None if no *date yet
-        if not expo:
-            expo = survexblock.parent.expedition  # immediate parent works mostly
-            if not expo:
-                return
-
         if not self.currentteam:  # i.e. if it is a dated block and has no team
             if teamnames := self.get_team_inherited(survexblock):  # WALRUS
-                for tm in teamnames:
-                    personexpedition = GetPersonExpeditionNameLookup(expo).get(tm.lower())
-                    if personexpedition:
-                        put_person_on_trip(survexblock, personexpedition, tm)
+                self.person_pending_cache[survexblock._blockid] = teamnames
         return

     def cache_survexblock(self, survexblock):
-        # appends to list, creates an empty list to append to if it doesn't exist yet
-        self._pending_block_saves.setdefault(survexblock._blockid, []).append(survexblock)
+        self._pending_block_saves[survexblock._blockid] = survexblock

     def LoadSurvexTeam(self, survexblock, line):
         """Interpreting the *team fields has been updated to current 2025 survex standard,
@@ -731,41 +961,13 @@
         # so we can't validate whether the person was on expo or not.
         # we will have to attach them to the survexblock anyway, and then do a
         # later check on whether they are valid when we get the date.

-        # refactor this to collect names before and after a *date, and commit them as
-        # a bulk update only at the END of the survexblock
-
-        if not tm:  # i.e. null person inthe *team
+        if not tm:  # i.e. null person in the *team
             return  # ignore: troggle does not need to know. Survex want to though.

         self.currentteam.add(tm)  # used in push/pop block code
-        expo = survexblock.expedition  # may be None if no *date yet
-
-        if expo:
-            personexpedition = GetPersonExpeditionNameLookup(expo).get(tm.lower())
-            if personexpedition:
-                put_person_on_trip(survexblock, personexpedition, tm)
-
-            elif known_foreigner(tm):  # note, not using .lower()
-                message = f"- *team {expo.year} '{tm}' known foreigner on *team {survexblock.survexfile.path} ({survexblock}) in '{line=}'"
-                print(self.insp + message)
-                # stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
-            else:
-                # we know the date and expo, but can't find the person
-                message = f"! *team {expo.year} '{tm}' FAIL personexpedition lookup on *team {survexblock.survexfile.path} ({survexblock}) in '{line=}' {tm=}"
-                print(self.insp + message)
-                stash_data_issue(
-                    parser="survex", message=message, url=None, sb=survexblock
-                )
-        else:
-            self.add_to_pending(survexblock, tm)
-            # don't know the date yet, so cannot query the table about validity.
-            # assume the person is valid. It will get picked up with the *date appears
-            # There are hundreds of these..
-            message = (
-                f"- Team before Date: {line} ({survexblock}) {survexblock.survexfile.path}"
-            )
+        self.add_to_pending(survexblock, tm)

         # teamfix = r"(?i)(.*?)\s+" + roles + r"?(?:es|s)?$" -- (.*?) means a non-greedy capture
         if fixstyle := self.rx_teamfix.match(line):  # matches the optional role at the end of the string WALRUS
             tmlist = fixstyle.group(1).strip('\"')  # remove quotes, if present
@@ -973,27 +1175,7 @@
             stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
         return expo

-    def process_pending_team(self, survexblock):
-        expo = survexblock.expedition
-        if teamnames := self.get_team_pending(survexblock._blockid):
-            for tm in teamnames:
-                if known_foreigner(tm):
-                    message = f"- *team {expo.year} '{tm}' known foreigner *date (misordered) {survexblock.survexfile.path} ({survexblock})"
-                    print(self.insp + message)
-                    # stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
-                else:
-                    pe = GetPersonExpeditionNameLookup(expo).get(tm.lower())
-                    if pe:
-                        put_person_on_trip(survexblock, pe, tm)
-
-                    else:
-                        message = f"! *team {expo.year} '{tm}' FAIL personexpedition lookup on *date {survexblock.survexfile.path} ({survexblock}) "
-                        print(self.insp + message)
-                        stash_data_issue(
-                            parser="survex",
-                            message=message,
-                            url=None, sb=survexblock,
-                        )

     def LoadSurvexDate(self, survexblock, line):
         """We now have a valid date for this survexblock, so we now know the expo
@@ -1027,12 +1209,7 @@
         if len(team) > 0:
             message = f"! *team {expo.year} Multiple *date in one block? Already someone on team when *date seen. {survexblock.survexfile.path} ({survexblock}) in '{line}'"
             print(self.insp + message)
             stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)

-        #self.process_pending_team(survexblock)
-
-
-
         oline = line
         perps = get_people_on_trip(survexblock)  # perps used for diagnostic error messages only: they are to blame

@@ -1665,7 +1842,7 @@
             self.currentsurvexfile = newfile
             return  # abort as everything already done for object creation

-        newfile.save()  # until we do this there is no internal id so no foreign key works
+        newfile.save()  # until we do this there is no internal .id so no foreign key works
         self.currentsurvexfile = newfile
         newfile.primary = self.set_primary(headpath)

@@ -1924,7 +2101,7 @@
         nlegstotal = 0
         self.relativefilename = path

-        self._pending_block_saves = {}  # Cache for survex blocks to save at the end
+        # self._pending_block_saves = {}  # Cache for survex blocks to save at the end
         #self.IdentifyCave(path, svxid, depth)  # this will produce null for survex files which are geographic collections

         self.currentsurvexfile = survexblock.survexfile
@@ -2024,7 +2201,6 @@

         def starstatement(star, fullline):
             import time
-            # ...existing code...
             """Interprets a survex command where * is the first character on the line, e.g. *begin"""
             nonlocal survexblock
             nonlocal blk_name
@@ -2075,8 +2251,9 @@
                 newsurvexblock.title = (
                     "(" + survexblock.title + ")"
                 )  # copy parent initially, overwrite if it has its own
+                self.cache_survexblock(newsurvexblock)  # note for later saving in db
                 survexblock = newsurvexblock
-                survexblock.save()  # Only save once, after all fields are set, or try to delay until *end using caches
+                # Only save to db once, after all fields are set
                 tickle()

             # ---------------------------END
@@ -2090,10 +2267,8 @@

                 self.fix_undated(survexblock)
                 self.fix_anonymous(survexblock)
-                self.confirm_team_on_trip(survexblock)
                 self.process_pending_team(survexblock)
-                self.cache_survexblock(survexblock)
                 # POP state ++++++++++++++
                 popblock()
                 self.inheritteam = self.teaminheritstack.pop()
                 self.currentteam = self.teamcurrentstack.pop()
@@ -2200,24 +2375,9 @@


         # At the end of the whole (concatenated) file, save all cached survexblocks using bulk_update
-        blocks = []
-        for blockid in self._pending_block_saves:
-            blocks.append(self._pending_block_saves[blockid])
-        if blocks:
-            # valid_blocks = []
-            # for block in blocks:
-            #     try:
-            #         block.full_clean()
-            #         valid_blocks.append(block)
-            #     except ValidationError as e:
-            #         print(f" ! Block {block} is invalid: {e}", file=sys.stderr)
-            #         print(f" ! Block {block} is invalid: {e}")
-            try:
-                BATCH_SIZE = 900
-                for i in range(0, len(blocks), BATCH_SIZE):
-                    SurvexBlock.objects.bulk_update(blocks[i:i+BATCH_SIZE], ["legsall", "legslength", "parent"])
-            except Exception as e:
-                print(f"\n !! Error in bulk_update for survexblocks: {e}", file=sys.stderr)
+        self.save_survexblocks_to_db()
+        self.save_personroles_to_db()

     def PushdownStackScan(self, survexblock, path, finname, flinear, io_collate):
         """Follows the *include links in all the survex files from the root file (usually 1623.svx)
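
The call order at the end of a scan matters: blocks are saved first so their UUID keys exist in the database, then the person roles whose ForeignKeys point at them, and finally (added in the next two hunks) the stashed data issues, which need the loader instance to map stashed UUIDs back to blocks. In outline:

    svx_scan.save_survexblocks_to_db()  # parents before children, in generational chunks
    svx_scan.save_personroles_to_db()   # block FKs are now resolvable
    store_data_issues(svx_scan)         # looks up stashed UUIDs in svx_scan._pending_block_saves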
@@ -2604,6 +2764,7 @@ def FindAndLoadSurvex():
     io_collate.write(f";*edulcni {survexfileroot.path}\n")

     svx_scan.check_cache_clean()
+    store_data_issues(svx_scan)

     mem1 = get_process_memory()
     flinear.write(f"\n - MEM:{mem1:.2f} MB STOP {survexfileroot.path}\n")
@@ -2721,6 +2882,7 @@
     io_collate.write(f";*edulcni {UNSEENS}\n")

     omit_scan.check_cache_clean()
+    store_data_issues(omit_scan)

     mem1 = get_process_memory()
     flinear.write(f"\n - MEM:{mem1:.2f} MB STOP {UNSEENS} Unseen Oddments\n")
@@ -3010,6 +3172,7 @@ def parse_one_file(fpath):  # --------------------------------------in progress--
     if len(sbs) < 1:
         print(f" ! No survex blocks found. Parser failure...")
     for sb in sbs:
+        print(f" - {sb.id} re-setting survex block parent {sb=}", file=sys.stderr)
         print(f" - {sb.id} re-setting survex block parent {sb=}")
         sb.parent = existingparent  # should be all the same
         sb.save()
@@ -3227,7 +3390,7 @@ def LoadSurvexBlocks():

     # duration = time.time() - start
     # print(f" - TIME: {duration:7.2f} s", file=sys.stderr)
-    store_data_issues()
+
     # duration = time.time() - start
     # print(f" - TIME: {duration:7.2f} s", file=sys.stderr)
     if dup_includes > 0: