mirror of https://expo.survex.com/repositories/troggle/.git synced 2026-02-08 13:55:24 +00:00

primary key now UUID on SurvexBlock

2026-01-29 23:06:30 +00:00
parent 7a779555ac
commit 1b7798e2fc
3 changed files with 366 additions and 195 deletions


@@ -6,6 +6,7 @@ import re
import subprocess
import sys
import time
from collections import OrderedDict
from datetime import date, datetime, timezone
from pathlib import Path
@@ -57,6 +58,7 @@ survexblockroot = None
ROOTBLOCK = "rootblock"
METRESINFEET = 3.28084
UNSEENS = "_unseens.svx"
BATCH_SIZE = 900 # limit for terms in SQL expressions for sqlite
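# 900 stays safely below SQLite's historical default of 999 bound
# parameters per statement (SQLITE_MAX_VARIABLE_NUMBER).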
IGNOREFILES = ["dummy_file", "_dummy_file"]
IGNOREPREFIX = ["surface", "kataster", "gpx", "deprecated"] #"fixedpts",
@@ -135,35 +137,42 @@ def stash_data_issue(parser=None, message=None, url=None, sb=None):
"""Avoid hitting the database for error messages until the end of the import
use a set, since we do not want identical duplicate issues
BUT we have to use the sb UUID not the sb object itself
"""
global dataissues
# try:
# if sb:
# url2 = get_offending_filename(sb.survexfile.path)
# except Exception as e:
# print(f" ! stash_data_issue() '{e}' '{sb=}' -- '{url=}'", file=sys.stderr)
# raise
dataissues.add((parser, message, url, sb))
if sb:
dataissues.add((parser, message, url, sb._blockid))
else:
dataissues.add((parser, message, url, None))
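# A minimal sketch of the dedupe behaviour (hypothetical messages):
# because dataissues is a set of tuples, stashing the same issue twice
# stores it only once.
#
#   stash_data_issue("survex", "bad *date", None, None)
#   stash_data_issue("survex", "bad *date", None, None)
#   assert len(dataissues) == 1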
def store_data_issues():
def store_data_issues(loadex = None):
"""Take the stash and store it permanently in the database instead
use BULK creation here !"""
use BULK creation here !
change to using a class attribute, not a global stash
"""
global dataissues
print(f" - Storing {len(dataissues)} Data Issues into database")
# make a list of objects, but don't commit to database yet
di_list = []
for issue in dataissues:
parser, message, url, sb = issue
if url is None:
if sb is not None:
try:
url = get_offending_filename(sb.survexfile.path)
except Exception as e:
print(f" ! store_data_issues() '{e}' '{sb=}' -- '{url=}'", file=sys.stderr)
url = get_offending_filename(sb) # assumed to be text
di_list.append(DataIssue(parser=parser, message=message, url=url))
if not loadex:
parser, message, url, _ = issue
else:
parser, message, url, blkid = issue
if blkid:
sb = loadex._pending_block_saves[blkid]
if url is None:
if sb is not None:
try:
url = get_offending_filename(sb.survexfile.path)
except Exception as e:
print(f" ! store_data_issues() '{e}' '{sb=}' -- '{url=}'", file=sys.stderr)
url = get_offending_filename(sb) # assumed to be text
di_list.append(DataIssue(parser=parser, message=message, url=url))
# Now commit to db
DataIssue.objects.bulk_create(di_list)
dataissues = set()
@@ -212,39 +221,6 @@ def get_people_on_trip(survexblock):
return list(set(people))
# THIS SHOULD NOT BE GLOBAL ! Should be per instance of file loader, even though they are globally unique
trip_person_record = {} # a dict indexed by tuples (survexblock, personexpedition) = 1
trip_team_cache = {} # a dict of lists indexed by survexblock._blockid
def put_person_on_trip(survexblock, personexpedition, tm):
"""Uses a cache to avoid a database query if it doesn't need to.
Only used for a single person"""
global trip_person_record
global trip_team_cache
if (survexblock._blockid, personexpedition) in trip_person_record:
return True
try:
personrole = SurvexPersonRole( # does not commit to db yet
survexblock=survexblock,
person = personexpedition.person,
personexpedition=personexpedition,
personname=tm
)
except:
message = f"! *team '{tm}' FAIL, already created {survexblock.survexfile.path} ({survexblock}) "
print(message)  # no self here: this is a module-level function
stash_data_issue(
parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
)
if survexblock._blockid not in trip_team_cache:
trip_team_cache[survexblock._blockid] = []
trip_team_cache[survexblock._blockid].append(personrole)
# print(f"-- trip_team_cache\n -- {survexblock=} - {survexblock._blockid}\n -- {trip_team_cache[survexblock._blockid]}\n -- {personrole}", file=sys.stderr)
trip_person_record[(survexblock._blockid, personexpedition)] = 1
return False
def hack_save(survexblock):
# #### Horrible hack to be properly written as a cache
@@ -482,6 +458,7 @@ class LoadingSurvex:
pending = []
adhocload = False
person_pending_cache = {} # indexed per survexblock UUID, so robust wrt PUSH/POP begin/end
_pending_block_saves = OrderedDict() # not {}, retain topological sort order
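# (Plain dicts preserve insertion order on Python 3.7+ anyway;
# OrderedDict makes the reliance on that ordering explicit.)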
def __init__(self):
@@ -511,60 +488,313 @@ class LoadingSurvex:
parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
)
def confirm_team_on_trip(self, survexblock):
"""This is only called when processing a *end statement
def save_survexblocks_to_db(self):
"""This saves the in-memory python objects into the database, at which point
the foreign keys are enabled and one can do queries on the database.
The sequence of survex blocks is constructed from the *include links,
depth first, so the list of survex blocks is topologically sorted.
HOWEVER what matters to the .parent links is the topological sorting
of the *begin/*end inclusions, which may or may not match the *include sort
sequence. Yuk.
"""
global trip_team_cache
if survexblock._blockid not in trip_team_cache:
return
#### STRIP THIS OUT and cache the SurvexPersonRole for the end of the survex block !
hack_save(survexblock)
def get_toposorted_blocks(blocks):
"""This is a depth-first recursive topological sort that ensures that when a survexblock
has a parent, that parent always appears earlier in the list.
"""
# 1. Map IDs to objects for quick lookup
id_map = {b._blockid: b for b in blocks}
topo_sorted_list = []
visited = set()
def visit(block):
# If we've already added this ID or it's None, skip
if block is None or block._blockid in visited:
return
# 2. Get the parent object
# If .parent is an object, we use its ID.
# If .parent is already an ID, we use it directly.
parent_val = block.parent
# This line of code is a "safety net". It ensures that no matter how
# the parent data is stored, we always end up with a UUID string
# rather than a Python object.
# getattr(object, 'attribute_name', default_value).
parent_id = getattr(parent_val, '_blockid', parent_val)
# 3. Recursive step: Visit the parent first
if parent_id in id_map:
visit(id_map[parent_id])
# 4. Add current block to results
visited.add(block._blockid)
topo_sorted_list.append(block)
for b in blocks:
visit(b)
return topo_sorted_list
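# A minimal sketch of the guarantee (hypothetical block ids): for a
# root "a", child "b" (parent=a) and grandchild "c" (parent=b)
# supplied as [c, a, b], visit() recurses to each parent before
# appending the child, so the result is [a, b, c] -- every parent
# precedes its descendants whatever the input order.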
def get_generational_chunks(sorted_blocks):
"""
Splits a topologically sorted list into chunks where no child
exists in the same chunk as its parent.
"""
chunks = []
# Track which IDs are already "saved" (in a previous chunk)
saved_ids = set()
# current_batch will hold blocks for the current "generation"
current_batch = []
for block in sorted_blocks:
parent_id = getattr(block.parent, '_blockid', block.parent)
# If the parent is not yet 'saved', this block MUST
# go into a future batch.
if parent_id and parent_id not in saved_ids:
# Finish the current chunk and start a new one
if current_batch:
chunks.append(current_batch)
# Mark everything in the finished batch as 'saved'
saved_ids.update(b._blockid for b in current_batch)
current_batch = []
current_batch.append(block)
# Safety: Even if there are no dependencies, respect the BATCH_SIZE
if len(current_batch) >= BATCH_SIZE:
chunks.append(current_batch)
saved_ids.update(b._blockid for b in current_batch)
current_batch = []
# Add the final trailing batch
if current_batch:
chunks.append(current_batch)
return chunks
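# Worked sketch (hypothetical blocks): for a topo-sorted input
# [root, childA, childB, grandchild] this yields the chunks
# [root], [childA, childB], [grandchild] -- each flush marks the
# finished batch as 'saved' before any block that depends on it is
# queued, so bulk_create() never inserts a child before its parent row.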
def get_generational_chunks_optimized(blocks):
"""
Splits a topologically sorted list into chunks where no child
exists in the same chunk as its parent.
Optimized for a shallow tree.
"""
# 1. Map IDs to objects for quick lookup
id_map = {b._blockid: b for b in blocks}
# 2. Dictionary to store the level (depth) of each block
# Level 0 = Root, Level 1 = Child of Root, etc.
levels = {}
def get_level(block):
if block._blockid in levels:
return levels[block._blockid]
parent_id = getattr(block.parent, '_blockid', block.parent)
# If no parent OR parent is not in our current batch, it's a Root (Level 0)
if not parent_id or parent_id not in id_map:
levels[block._blockid] = 0
return 0
# Otherwise, level is Parent's Level + 1
level = get_level(id_map[parent_id]) + 1
levels[block._blockid] = level
return level
# Calculate levels for everyone
for b in blocks:
get_level(b)
# 3. Group blocks by their level
from collections import defaultdict
generational_groups = defaultdict(list)
for b in blocks:
generational_groups[levels[b._blockid]].append(b)
# 4. Final step: Split each level into batches of 900
final_chunks = []
for level in sorted(generational_groups.keys()):
level_blocks = generational_groups[level]
# Standard list slicing to split into BATCH_SIZE
for i in range(0, len(level_blocks), BATCH_SIZE):
final_chunks.append(level_blocks[i:i + BATCH_SIZE])
return final_chunks
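# Design note: grouping by depth yields roughly one chunk per tree
# level (plus BATCH_SIZE splits), whereas the sequential scan in
# get_generational_chunks() flushes every time a child follows its
# own parent in the list, which in depth-first order can fragment
# the saves into many small bulk_create() calls.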
# construct the list.
already_saved_blocks = set(SurvexBlock.objects.values_list('_blockid', flat=True))
blocks = []
for blockid in self._pending_block_saves:
blocks.append(self._pending_block_saves[blockid])
if blocks:
# valid_blocks = []
# bad_parents = 0
# for block in blocks:
# try:
# if block.parent:
# if block.parent not in already_saved_blocks:
# bad_parents += 1
# # print(f" Invalid parent id: {block.survexfile}::{block} -> {block.parent}", file=sys.stderr)
# # block.full_clean()
# valid_blocks.append(block)
# except ValidationError as e:
# print(f" ! Block {block} is invalid: {e}", file=sys.stderr)
# print(f" ! Block {block} is invalid: {e}")
# print(f"\n !! {bad_parents} as-yet invalid parent ids out of {len(blocks)} blocks. {len(valid_blocks)} valid blocks", file=sys.stderr)
topo_list = get_toposorted_blocks(blocks)
print(f"\n !! {len(topo_list)=} blocks. {len(blocks)=}", file=sys.stderr)
safe_chunks = get_generational_chunks_optimized(topo_list)
# Now commit to db
pr_list = trip_team_cache[survexblock._blockid]
# print(f" PR_LIST {pr_list} {survexblock._blockid }", file=sys.stderr)
valid_list = []
for pr in pr_list:
try:
# print(f"___ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
pr.full_clean()
valid_list.append(pr)
except ValidationError as e:
print(f" ! PR is invalid: {e} {survexblock} {pr}", file=sys.stderr)
print(f" ! PR is invalid: {e} {survexblock} {pr}")
for i, chunk in enumerate(safe_chunks):
print(f"Saving Chunk {i+1} ({len(chunk)} blocks)...", file=sys.stderr)
SurvexBlock.objects.bulk_create(
chunk,
update_conflicts=True, # root item probably exists already
# update_fields needed if we allow conflict update
update_fields=['name', 'title', 'parent', 'date',
'expedition', 'survexfile', 'scanswallet', 'legsall', 'legslength', 'foreigners',],
unique_fields=['_blockid']
)
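# update_conflicts=True makes this an upsert (INSERT ... ON CONFLICT
# (_blockid) DO UPDATE on backends that support it), so re-importing
# over an already-saved root block updates it rather than raising.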
print("Success: Entire tree saved.", file=sys.stderr)
except Exception as e:
print(f"Failed at chunk {i+1}: {e}", file=sys.stderr)
return
SurvexPersonRole.objects.bulk_create(valid_list)
# for pr in pr_list:
# print(f"+++ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
# SurvexPersonRole.objects.create(pr).save()
try:
for i in range(0, len(blocks), 1):
valid_blocks[i].save()
except Exception as e:
print(f" !! Error in SINGLE create for survexblocks at {i}: {e}", file=sys.stderr)
return
# Nope, even topo-sorted, we can't know what batch size is suitable
# without some of the items being invalid
try:
for i in range(0, len(topo_list), BATCH_SIZE):
SurvexBlock.objects.bulk_create(topo_list[i:i+BATCH_SIZE])
except Exception as e:
print(f" !! Error in bulk_create for survexblocks at {i}: {e}", file=sys.stderr)
trip_team_cache = {} # a dict of lists indexed by survexblock._blockid
def put_personrole_on_trip(self, survexblock, personexpedition, tm):
"""
Only used for a single person.
Creates a SurvexPersonRole object, but this is not committed to the database until
all the survexblocks have been saved.
"""
try:
personrole = SurvexPersonRole( # does not commit to db yet
survexblock=survexblock, # survexblock has no _id yet
person = personexpedition.person,
personexpedition=personexpedition,
personname=tm
)
except:
message = f"! *team '{tm}' FAIL, already created {survexblock.survexfile.path} ({survexblock}) "
print(self.insp + message)
stash_data_issue(
parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
)
if survexblock._blockid not in self.trip_team_cache:
self.trip_team_cache[survexblock._blockid] = []
self.trip_team_cache[survexblock._blockid].append(personrole)
# print(f"-- trip_team_cache\n -- {survexblock=} - {survexblock._blockid}\n -- {trip_team_cache[survexblock._blockid]}\n -- {personrole}", file=sys.stderr)
return False
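# Note: personrole already references its survexblock even though that
# block row is not in the database yet. This is what the UUID primary
# key buys us: _blockid is assigned at object creation, so the FK value
# is known now and the roles can be bulk-created later, after
# save_survexblocks_to_db() has run.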
def process_pending_team(self, survexblock):
"""This is only called when processing a *end statement
# Not working, so do not clear cache!
trip_team_cache[survexblock] = [] # in database now, so empty cache
def check_team_cache(self, label=None):
global trip_team_cache
message = f"! check_team_cache() called.. "
print(message)
print(message, file=sys.stderr)
for block in trip_team_cache:
message = f"! *team CACHEFAIL, trip_team_cache {block.survexfile.path} ({block}). label:{label}"
print(message)
print(message, file=sys.stderr)
It converts a list of names as strings into a list of valid
PersonExpedition objects for the current expo.
These are then attached to the block as SurvexPersonRoles.
"""
# Many survex blocks have no *team members at all
if not self.flush_persons_pending(survexblock._blockid):
return
if not (expo := self.get_expo_for_block(survexblock)):
print(f" Buggeration fAIL {survexblock=}",file=sys.stderr)
return
# Sanitise the set of names, and validate as valid people
if teamnames := self.flush_persons_pending(survexblock._blockid):
for tm in teamnames:
if known_foreigner(tm):
message = f"- *team '{tm}' known foreigner {survexblock.survexfile.path} ({survexblock})"
print(self.insp + message)
# stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
else:
pe = GetPersonExpeditionNameLookup(expo).get(tm.lower())
if pe:
self.put_personrole_on_trip(survexblock, pe, tm)
else:
message = f"! *team '{tm}' FAIL personexpedition {survexblock.survexfile.path} ({survexblock}) "
print(self.insp + message)
stash_data_issue(
parser="survex",
message=message,
url=None, sb=survexblock,
)
def save_personroles_to_db(self):
"""This should be run only after all the survexblocks have
been saved to the database and so have _id that can be used as a ForeignKey
"""
for blk in self.trip_team_cache:
# hack_save(survexblock)
# Now commit to db
pr_list = self.trip_team_cache[blk]
# print(f" PR_LIST {pr_list} {blk}", file=sys.stderr)
valid_list = []
for pr in pr_list:
try:
# print(f"___ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
pr.full_clean()
valid_list.append(pr)
except ValidationError as e:
print(f" ! PR is invalid: {e} {survexblock} {pr}", file=sys.stderr)
print(f" ! PR is invalid: {e} {survexblock} {pr}")
SurvexPersonRole.objects.bulk_create(valid_list)
# for pr in pr_list:
# print(f"+++ {pr.survexblock=} {pr.survexblock.id=} {pr.person=} {pr.personexpedition=}", file=sys.stderr)
# SurvexPersonRole.objects.create(pr).save()
trip_team_cache = {} # in database now, so empty cache
def add_to_pending(self, survexblock, tm):
"""Collects team names before we have a date so cannot validate against
expo attendance yet"""
global person_pending_cache
"""Collects team names. We might not have a date so cannot validate
against expo attendance yet
"""
if survexblock._blockid not in self.person_pending_cache:
self.person_pending_cache[survexblock._blockid] = set()
self.person_pending_cache[survexblock._blockid].add(tm)
print(f"-- person_pending_cache {survexblock}, {self.person_pending_cache[survexblock._blockid]}, {tm}")
if tm not in self.person_pending_cache[survexblock._blockid]:
self.person_pending_cache[survexblock._blockid].add(tm)
# print(f"-- person_pending_cache '{survexblock}' {self.person_pending_cache[survexblock._blockid]} (added {tm})")
def get_team_pending(self, blockid):
"""A set of *team names added at the end of the survex block
def flush_persons_pending(self, blockid):
"""A set of *team names added at the end of the survex block.
Zeros the pending cache as it returns the (unvalidated) names.
"""
if blockid in self.person_pending_cache:
teamnames = self.person_pending_cache[blockid] # a set of names
@@ -583,11 +813,10 @@ class LoadingSurvex:
def get_team_inherited(self, survexblock): # survexblock only used for debug messages
"""See get_team_pending(survexblock._blockid) which gets called at the same time,
when we see a *date line"""
global person_pending_cache
if self.inheritteam:
message = (
f"- no *team INHERITING ({survexblock.parent})>({survexblock}) {survexblock.survexfile.path} '{self.inheritteam}'"
f"- no *team on blcok so INHERITING ({survexblock.parent})>({survexblock}) {survexblock.survexfile.path} '{self.inheritteam}'"
)
print(self.insp + message)
# stash_data_issue(
@@ -674,6 +903,17 @@ class LoadingSurvex:
# expoyear = "1976"
return
def get_expo_for_block(self, survexblock):
if expo := survexblock.expedition: # may be None if no *date yet
return expo
if survexblock.date:
expo = Expedition.objects.get(year=str(survexblock.date)[:4])
return expo
if expo := survexblock.parent.expedition: # immediate parent works mostly
print(f" WARNING using parent block expo year {survexblock=}",file=sys.stderr)
return expo
return False
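# Fallback order: the block's own expedition, then the expedition for
# the year of its *date, then the immediate parent's expedition;
# a False return means no expo could be determined.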
def fix_anonymous(self, survexblock):
"""Called when we reach *end of a block
Checks to see if the block has no team attached, in which case it uses the
@@ -689,24 +929,14 @@ class LoadingSurvex:
if survexblock.parent.name == "troggle_unseens":
# Bolluxed up if we try to inherit from this random junk, so don't.
return
expo = survexblock.expedition # may be None if no *date yet
if not expo:
expo = survexblock.parent.expedition # immediate parent works mostly
if not expo:
return
if not self.currentteam: # i.e. if it is a dated block and has no team
if teamnames := self.get_team_inherited(survexblock):# WALRUS
for tm in teamnames:
personexpedition = GetPersonExpeditionNameLookup(expo).get(tm.lower())
if personexpedition:
put_person_on_trip(survexblock, personexpedition, tm)
self.person_pending_cache[survexblock._blockid] = teamnames
return
def cache_survexblock(self, survexblock):
# appends to list, creates an empty list to append to if it doesn't exist yet
self._pending_block_saves.setdefault(survexblock._blockid, []).append(survexblock)
self._pending_block_saves[survexblock._blockid] = survexblock
def LoadSurvexTeam(self, survexblock, line):
"""Interpeting the *team fields has been updated to current 2025 survex standard,
@@ -731,41 +961,13 @@ class LoadingSurvex:
# so we can't validate whether the person was on expo or not.
# we will have to attach them to the survexblock anyway, and then do a
# later check on whether they are valid when we get the date.
# refactor this to collect names before and after a *date, and commit them as
# a bulk update only at the END of the survexblock
if not tm: # i.e. null person inthe *team
if not tm: # i.e. null person in the *team
return # ignore: troggle does not need to know. Survex want to though.
self.currentteam.add(tm) # used in push/pop block code
expo = survexblock.expedition # may be None if no *date yet
self.add_to_pending(survexblock, tm)
if expo:
personexpedition = GetPersonExpeditionNameLookup(expo).get(tm.lower())
if personexpedition:
put_person_on_trip(survexblock, personexpedition, tm)
elif known_foreigner(tm): # note, not using .lower()
message = f"- *team {expo.year} '{tm}' known foreigner on *team {survexblock.survexfile.path} ({survexblock}) in '{line=}'"
print(self.insp + message)
# stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
else:
# we know the date and expo, but can't find the person
message = f"! *team {expo.year} '{tm}' FAIL personexpedition lookup on *team {survexblock.survexfile.path} ({survexblock}) in '{line=}' {tm=}"
print(self.insp + message)
stash_data_issue(
parser="survex", message=message, url=None, sb=survexblock
)
else:
self.add_to_pending(survexblock, tm)
# don't know the date yet, so cannot query the table about validity.
# assume the person is valid. It will get picked up when the *date appears
# There are hundreds of these..
message = (
f"- Team before Date: {line} ({survexblock}) {survexblock.survexfile.path}"
)
# teamfix = r"(?i)(.*?)\s+" + roles + r"?(?:es|s)?$" -- (.*?) means a non-greedy capture
if fixstyle := self.rx_teamfix.match(line): # matches the optional role at the end of the string WALRUS
tmlist = fixstyle.group(1).strip('\"') # remove quotes, if present
@@ -973,27 +1175,7 @@ class LoadingSurvex:
stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
return expo
def process_pending_team(self, survexblock):
expo = survexblock.expedition
if teamnames := self.get_team_pending(survexblock._blockid):
for tm in teamnames:
if known_foreigner(tm):
message = f"- *team {expo.year} '{tm}' known foreigner *date (misordered) {survexblock.survexfile.path} ({survexblock})"
print(self.insp + message)
# stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
else:
pe = GetPersonExpeditionNameLookup(expo).get(tm.lower())
if pe:
put_person_on_trip(survexblock, pe, tm)
else:
message = f"! *team {expo.year} '{tm}' FAIL personexpedition lookup on *date {survexblock.survexfile.path} ({survexblock}) "
print(self.insp + message)
stash_data_issue(
parser="survex",
message=message,
url=None, sb=survexblock,
)
def LoadSurvexDate(self, survexblock, line):
"""We now have a valid date for this survexblock, so we now know the expo
@@ -1027,12 +1209,7 @@ class LoadingSurvex:
if len(team) > 0:
message = f"! *team {expo.year} Multiple *date in one block? Already someone on team when *date seen. {survexblock.survexfile.path} ({survexblock}) in '{line}'"
print(self.insp + message)
stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
#self.process_pending_team(survexblock)
stash_data_issue(parser='survex', message=message, url=None, sb=survexblock)
oline = line
perps = get_people_on_trip(survexblock) # perps used for diagnostic error messages only: they are to blame
@@ -1665,7 +1842,7 @@ class LoadingSurvex:
self.currentsurvexfile = newfile
return # abort as everything already done for object creation
newfile.save() # until we do this there is no internal id so no foreign key works
newfile.save() # until we do this there is no internal .id so no foreign key works
self.currentsurvexfile = newfile
newfile.primary = self.set_primary(headpath)
@@ -1924,7 +2101,7 @@ class LoadingSurvex:
nlegstotal = 0
self.relativefilename = path
self._pending_block_saves = {} # Cache for survex blocks to save at the end
# self._pending_block_saves = {} # Cache for survex blocks to save at the end
#self.IdentifyCave(path, svxid, depth) # this will produce null for survex files which are geographic collections
self.currentsurvexfile = survexblock.survexfile
@@ -2024,7 +2201,6 @@ class LoadingSurvex:
def starstatement(star, fullline):
import time
"""Interprets a survex comamnd where * is the first character on the line, e.g. *begin"""
nonlocal survexblock
nonlocal blk_name
@@ -2075,8 +2251,9 @@ class LoadingSurvex:
newsurvexblock.title = (
"(" + survexblock.title + ")"
) # copy parent initially, overwrite if it has its own
self.cache_survexblock(newsurvexblock) # note for later saving in db
survexblock = newsurvexblock
survexblock.save() # Only save once, after all fields are set, or try to delay until *end using caches
# Only save to db once, after all fields are set
tickle()
# ---------------------------END
@@ -2090,10 +2267,8 @@ class LoadingSurvex:
self.fix_undated(survexblock)
self.fix_anonymous(survexblock)
self.confirm_team_on_trip(survexblock)
self.process_pending_team(survexblock)
self.cache_survexblock(survexblock)
# POP state ++++++++++++++
popblock()
self.inheritteam = self.teaminheritstack.pop()
self.currentteam = self.teamcurrentstack.pop()
@@ -2200,24 +2375,9 @@ class LoadingSurvex:
# At the end of the whole (concatenated) file, save all cached survexblocks using bulk_update
blocks = []
for blockid in self._pending_block_saves:
blocks.append(self._pending_block_saves[blockid])
if blocks:
# valid_blocks = []
# for block in blocks:
# try:
# block.full_clean()
# valid_blocks.append(block)
# except ValidationError as e:
# print(f" ! Block {block} is invalid: {e}", file=sys.stderr)
# print(f" ! Block {block} is invalid: {e}")
try:
BATCH_SIZE = 900
for i in range(0, len(blocks), BATCH_SIZE):
SurvexBlock.objects.bulk_update(blocks[i:i+BATCH_SIZE], ["legsall", "legslength", "parent"])
except Exception as e:
print(f"\n !! Error in bulk_update for survexblocks: {e}", file=sys.stderr)
self.save_survexblocks_to_db()
self.save_personroles_to_db()
def PushdownStackScan(self, survexblock, path, finname, flinear, io_collate):
"""Follows the *include links in all the survex files from the root file (usually 1623.svx)
@@ -2604,6 +2764,7 @@ def FindAndLoadSurvex():
io_collate.write(f";*edulcni {survexfileroot.path}\n")
svx_scan.check_cache_clean()
store_data_issues(svx_scan)
mem1 = get_process_memory()
flinear.write(f"\n - MEM:{mem1:.2f} MB STOP {survexfileroot.path}\n")
@@ -2721,6 +2882,7 @@ def FindAndLoadSurvex():
io_collate.write(f";*edulcni {UNSEENS}\n")
omit_scan.check_cache_clean()
store_data_issues(omit_scan)
mem1 = get_process_memory()
flinear.write(f"\n - MEM:{mem1:.2f} MB STOP {UNSEENS} Unseen Oddments\n")
@@ -3010,6 +3172,7 @@ def parse_one_file(fpath): # --------------------------------------in progress--
if len(sbs)<1:
print(f" ! No survex blocks found. Parser failure...")
for sb in sbs:
print(f" - {sb.id} re-setting survex block parent {sb=}", file=sys.stderr)
print(f" - {sb.id} re-setting survex block parent {sb=}")
sb.parent = existingparent # should be all the same
sb.save()
@@ -3227,7 +3390,7 @@ def LoadSurvexBlocks():
# duration = time.time() - start
# print(f" - TIME: {duration:7.2f} s", file=sys.stderr)
store_data_issues()
# duration = time.time() - start
# print(f" - TIME: {duration:7.2f} s", file=sys.stderr)
if dup_includes > 0: