diff --git a/core/models/survex.py b/core/models/survex.py index d5be489..3a1d2a4 100644 --- a/core/models/survex.py +++ b/core/models/survex.py @@ -219,8 +219,8 @@ class SurvexBlock(models.Model): """ objects = SurvexBlockLookUpManager() # overwrites SurvexBlock.objects and enables lookup() - name = models.CharField(max_length=100) - title = models.CharField(max_length=200) + name = models.CharField(blank=True, max_length=100) + title = models.CharField(blank=True, max_length=200) parent = models.ForeignKey("SurvexBlock", blank=True, null=True, on_delete=models.SET_NULL, db_index=True) ref_text = models.CharField(max_length=400, blank=True, null=True) @@ -232,8 +232,8 @@ class SurvexBlock(models.Model): # survexpath = models.CharField(max_length=200, blank=True, null=True) No need for this anymore scanswallet = models.ForeignKey( - "Wallet", null=True, on_delete=models.SET_NULL, db_index=True - ) # only ONE wallet per block. The most recent seen overwites.. ugh. + "Wallet", blank=True, null=True, on_delete=models.SET_NULL, db_index=True + ) # only ONE wallet per block. The most recent seen overwites.. ugh. Should fix this sometime. legsall = models.IntegerField(null=True) # summary data for this block legslength = models.FloatField(null=True) diff --git a/parsers/survex.py b/parsers/survex.py index fb4142a..796161b 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -9,6 +9,8 @@ import time from datetime import date, datetime, timezone from pathlib import Path +from django.core.exceptions import ValidationError + import troggle.settings as settings from troggle.core.models.caves import Cave, Entrance, GetCaveLookup from troggle.core.models.logbooks import QM @@ -47,6 +49,9 @@ todo = """ repeated readings from distox etc.. Not actually useful for pre 2022 survey data, but good future-proofing. Also it will be a tiny bit more accurate as these leg lengths are after loop closure fixup. + +- in models.survex only ONE wallet per block. 
The most recent seen overwites.. ugh. Should fix this sometime. + """ survexblockroot = None ROOTBLOCK = "rootblock" @@ -91,6 +96,8 @@ def datewallet(w, earliest): global survexblock_cache if survexblock_cache is None: # Build cache: {scanswallet_id: [SurvexBlock, ...]} + # This assumes that all the survexblocks have been parsed by now + # which should probably be done more explicitly at a higher level survexblock_cache = {} for b in SurvexBlock.objects.all().select_related("survexfile", "scanswallet"): if b.scanswallet_id not in survexblock_cache: @@ -110,11 +117,19 @@ def datewallet(w, earliest): return w.date def set_walletdate(w): + '''A valid wallet may not necessarily have a valid date, though of course + in an ideal world it should. + ''' earliest = datetime.now().date() - if not w.date(): # sets .walletdate as a side-effect if it gets it from JSON + # try: + # d = w.date + # except Exception as e: + # print(f" !! BAD DATE {e}\n {w.date} {w=}", file=sys.stderr) + if w.date: # .date is a function which sets .walletdate as a side-effect if it gets it from JSON, or returns None d = datewallet(w, earliest) # Not in JSON, so checks all the survex blocks - w.walletdate = d - w.save(update_fields=["walletdate"]) + if d: + w.walletdate = d + w.save(update_fields=["walletdate"]) def stash_data_issue(parser=None, message=None, url=None, sb=None): """Avoid hitting the database for error messages until the end of the import @@ -1321,12 +1336,13 @@ class LoadingSurvex: else: check_reused_wallet() else: - survexblock.scanswallet = manywallets[0] # this is a ForeignKey field - # Only save if changed - survexblock.save(update_fields=["scanswallet"]) - # This is where we should check that the wallet JSON contains a link to the survexfile - # and that the JSON date and walletdate are set correctly to the survexblock date. 
- set_walletdate(survexblock.scanswallet) + if manywallets[0]: + survexblock.scanswallet = manywallets[0] # this is a ForeignKey field + # Only save if changed + survexblock.save(update_fields=["scanswallet"]) + # This is where we should check that the wallet JSON contains a link to the survexfile + # and that the JSON date and walletdate are set correctly to the survexblock date. + set_walletdate(survexblock.scanswallet) else: perps = get_people_on_trip(survexblock) message = f" ! Wallet *REF bad in '{survexblock.survexfile.path}' '{refscan}' NOT in database i.e. wallet does not exist {perps}." @@ -1821,9 +1837,8 @@ class LoadingSurvex: nlegstotal = 0 self.relativefilename = path - # Cache for parent survex blocks to save at the end + # Cache for survex blocks to save at the end self._pending_parent_saves = set() - # Cache for survexblocks to save at the end (legsall/legslength) self._pending_block_saves = set() #self.IdentifyCave(path, svxid, depth) # this will produce null for survex files which are geographic collections @@ -1975,7 +1990,6 @@ class LoadingSurvex: survexblock = newsurvexblock survexblock.save() # Only save once, after all fields are set tickle() - # ...timing removed... # ---------------------------END elif self.rx_end.match(cmd): @@ -1991,16 +2005,16 @@ class LoadingSurvex: # This is the most time-consuming step within *end processing: 47% # Instead of saving parent here, cache for later if hasattr(survexblock, 'parent') and survexblock.parent: - self._pending_parent_saves.add(survexblock.parent) + self._pending_block_saves.add(survexblock) try: # This is the second most time-consuming step within *end processing: 35% - survexblock.save(update_fields=["legsall", "legslength"]) # Only update changed fields + self._pending_block_saves.add(survexblock) + # update_fields=["legsall", "legslength"] except Exception: print(f"{survexblock=}", file=sys.stderr) raise confirm_team_on_trip(survexblock) # POP state ++++++++++++++ - # ...timing removed... 
popblock() self.inheritteam = self.teaminheritstack.pop() self.currentteam = self.teamcurrentstack.pop() @@ -2112,19 +2126,20 @@ class LoadingSurvex: # At the end, save all cached survexblocks using bulk_update blocks = list(getattr(self, '_pending_block_saves', set())) if blocks: + # valid_blocks = [] + # for block in blocks: + # try: + # block.full_clean() + # valid_blocks.append(block) + # except ValidationError as e: + # print(f" ! Block {block} is invalid: {e}", file=sys.stderr) + # print(f" ! Block {block} is invalid: {e}") try: - SurvexBlock.objects.bulk_update(blocks, ["legsall", "legslength"]) + BATCH_SIZE = 900 + for i in range(0, len(blocks), BATCH_SIZE): + SurvexBlock.objects.bulk_update(blocks[i:i+BATCH_SIZE], ["legsall", "legslength", "parent"]) except Exception as e: - print(f"Error in bulk_update for survexblocks: {e}", file=sys.stderr) - - # Then save all cached parent survexblocks using bulk_update - parents = list(getattr(self, '_pending_parent_saves', set())) - if parents: - try: - SurvexBlock.objects.bulk_update(parents) - # SurvexBlock.objects.bulk_update(parents, [f.name for f in SurvexBlock._meta.fields if f.name != 'id']) - except Exception as e: - print(f"Error in bulk_update for parent blocks: {e}", file=sys.stderr) + print(f"\n !! Error in bulk_update for survexblocks: {e}", file=sys.stderr) def PushdownStackScan(self, survexblock, path, finname, flinear, io_collate): """Follows the *include links in all the survex files from the root file (usually 1623.svx)