diff --git a/parsers/survex.py b/parsers/survex.py index bb41438..9b77fb1 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -220,29 +220,7 @@ def get_people_on_trip(survexblock): people.append(f"{p.personname}") return list(set(people)) - -_hack_set = set() -def hack_save(survexblock): - # #### Horrible hack to be properly written as a cache - sb_list =[] - print_list = [] - sb = survexblock - while sb.parent and sb != sb.parent: - if sb._blockid not in _hack_set: - sb_list.append((sb._blockid, sb)) - print_list.append(sb) - sb = sb.parent - if len(print_list) > 0: - pass - # print(f" ## Horrible QM pre-save hack: {len(print_list)} survexblocks up from {survexblock}", file=sys.stderr) - - sb_list.reverse() - for sbo in sb_list: - id, sb = sbo - sb.save() - _hack_set.add(sb._blockid) - # #### Horrible hack to be properly written as a cache - + class LoadingSurvex: """A 'survex block' is a *begin...*end set of cave data. A survex file can contain many begin-end blocks, which can be nested, and which can *include @@ -466,6 +444,7 @@ class LoadingSurvex: person_pending_cache = {} # indexed per survexblock UUID, so robust wrt PUSH/POP begin/end _pending_block_saves = OrderedDict() # not {}, retain topological sort order _pending_pr_saves = {} # a dict of lists indexed by survexblock UUID + _pending_qm_saves = {} # a dict of lists indexed by survexblock UUID def __init__(self): @@ -754,6 +733,46 @@ class LoadingSurvex: SurvexPersonRole.objects.bulk_create(valid_list) _pending_pr_saves = {} # in database now, so empty cache + + def save_qms_to_db(self): + """This should be run only after all the survexblocks have + been saved to the database and so have _id that can be used as a ForeignKey + """ + print(f" - Saving {len(self._pending_qm_saves)} QMs to db", file=sys.stderr) + for blk in self._pending_qm_saves: + + # Now commit to db + qm_list = self._pending_qm_saves[blk] + # print(f" PR_LIST {pr_list} {blk}", file=sys.stderr) + valid_list = [] + for qm in qm_list: + nqms = QM.objects.filter(cave=qm.cave, blockname=qm.blockname) + print(f"QM found a prior match {nqms}", file=sys.stderr) + try: + qm.full_clean() + valid_list.append(qm) + except ValidationError as e: + message = f" ! QM is invalid: {e} {survexblock} {qm}" + print(message, file=sys.stderr) + stash_data_issue( + parser="survex", + message=message, + url=None, sb=survexblock, + ) + for blk in self._pending_qm_saves: + try: + for qm in self._pending_qm_saves[blk]: + qm.save() + except Exception as e: + pass + raise + # try: + # QM.objects.bulk_create(valid_list) + # except Exception as e: + # pass + # raise + + _pending_qm_saves = {} # in database now, so empty cache def add_to_pending(self, survexblock, tm): """Collects team names. We might not have a date so cannot validate @@ -799,7 +818,8 @@ class LoadingSurvex: return self.inheritteam def fix_undated(self, survexblock): - """Called when we reach *end of a block OR when a QM is seen. + """Called when we reach *end of a block OR when a QM is seen + in an un-dated survex block. Checks to see if the block has no *date, in which case it uses the inherited date. This is fine if the inherited date is from the same SurvexFile, @@ -814,19 +834,23 @@ class LoadingSurvex: if survexblock.parent.name == "troggle_unseens": # Bolluxed up if we try to inherit from this random junk, so don't. return - + if self.currentdate: # already set if not survexblock.date: # error message = ( - f"! no survexblock.date but currentdate is set. ({survexblock})-{survexblock.survexfile.path} {self.currentdate=}" - ) + f"! no survexblock.date but currentdate is set. ({survexblock})-{survexblock.survexfile.path} {self.currentdate=}") print(self.insp + message) stash_data_issue( - parser="survex", message=message, url=None, sb=(survexblock.survexfile.path) - ) + parser="survex", message=message, url=None, sb=survexblock) return + + if survexblock.survexfile != survexblock.parent.survexfile: + # This is noteworthy, however. FORBID inheriting dates between files. NOT documented survex behaviour !! + survexblock.date = None + self.currentdate = None # unecessary duplication + return None if self.inheritdate: survexblock.date = self.inheritdate @@ -838,43 +862,12 @@ class LoadingSurvex: print(self.insp + message) # stash_data_issue( # parser="survex", message=message, url=None, sb=(survexblock.survexfile.path) # child - # ) - if survexblock.survexfile != survexblock.parent.survexfile: - # This is noteworthy, however. FORBID inheriting dates between files. NOT documented survex behaviour !! - survexblock.date = None - self.currentdate = None # unecessary duplication - return None - - # if survexblock.parent.name == "rootblock": - # # Not a sensible thing to inherit a date from, even if a date exists, which it shouldn't... - # message = ( - # f"- No *date. But not sensible to inherit from rootblock. From ({survexblock.parent})-{survexblock.parent.survexfile.path} to ({survexblock})-{survexblock.survexfile.path} {self.inheritdate:%Y-%m-%d}" - # ) - # print(self.insp + message) - # # stash_data_issue( - # # parser="survex", message=message, url=None, sb=(survexblock.survexfile.path) - # # ) - # return - # else: - # message = ( - # f"- Warning *date '{self.inheritdate:%Y-%m-%d}' INHERITED from DIFFERENT file:\n ({survexblock.parent})-{survexblock.parent.survexfile.path} to ({survexblock})-{survexblock.survexfile.path} {self.inheritdate:%Y-%m-%d}\n {self.stackbegin} {self.inheritdate:%Y-%m-%d}" - # ) - # print(self.insp + message) - # stash_data_issue( - # parser="survex", message=message, url=None, sb=(survexblock.survexfile.path) # not the parent - # ) - # return self.inheritdate + # ) else: # This is not an error in the Expo dataset. # Many files just holding *include lines do not have dates. # Hardly _any_ of the ARGE survex files have dates ! pass - # message = f" ! No survexblock.date inheritable in '{survexblock}' in '{survexblock.survexfile.path}', setting to 1976" - # print(self.insp + message) - # stash_data_issue( - # parser="survex", message=message, url=None, sb=survexblock - # ) - # expoyear = "1976" return def get_expo_for_block(self, survexblock): @@ -903,10 +896,15 @@ class LoadingSurvex: if survexblock.parent.name == "troggle_unseens": # Bolluxed up if we try to inherit from this random junk, so don't. return + if survexblock.survexfile != survexblock.parent.survexfile: + # This is noteworthy, however. FORBID inheriting between files. NOT documented survex behaviour !! + return if not self.currentteam: # i.e. if it is a dated block and has no team if teamnames := self.get_team_inherited(survexblock):# WALRUS self.person_pending_cache[survexblock._blockid] = teamnames + + return None return def cache_survexblock(self, survexblock): @@ -1944,19 +1942,24 @@ class LoadingSurvex: if survexblock.survexfile.cave: survexblock.survexfile.cave.slug() - self.fix_undated(survexblock) # null-op if already set + self.fix_undated(survexblock) # null-op if already set, inherits date if needed try: expoyear = str(survexblock.date.year) - except: - print(f">> why is survexblock not set ?! in LoadSurvexQM()/n {survexblock.survexfile.path}") + except Exception as e: + message = ( + f"! QM in undated survex block ({survexblock})-{survexblock.survexfile.path} {self.currentdate=}" + ) + print(self.insp + message) + stash_data_issue( + parser="survex", message=message, url=None, sb=survexblock) + # we could look at child blocks in the same survexfile and see if they have dates, + # and if all such things are int he same year, that would be unambiguous. + # But better to flag it as a DataIssue to be fixed in the survex file. + expoyear = settings.EPOCH.year # 1970 - - ### HORRIBLE HACK, replace with cache - hack_save(survexblock) - ### HORRIBLE HACK, replace with cache try: - qm = QM.objects.create( + qm = QM( number=qm_no, # nearest_station=a_survex_station_object, # can be null resolution_station_name=resolution_station_name, @@ -1969,22 +1972,13 @@ class LoadingSurvex: expoyear=expoyear, cave=survexblock.survexfile.cave, ) - qm.save - except: - qms = QM.objects.filter( - number=qm_no, - # nearest_station=a_survex_station_object, # can be null - resolution_station_name=resolution_station_name, - nearest_station_name=qm_nearest, - ticked=qm_ticked, - grade=qm_grade.upper(), - location_description=qm_notes, - block=survexblock, # only set for survex-imported QMs - blockname=blockname, # only set for survex-imported QMs - expoyear=expoyear, - cave=survexblock.survexfile.cave, - ) - message = f" ! QM{qm_no} FAIL to create {qm_nearest} in'{survexblock.survexfile.path}' found {len(qms)}:{qms}" + # qm.save + if survexblock._blockid not in self._pending_qm_saves: + self._pending_qm_saves[survexblock._blockid] = [] + self._pending_qm_saves[survexblock._blockid].append(qm) + + except Exception as e: + message = f" ! QM{qm_no} FAIL '{e}' to create {qm_nearest} in '{survexblock.survexfile.path}'" print(insp + message) stash_data_issue( parser="survex", message=message, url=None, sb=survexblock @@ -2352,6 +2346,7 @@ class LoadingSurvex: self.save_survexblocks_to_db() self.save_personroles_to_db() + self.save_qms_to_db() def PushdownStackScan(self, survexblock, path, finname, flinear, io_collate): """Follows the *include links in all the survex files from the root file (usually 1623.svx) @@ -3335,6 +3330,10 @@ def LoadSurvexBlocks(): SurvexFile.objects.all().delete() SurvexPersonRole.objects.all().delete() SurvexStation.objects.all().delete() + qms_to_go = QM.objects.filter(block__isnull=False) + print(f" - Flushing {len(qms_to_go)} previously loaded QMs") + qms_to_go .delete() + # QM.objects.all().delete() mem1 = get_process_memory() print(f" - MEM:{mem1:7.2f} MB now. Foreign key objects loaded on deletion. ", file=sys.stderr)