diff --git a/parsers/survex.py b/parsers/survex.py index 003760b..1360d4b 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -485,10 +485,11 @@ class LoadingSurvex: the foreign keys are enabled and one can do queries on the database. The sequence of survex blocks is constructed from the *include links, - depth first, so the list iof survex blocks is topologically sorted. + depth first, so the list of survex blocks is topologically sorted. HOWEVER what matters to the .parent links is the topological sorting of the *begin/*end inclusions, which may or may not match the *include sort - sequence. Yuk. + sequence. The survex dataset is currently OK, but pathological data + in future could expose this vulnerability. """ def get_toposorted_blocks(blocks): @@ -588,8 +589,8 @@ class LoadingSurvex: chunk, update_conflicts=True, # root item probably exists already # update_fields needed if we allow conflict update - update_fields=['name', 'title', 'parent', 'date', - 'expedition', 'survexfile', 'scanswallet', 'legsall', 'legslength', 'foreigners',], + update_fields=['name', 'title', 'parent', 'ref_text', 'date', + 'expedition', 'survexfile', 'scanswallet', 'legsall', 'legslength', 'foreigners',], # i.e. 
all of them unique_fields=['_blockid'] ) print(f" - {len(chunk)} SurvexBlocks bulk-created in db", file=sys.stderr) @@ -599,7 +600,7 @@ class LoadingSurvex: print(message) print(message, file=sys.stderr) stash_data_issue(parser="survex", message=message) - + # if failure, go and do them individually: sb_save_mysql() def sb_save_mysql(): @@ -608,7 +609,8 @@ class LoadingSurvex: for sb in chunk: sb.save() # simplest is best, if it works continue - + # Not using on MariaDB yet: tricky to get foreign keys working + # and survex blocks have a foreign key to itself: the "parent" got_obj, created = SurvexBlock.objects.get_or_create( _blockid=sb._blockid, ) # missing most fields @@ -667,7 +669,8 @@ class LoadingSurvex: # return try: SurvexPersonRole.objects.bulk_create(valid_list, - update_fields = ['survexblock', 'personname', 'person', 'personexpedition'], + update_fields = ['survexblock', 'personname', 'person', + 'personexpedition'], unique_fields = ['survexblock', 'personname', 'person', 'personexpedition'] ) print(f" - all {len(valid_list)} SurvexPersonRoles bulk-created to db", file=sys.stderr) @@ -684,7 +687,8 @@ class LoadingSurvex: ns = 0 for pr in valid_list: pr.save() # simplest is best, if it works - continue + continue + # Not using on MariaDB yet: tricky to get foreign keys working # This is not the complete set of fields we need: got_obj, created = SurvexPersonRole.objects.get_or_create( survexblock=pr.survexblock, @@ -721,7 +725,7 @@ class LoadingSurvex: parser="survex", message=message ) - print(f" - {len(valid_list)} SurvexPersonRoles in list", file=sys.stderr) + print(f" - {len(valid_list)} SurvexPersonRoles in ist, savibg.. 
({connection.vendor})", file=sys.stderr) if connection.vendor == 'mysql': pr_save_mysql() else: @@ -733,28 +737,30 @@ class LoadingSurvex: """This should be run only after all the survexblocks have been saved to the database and so have _blockid that can be used as a ForeignKey - Actually we need to update all the fields, not just the co-unique ones, - so this is not working correctly at all. + Actually we need to create all the fields, not just the co-unique ones, + but that is the default. "update_fields" has effect only in conflicts. """ def qm_save_sqlite(): - qm_save_mysql() - return - # MariaDB/MySQL do not support (and don't need) unique_fields by sqlite neeeds them + # qm_save_mysql() + # return + # MariaDB/MySQL do not support (and don't need) unique_fields but sqlite needs them bulk_kwargs = { "update_conflicts": True, - "update_fields": ['block', 'cave', 'blockname', 'grade', 'number', 'expoyear'], + # https://docs.djangoproject.com/en/6.0/ref/models/querysets/#bulk-create + "update_fields": ['block', 'cave', 'blockname', 'grade', + 'number', 'expoyear'], # these are updated only if a conflict occurs, see documentation. "unique_fields": ['cave', 'blockname', 'grade', 'number', 'expoyear'], } - bulk_kwargs try: - QM.objects.bulk_create(qms_valid_list, **bulk_kwargs) + QM.objects.bulk_create(qms_valid_list, **bulk_kwargs) + print(f" - QMs bulk-created in db", file=sys.stderr) except Exception as e: message = f"\n ! - EXCEPTION '{e}' - in QM bulk update. Falling back onto sequential updates" print(message) print(message, file=sys.stderr) stash_data_issue(parser="survex", message=message) - + # if failure, go and do them individually: qm_save_mysql() def qm_save_mysql(): @@ -763,8 +769,8 @@ class LoadingSurvex: for qm in qms_valid_list: qm.save() # simplest is best, if it works continue - - # This is not the complete set of fields we need: + # Not using on MariaDB yet: tricky to get foreign keys working + # This is not the complete set of fields we need. 
Needs extending got_obj, created = QM.objects.get_or_create( cave_id=qm.cave_id, blockname=qm.blockname, @@ -786,7 +792,7 @@ class LoadingSurvex: qms = [] for blk in self._pending_qm_saves: qms += self._pending_qm_saves[blk] - print(f" - Saving {len(qms)} QMs to db", file=sys.stderr) + print(f" - Saving {len(qms)} QMs to db, saving.. ({connection.vendor})", file=sys.stderr) qms_valid_list = self.validify(qms, "QM") if connection.vendor == 'mysql':