From 9237a6262ef310d57df6e40631c7de738cdc2f05 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Sun, 7 Jun 2020 17:49:58 +0100 Subject: [PATCH] Make import robust against duplicate kataster numbers --- core/views_caves.py | 3 ++- parsers/caves.py | 59 ++++++++++++++++++++++++++++----------------- urls.py | 2 +- 3 files changed, 40 insertions(+), 24 deletions(-) diff --git a/core/views_caves.py b/core/views_caves.py index 8613790..4853dbe 100644 --- a/core/views_caves.py +++ b/core/views_caves.py @@ -68,7 +68,8 @@ def numericalcmp(x, y): def caveKey(x): """python3 function for sort. - Note that cave kataster numbers are not generally integers + Note that cave kataster numbers are not generally integers. + This needs to be fixed make a decent sort order. """ return x.kataster_number diff --git a/parsers/caves.py b/parsers/caves.py index 0d2ddfb..22b195e 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -122,26 +122,41 @@ def readcave(filename): url = getXML(cavecontents, "url", maxItems = 1, context = context) entrances = getXML(cavecontents, "entrance", context = context) if len(non_public) == 1 and len(slugs) >= 1 and len(official_name) == 1 and len(areas) >= 1 and len(kataster_code) == 1 and len(kataster_number) == 1 and len(unofficial_number) == 1 and len(explorers) == 1 and len(underground_description) == 1 and len(equipment) == 1 and len(references) == 1 and len(survey) == 1 and len(kataster_status) == 1 and len(underground_centre_line) == 1 and len(notes) == 1 and len(length) == 1 and len(depth) == 1 and len(extent) == 1 and len(survex_file) == 1 and len(description_file ) == 1 and len(url) == 1 and len(entrances) >= 1: - c, state = models_caves.Cave.objects.update_or_create(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]], - official_name = official_name[0], - kataster_code = kataster_code[0], - kataster_number = kataster_number[0], - unofficial_number = unofficial_number[0], - explorers = explorers[0], - underground_description = underground_description[0], - equipment = equipment[0], - references = references[0], - survey = survey[0], - kataster_status = kataster_status[0], - underground_centre_line = underground_centre_line[0], - notes = notes[0], - length = length[0], - depth = depth[0], - extent = extent[0], - survex_file = survex_file[0], - description_file = description_file[0], - url = url[0], - filename = filename) + try: + c, state = models_caves.Cave.objects.update_or_create(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]], + official_name = official_name[0], + kataster_code = kataster_code[0], + kataster_number = kataster_number[0], + unofficial_number = unofficial_number[0], + explorers = explorers[0], + underground_description = underground_description[0], + equipment = equipment[0], + references = references[0], + survey = survey[0], + kataster_status = kataster_status[0], + underground_centre_line = underground_centre_line[0], + notes = notes[0], + length = length[0], + depth = depth[0], + extent = extent[0], + survex_file = survex_file[0], + description_file = description_file[0], + url = url[0], + filename = filename) + except: + # need to cope with duplicates + print(" ! FAILED to get only one cave when updating using: "+filename) + kaves = models_caves.Cave.objects.all().filter(kataster_number=kataster_number[0]) + for k in kaves: + message = " ! - DUPLICATES in db. kataster:"+ str(k.kataster_number) + ", slug:" + str(k.slug()) + models.DataIssue.objects.create(parser='caves', message=message) + print(message) + for k in kaves: + if k.slug() != None: + print(" ! - OVERWRITING this one: slug:"+ str(k.slug())) + k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes + c = k + for area_slug in areas: area = models_caves.Area.objects.filter(short_name = area_slug) if area: @@ -157,7 +172,7 @@ def readcave(filename): slug = slug, primary = primary) except: - message = " ! Can't find text (slug): %s, skipping %s" % (slug, context) + message = " ! Cave update/create failure: %s, skipping file %s" % (slug, context) models.DataIssue.objects.create(parser='caves', message=message) print(message) @@ -169,7 +184,7 @@ def readcave(filename): entrance = models_caves.Entrance.objects.get(entranceslug__slug = slug) ce = models_caves.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance) except: - message = " ! Entrance text (slug) %s missing %s" % (slug, context) + message = " ! Entrance setting failure, slug: %s letter: %s" % (slug, letter) models.DataIssue.objects.create(parser='caves', message=message) print(message) diff --git a/urls.py b/urls.py index 7d28763..95f35a4 100644 --- a/urls.py +++ b/urls.py @@ -23,7 +23,7 @@ actualurlpatterns = patterns('', url(r'^troggle$', views_other.frontpage, name="frontpage"), - url(r'^caves/?$', views_caves.caveindex, name="caveindex"), + url(r'^caves$', views_caves.caveindex, name="caveindex"), url(r'^people/?$', views_logbooks.personindex, name="personindex"), url(r'^newqmnumber/?$', views_other.ajax_QM_number, ),