From 704ff8335dc78b497586e773c4ed7a596672b920 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Tue, 2 Jul 2024 20:01:15 +0300 Subject: [PATCH] still some bugs in cave ids --- core/models/caves.py | 17 +++++--- parsers/caves.py | 100 ++++++++++++++++++++++++++++--------------- 2 files changed, 75 insertions(+), 42 deletions(-) diff --git a/core/models/caves.py b/core/models/caves.py index 691959b..c9405ee 100644 --- a/core/models/caves.py +++ b/core/models/caves.py @@ -108,6 +108,7 @@ class Cave(TroggleModel): ordering = ("kataster_code", "unofficial_number") def slug(self): + return self.newslug() primarySlugs = self.caveslug_set.filter(primary=True) if primarySlugs: return primarySlugs[0].slug @@ -115,6 +116,8 @@ class Cave(TroggleModel): slugs = self.caveslug_set.filter() if slugs: return slugs[0].slug + else: + return str(self.id) def newslug(self): return f"{self.areacode}-{self.number()}" @@ -521,9 +524,12 @@ def GetCaveLookup(): checkcaveid(cave, fn) if cave.slug(): - # also possibly done already - slug = cave.slug().lower() - checkcaveid(cave, slug) + # also possibly done already. checking for weird slug values.. + try: + slug = cave.slug().lower() + checkcaveid(cave, slug) + except: + print(cave, cave.slug()) # These might alse create more duplicate entries # Yes, this should be set in, and imported from, an easily editable file @@ -667,9 +673,7 @@ def GetCaveLookup(): ("gsh", "1623-291"), ("1623-2023-lc-01", "1623-318"), ("tempest", "1623-2023-lc-01"), - - ("1623-2023-kt-02", "2023-kt-02"), - + # from the git output after Becka's changes, used to construct this list.. # rename caves-1623/{2023-ASH-15/2023-ASH-15.svx => 303/303.svx} (94%) # rename caves-1623/{2023-mg-02/2023-mg-02.svx => 304/304.svx} (90%) @@ -753,7 +757,6 @@ def GetCaveLookup(): DataIssue.objects.update_or_create(parser="aliases", message=message) else: message = f" * Coding or cave existence mistake, cave for id '{alias}' does not exist. Expecting to set alias '{key}' to it" - print(message) DataIssue.objects.update_or_create(parser="aliases", message=message) addmore = {} diff --git a/parsers/caves.py b/parsers/caves.py index da55ea7..efaa0e9 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -458,7 +458,7 @@ def read_entrance(filename, ent=None): /1623/1/1623-1_cave_edit/ Args: - filename: The name of the .html file. + filename: The name of the entrance_data .html file, e.g. 1623-JS-01a.html ent: The entrance object, if it already exists. Returns: @@ -479,10 +479,8 @@ def read_entrance(filename, ent=None): contents = f.read() context = filename - - - # Derive the letter, entrance slug and cave slug fromthe filename - entslug_fn = filename[:-5] + # Derive the letter, entrance slug and cave slug from the filename + entslug_fn = filename[:-5] # remove .html if entslug_fn[-1] in LETTERS: caveslug_fn = entslug_fn[:-1] letter_fn = entslug_fn[-1] @@ -490,18 +488,30 @@ def read_entrance(filename, ent=None): caveslug_fn = entslug_fn letter_fn = "" - cave_name_fn = caveslug_fn[5:] # remove initial "1623-" ent_area = filename[:4] + cave_name = caveslug_fn[5:] # remove initial 1623- + + ent_edit_url = f"/{caveslug_fn}:{entslug_fn}_entrance_edit" + cave_edit_url = f"/{ent_area}/{cave_name}/{cave_name}_cave_edit" - ent_edit_url = f"/{caveslug_fn}:{entslug_fn}_entrance_edit" - cave_edit_url = f"/{ent_area}/{cave_name_fn}/{caveslug_fn}_cave_edit" # validate filename, check areacode if ent_area not in AREACODES: message = f'! BAD AREA CODE in "{filename}". Not recognised.' DataIssue.objects.create(parser="entrances", message=message, url=ent_edit_url) print(message) - + + # New system 2024, create the Cave object when parsing Entrances, not Caves + cave = make_cave(caveslug_fn) + + # try: + # cs = CaveSlug.objects.update_or_create(cave=cave, slug=caveslug_fn, primary=True) + # except Exception as ex: + # #raise + # # This fails to do an update! It just crashes.. to be fixed + # message = f" ! Entrances: CaveSlug {cave} update/create failure : {caveslug_fn}, skipping cave_data file {filename} with exception\nException: {ex.__class__}" + # DataIssue.objects.create(parser="caves", message=message, url=context) + # print(message) entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context) if len(entrancecontentslist) != 1: @@ -511,7 +521,7 @@ def read_entrance(filename, ent=None): return None entrancecontents = entrancecontentslist[0] - slugs = getXMLmin0("slug") + slugs = getXMLmin0("slug") # not the full slug, just the id: i.e. without the 1623- prefix # we ignore all these, because we now just use the filename. But if they are there, we validate them. if len(slugs) > 0 : slug = slugs[0] @@ -590,6 +600,27 @@ def read_entrance(filename, ent=None): ent.save() return ent +def make_cave(slug): + """Making a Cave object, but when we have multiple entrances, the Cave object may already exist""" + filename = f"{slug}.html" + try: + cave, state = Cave.objects.update_or_create(filename=filename) # replace with slug when CaveSlug tidied up + #print(f" - created:{state} cave.id:{cave} with {filename=}") + + except: + print(f" ! FAILED to get only one CAVE in db when updating using: {filename} or not found.") + kaves = Cave.objects.all().filter(filename=filename) # replace with slug when CaveSlug tidied up + for k in kaves: + message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug()) + DataIssue.objects.create(parser="caves", message=message, url=context) + print(message) + for k in kaves: + if k.slug() is not None: + print(" ! - OVERWRITING this one: slug:" + str(k.slug())) + k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes + cave = k + return cave + def read_cave(filename, mvf=None, cave=None): """Reads an entrance description from the .html file Convoluted. Sorry. Needs rewriting @@ -687,11 +718,12 @@ def read_cave(filename, mvf=None, cave=None): read_entrance(entrance.filename, ent=entrance) entrance.save() - def do_caveslugstuff(): + def do_caveslugstuff(context): """This may be a fossil. We only have one slug per cave in troggle. - Pending destruction of this whole concept and Class CaveSlug - What is Class CaveSlug for? - """ + Pending destruction of this whole concept and Class CaveSlug + What is Class CaveSlug for? + """ + return primary = True # this sets the first thing we find to be primary=True and all the others =False for slug in slugs: if slug in caves_xslug: @@ -703,10 +735,10 @@ def read_cave(filename, mvf=None, cave=None): except Exception as ex: #raise # This fails to do an update! It just crashes.. to be fixed - message = f" ! CaveSlug update/create failure : {slug}, skipping file cave_data/{context} with exception\nException: {ex.__class__}" - DataIssue.objects.create(parser="caves", message=message, url=f"{cave.url}_edit/") + message = f" ! CaveSlug update/create failure : {slug}, skipping cave_data file {slug} with exception\nException: {ex.__class__}" + DataIssue.objects.create(parser="caves", message=message, url=context) print(message) - primary = False + primary = False def check_directory(areacode, caveid, url, cave): dir = Path(settings.EXPOWEB, areacode, caveid) @@ -820,26 +852,24 @@ def read_cave(filename, mvf=None, cave=None): manual_edit = True if not cave: # we are parsing using databaseReset.py not an online edit - # we have already checked for uniqueness so we do not need the 'update' thinggy + # we have already checked for uniqueness but the Cave object may/should be already created by the Entrance parsing manual_edit = False - - try: - cave, state = Cave.objects.update_or_create(filename=filename) # replace with slug when CaveSlug tidied up - except: - print(" ! FAILED to get only one CAVE in db when updating using: " + filename) - kaves = Cave.objects.all().filter(filename=filename) # replace with slug when CaveSlug tidied up - for k in kaves: - message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug()) - DataIssue.objects.create(parser="caves", message=message, url=context) - print(message) - for k in kaves: - if k.slug() is not None: - print(" ! - OVERWRITING this one: slug:" + str(k.slug())) - k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes - cave = k + + # The Cave object might be known by another (alias) name + caves = Cave.objects.filter(filename=filename) + if len(caves) ==1: + cave = caves[0] + else: + c = Cave.objects.filter(filename=filename.lower()) + if len(c) ==1: + cave = c[0] + else: + print(f" * Cannot find single Cave object for cave_data/{filename} from entrance_data file. {len(caves)} found") + return False + # From here on the code applies to both edited and newly-imported caves (mostly!) - do_caveslugstuff() # needs cave!=None + do_caveslugstuff(context) # needs cave!=None # We no longer need the tag to define 1623 etc as we get that from the filename. areas = getXML(cavecontents, "area", context=context, minItems=0) # can be multiple tags @@ -855,7 +885,7 @@ def read_cave(filename, mvf=None, cave=None): cave.kataster_code=kataster_code[0] if "+" in kataster_code[0]: cave.fully_explored = True - print(f"{kataster_code[0]} {slug}") + # print(f"{kataster_code[0]} {slug}") cave.kataster_number=kataster_number[0] cave.unofficial_number=unofficial_number[0] cave.explorers=explorers[0]