diff --git a/core/models/caves.py b/core/models/caves.py index bf495bc..a8dc3ec 100644 --- a/core/models/caves.py +++ b/core/models/caves.py @@ -515,7 +515,7 @@ def GetCaveLookup(): checkcaveid(cave, unoffn) if cave.filename: - # this is the slug - usually.. but usually done as as f'{cave.area}-{cave.kataster_number}' + # this is the slug - or should be fn = cave.filename.replace(".html", "").lower() checkcaveid(cave, fn) @@ -711,9 +711,15 @@ def GetCaveLookup(): ("hc", "2018-dm-07"), ("loveshack", "1626-2018-ad-03"), ("crushed-garlic", "1626-2018-ad-03"), - ("BuzzardHole", "1626-2023-buzzardhole"), - ("2023-BuzzardHole", "1626-2023-buzzardhole"), - ("1626-2023-BuzzardHole", "1626-2023-buzzardhole"), + + # Renaming cave ids which end in a letter + ("1623-2002-XX", "1623-2002-FB-01"), + ("1623-2002-X09B", "1623-2002-XB09"), + ("1623-2007-neu", "1623-2007-NEU-01"), + ("BuzzardHole", "1626-2023-BZ-01"), + ("2023-BuzzardHole", "1626-2023-BZ-01"), + ("1626-2023-BuzzardHole", "1626-2023-BZ-01"), + ("1626-2023-buzzardhole","1626-2023-BZ-01"), ] diff --git a/parsers/caves.py b/parsers/caves.py index c2559d4..ff54c41 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -509,18 +509,21 @@ def read_entrance(filename, ent=None): entrancecontents = entrancecontentslist[0] slugs = getXML(entrancecontents, "slug", context=context) - slug = slugs[0] + # we ignore all these, because we now just use the filename. But if they are there, we validate them. + if len(slugs) > 0 : + slug = slugs[0] - if len(slugs) >1: - # Only ever one of these per entrance in the expo dataset - message = f" ! - More than one slug for an entrance: {entrance}, slugs: {slugs}. Ignoring all except first." - DataIssue.objects.create(parser="entrances", message=message, url=cave_edit_url) - print(message) - - if slug != entslug_fn: - message = f" ! - Warning, mismatch between entrance slug and filename: {slug=} {filename=}. " - DataIssue.objects.create(parser="xEntrances", message=message, url=cave_edit_url) - print(message) + if len(slugs) >1: + # Only ever one of these per entrance in the expo dataset + message = f" ! - More than one slug for an entrance: {entrance}, slugs: {slugs}. Ignoring all of them." + DataIssue.objects.create(parser="entrances", message=message, url=cave_edit_url) + print(message) + + if slug != entslug_fn: + message = f" ! - Warning, mismatch between entrance slug and filename: {slug=} {filename=}. Ignoring slug field, using filename." + DataIssue.objects.create(parser="xEntrances", message=message, url=cave_edit_url) + print(message) + slug = entslug_fn # force lastvisit = getXML(entrancecontents, "lastvisit", maxItems=1, minItems=0, context=cave_edit_url) lat_wgs84 = getXML(entrancecontents, "lat_wgs84", maxItems=1, minItems=0, context=cave_edit_url) @@ -730,7 +733,7 @@ def read_cave(filename, mvf=None, cave=None): if slug == f"{areacode}-{unofficial_number}": return slug if slug.lower() == f"{areacode}-{unofficial_number.lower()}": - message = f" ! Cave Slug capitalisation incorrect (unofficial): '{slug}' != '{areacode}-{unofficial_number}' {url=} in file {filename}." + message = f" ! Cave Slug capitalisation incorrect (unofficial): '{slug}' != '{areacode}-{unofficial_number}' {url=} in file {filename}. IGNORING caveslug field in the .html file." correctslug = slug.lower() else: message = f" ! Cave Slug mismatch (unofficial): '{slug}' != '{areacode}-{unofficial_number}' {url=} in file {filename} IGNORING caveslug field in the .html file." @@ -741,10 +744,10 @@ def read_cave(filename, mvf=None, cave=None): mvtext = f"mv {filename} {correctslug}.html" #print(mvtext) if filename != f"{correctslug}.html" : - message = f" ! Filename is not the same as the cave slug '{slug}' != '{areacode}-{unofficial_number}' {url=} in file {filename} so use /tmp/mvscript.sh to fix." + message = f" ! Filename is not the same as the cave slug '{slug}' != '{areacode}-{unofficial_number}' {url=} in file {filename} IGNORING caveslug field in the .html file." DataIssue.objects.create(parser="caves", message=message, url=msgurl) # url here is for where the file actually is, for editing mvf.write(mvtext + "\n") - print(message) + print(message) return correctslug global entrances_xslug @@ -789,6 +792,12 @@ def read_cave(filename, mvf=None, cave=None): slug = filename[:-5] # strip off the ".html" at the end of the filename slugs = [slug] #print(f"{filename=} {slug=}") + #print(slug[-1].lower(), slug) + if slug[-1].lower() in LETTERS: + message = f" ! Cave name ends in a letter not a number. Fix this! in file {filename} " + DataIssue.objects.create(parser="caves", message=message, url=context) # url here is for where the file actually is, for editing + print(message) + non_public = getXMLmax1("non_public") @@ -809,8 +818,9 @@ def read_cave(filename, mvf=None, cave=None): extent = getXMLmax1("extent") survex_file = getXMLmax1("survex_file") description_file = getXMLmax1("description_file") - - contextguess = f"/{slug[0:4]}/{slug}_cave_edit/" # guess as we havent read areacode yet. This is used for error messages + areacode = slug[:4] + + contextguess = f"/{slug[0:4]}/{slug}_cave_edit/" manual_edit = True if not cave: @@ -837,15 +847,13 @@ def read_cave(filename, mvf=None, cave=None): do_caveslugstuff() # needs cave!=None # We no longer need the tag to define 1623 etc as we get that from the filename. - areas = getXML(cavecontents, "area", context=contextguess) # can be multiple tags + areas = getXML(cavecontents, "area", context=contextguess, minItems=0) # can be multiple tags for area_slug in areas: - if area_slug in AREACODES: # ignore sub areas which are in another tag - cave.areacode = area_slug - else: + if area_slug not in AREACODES: # only detect subareas cave.subarea = area_slug if not cave.areacode: - if slug[0:4] in AREACODES: - cave.areacode = slug[0:4] + if areacode in AREACODES: + cave.areacode = areacode context = f"/{cave.areacode}/{slug}_cave_edit/"