diff --git a/core/models/caves.py b/core/models/caves.py
index bf495bc..a8dc3ec 100644
--- a/core/models/caves.py
+++ b/core/models/caves.py
@@ -515,7 +515,7 @@ def GetCaveLookup():
checkcaveid(cave, unoffn)
if cave.filename:
- # this is the slug - usually.. but usually done as as f'{cave.area}-{cave.kataster_number}'
+ # the filename (without ".html", lowercased) is the slug - or should be
fn = cave.filename.replace(".html", "").lower()
checkcaveid(cave, fn)
@@ -711,9 +711,15 @@ def GetCaveLookup():
("hc", "2018-dm-07"),
("loveshack", "1626-2018-ad-03"),
("crushed-garlic", "1626-2018-ad-03"),
- ("BuzzardHole", "1626-2023-buzzardhole"),
- ("2023-BuzzardHole", "1626-2023-buzzardhole"),
- ("1626-2023-BuzzardHole", "1626-2023-buzzardhole"),
+
+ # Renaming cave ids which end in a letter
+ ("1623-2002-XX", "1623-2002-FB-01"),
+ ("1623-2002-X09B", "1623-2002-XB09"),
+ ("1623-2007-neu", "1623-2007-NEU-01"),
+ ("BuzzardHole", "1626-2023-BZ-01"),
+ ("2023-BuzzardHole", "1626-2023-BZ-01"),
+ ("1626-2023-BuzzardHole", "1626-2023-BZ-01"),
+ ("1626-2023-buzzardhole","1626-2023-BZ-01"),
]
diff --git a/parsers/caves.py b/parsers/caves.py
index c2559d4..ff54c41 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -509,18 +509,21 @@ def read_entrance(filename, ent=None):
entrancecontents = entrancecontentslist[0]
slugs = getXML(entrancecontents, "slug", context=context)
- slug = slugs[0]
+ # The slug fields in the file are no longer authoritative: the slug now comes from the filename. If any are present we only check them against the filename and report a mismatch.
+ if len(slugs) > 0 :
+ slug = slugs[0]
- if len(slugs) >1:
- # Only ever one of these per entrance in the expo dataset
- message = f" ! - More than one slug for an entrance: {entrance}, slugs: {slugs}. Ignoring all except first."
- DataIssue.objects.create(parser="entrances", message=message, url=cave_edit_url)
- print(message)
-
- if slug != entslug_fn:
- message = f" ! - Warning, mismatch between entrance slug and filename: {slug=} {filename=}. "
- DataIssue.objects.create(parser="xEntrances", message=message, url=cave_edit_url)
- print(message)
+ if len(slugs) >1:
+ # Only ever one of these per entrance in the expo dataset
+ message = f" ! - More than one slug for an entrance: {entrance}, slugs: {slugs}. Ignoring all of them."
+ DataIssue.objects.create(parser="entrances", message=message, url=cave_edit_url)
+ print(message)
+
+ if slug != entslug_fn:
+ message = f" ! - Warning, mismatch between entrance slug and filename: {slug=} {filename=}. Ignoring slug field, using filename."
+ DataIssue.objects.create(parser="xEntrances", message=message, url=cave_edit_url)
+ print(message)
+ slug = entslug_fn # force
lastvisit = getXML(entrancecontents, "lastvisit", maxItems=1, minItems=0, context=cave_edit_url)
lat_wgs84 = getXML(entrancecontents, "lat_wgs84", maxItems=1, minItems=0, context=cave_edit_url)
@@ -730,7 +733,7 @@ def read_cave(filename, mvf=None, cave=None):
if slug == f"{areacode}-{unofficial_number}":
return slug
if slug.lower() == f"{areacode}-{unofficial_number.lower()}":
- message = f" ! Cave Slug capitalisation incorrect (unofficial): '{slug}' != '{areacode}-{unofficial_number}' {url=} in file {filename}."
+ message = f" ! Cave Slug capitalisation incorrect (unofficial): '{slug}' != '{areacode}-{unofficial_number}' {url=} in file {filename}. IGNORING caveslug field in the .html file."
correctslug = slug.lower()
else:
message = f" ! Cave Slug mismatch (unofficial): '{slug}' != '{areacode}-{unofficial_number}' {url=} in file {filename} IGNORING caveslug field in the .html file."
@@ -741,10 +744,10 @@ def read_cave(filename, mvf=None, cave=None):
mvtext = f"mv {filename} {correctslug}.html"
#print(mvtext)
if filename != f"{correctslug}.html" :
- message = f" ! Filename is not the same as the cave slug '{slug}' != '{areacode}-{unofficial_number}' {url=} in file {filename} so use /tmp/mvscript.sh to fix."
+ message = f" ! Filename is not the same as the cave slug '{slug}' != '{areacode}-{unofficial_number}' {url=} in file {filename} IGNORING caveslug field in the .html file."
DataIssue.objects.create(parser="caves", message=message, url=msgurl) # url here is for where the file actually is, for editing
mvf.write(mvtext + "\n")
- print(message)
+ print(message)
return correctslug
global entrances_xslug
@@ -789,6 +792,12 @@ def read_cave(filename, mvf=None, cave=None):
slug = filename[:-5] # strip off the ".html" at the end of the filename
slugs = [slug]
#print(f"{filename=} {slug=}")
+ #print(slug[-1].lower(), slug)
+ if slug[-1].lower() in LETTERS:
+ message = f" ! Cave name ends in a letter not a number. Fix this! in file {filename} "
+ DataIssue.objects.create(parser="caves", message=message, url=context) # url here is for where the file actually is, for editing
+ print(message)
+
non_public = getXMLmax1("non_public")
@@ -809,8 +818,9 @@ def read_cave(filename, mvf=None, cave=None):
extent = getXMLmax1("extent")
survex_file = getXMLmax1("survex_file")
description_file = getXMLmax1("description_file")
-
- contextguess = f"/{slug[0:4]}/{slug}_cave_edit/" # guess as we havent read areacode yet. This is used for error messages
+ areacode = slug[:4]
+
+ contextguess = f"/{slug[0:4]}/{slug}_cave_edit/"
manual_edit = True
if not cave:
@@ -837,15 +847,13 @@ def read_cave(filename, mvf=None, cave=None):
do_caveslugstuff() # needs cave!=None
# We no longer need the tag to define 1623 etc as we get that from the filename.
- areas = getXML(cavecontents, "area", context=contextguess) # can be multiple tags
+ areas = getXML(cavecontents, "area", context=contextguess, minItems=0) # can be multiple tags
for area_slug in areas:
- if area_slug in AREACODES: # ignore sub areas which are in another tag
- cave.areacode = area_slug
- else:
+ if area_slug not in AREACODES: # only detect subareas
cave.subarea = area_slug
if not cave.areacode:
- if slug[0:4] in AREACODES:
- cave.areacode = slug[0:4]
+ if areacode in AREACODES:
+ cave.areacode = areacode
context = f"/{cave.areacode}/{slug}_cave_edit/"