big rewrite of cave alias lookup system

This commit is contained in:
Philip Sargent 2022-07-25 02:58:13 +03:00
parent 9c090f0383
commit 3577d8cb68

View File

@ -576,9 +576,23 @@ def GetCaveLookup():
Does NOT detect duplicates! Needs fixing.
Needs to be a proper function that raises an exception if there is a duplicate.
OR we could set it to return None if there are duplictes, and require the caller to
fall back on doing the actual database query it wants rathe rthna using this cache shortcut
OR we could set it to return None if there are duplicates, and require the caller to
fall back on doing the actual database query it wants rather than using this cache shortcut
"""
def checkcaveid(cave, id):
    """Register `id` as a shorthand alias for `cave` in the lookup cache.

    - If `id` is not yet a key of Gcavelookup, it is mapped to `cave` and
      Gcave_count[id] is incremented.
    - If `id` already maps to an object equal to `cave`, nothing happens.
    - If `id` already maps to a different cave, the alias is removed from
      Gcavelookup, a warning is printed, and a DataIssue record is created
      with parser='caves'.

    NOTE(review): once an alias has been removed, a later call with the same
    id finds it absent and registers it again - confirm whether a third
    colliding cave is meant to reclaim the alias.
    """
    global Gcavelookup

    if id not in Gcavelookup:
        # First sighting of this alias: record it and count it.
        Gcavelookup[id] = cave
        Gcave_count[id] += 1
        return

    if cave == Gcavelookup[id]:
        return  # same id, same cave

    # Same id but different cave. Build the text before removing the entry,
    # because the message quotes the cave currently stored under this id.
    warning = f" - Warning: same alias id '{id:3}' for two caves '{Gcavelookup[id]}' and '{cave}'. Removing this shorthand alias entirely."
    del Gcavelookup[id]
    print(warning)
    DataIssue.objects.create(parser='caves', message=warning)
global Gcavelookup
if Gcavelookup:
return Gcavelookup
@ -588,161 +602,168 @@ def GetCaveLookup():
for cave in Cave.objects.all():
key = cave.official_name.lower()
Gcavelookup[key] = cave
Gcave_count[key] += 1
if key != "" and key != "unamed" and key != "unnamed":
Gcavelookup[key] = cave
Gcave_count[key] += 1
if cave.kataster_number:
Gcavelookup[cave.kataster_number] = cave # DUPLICATE as we have 1623-55 and 1626-55
Gcavelookup[f'{cave.area}-{cave.kataster_number}'] = cave
Gcave_count[cave.kataster_number] += 1
Gcave_count[f'{cave.area}-{cave.kataster_number}'] += 1
checkcaveid(cave,cave.kataster_number) # we do expect 1623/55 and 1626/55 to cause a warning message
# the rest of these are 'nice to have' but may validly already be set
if cave.unofficial_number:
Gcavelookup[cave.unofficial_number.lower()] = cave
Gcave_count[cave.unofficial_number.lower()] += 1
unoffn = cave.unofficial_number.lower()
checkcaveid(cave,unoffn)
if cave.filename:
# this is the slug - usually..
Gcavelookup[cave.filename.replace(".html","").lower()] = cave
Gcave_count[cave.filename.replace(".html","").lower()] += 1
# this is the slug - usually.. but usually done as f'{cave.area}-{cave.kataster_number}'
fn = cave.filename.replace(".html","").lower()
checkcaveid(cave,fn)
if cave.slug():
slug = cave.slug()
Gcavelookup[slug.lower()] = cave
Gcave_count[slug.lower()] += 1
# These are exact matches! edit to check for prefix only!
# mostly taken from expoweb/noinfo/cave-number-index
# and Becka's email of 25 may 2020 on new kataster numbers
# These might alse create more duplicate entries, so re-write it to check
# skip any missing keys as this gets called during tests when the database is not loaded
try:
Gcavelookup["1987-02"] = Gcavelookup["267"]
Gcavelookup["1990-01"] = Gcavelookup["171"]
Gcavelookup["1990-02"] = Gcavelookup["172"]
Gcavelookup["1990-03"] = Gcavelookup["173"]
Gcavelookup["1990-04"] = Gcavelookup["174"]
Gcavelookup["1990-05"] = Gcavelookup["175"]
Gcavelookup["1990-06"] = Gcavelookup["176"]
Gcavelookup["1990-07"] = Gcavelookup["177"]
Gcavelookup["1990-08"] = Gcavelookup["178"]
Gcavelookup["1990-09"] = Gcavelookup["179"]
Gcavelookup["1990-10"] = Gcavelookup["180"]
Gcavelookup["1990-11"] = Gcavelookup["181"]
Gcavelookup["1990-12"] = Gcavelookup["182"]
Gcavelookup["1990-13"] = Gcavelookup["183"]
Gcavelookup["1990-14"] = Gcavelookup["184"]
Gcavelookup["1990-18"] = Gcavelookup["188"]
Gcavelookup["1990-adam"] = Gcavelookup["225"]
Gcavelookup["1993-01"] = Gcavelookup["200"]
Gcavelookup["1996-02"] = Gcavelookup["224"]
Gcavelookup["1996-03"] = Gcavelookup["223"]
Gcavelookup["1996-04"] = Gcavelookup["222"]
Gcavelookup["1996wk2"] = Gcavelookup["207"]
Gcavelookup["1996wk3"] = Gcavelookup["208"]
Gcavelookup["1996wk5"] = Gcavelookup["219"]
Gcavelookup["1996wk6"] = Gcavelookup["218"]
Gcavelookup["1996wk8"] = Gcavelookup["209"]
Gcavelookup["1996wk11"] = Gcavelookup["268"]
Gcavelookup["96wk11"] = Gcavelookup["268"]
Gcavelookup["1998-01"] = Gcavelookup["201"]
Gcavelookup["1998-03"] = Gcavelookup["210"]
Gcavelookup["1999-03"] = Gcavelookup["204"]
Gcavelookup["1999-04"] = Gcavelookup["230"]
Gcavelookup["1999-10"] = Gcavelookup["162"]
Gcavelookup["1999-bo-01"] = Gcavelookup["205"]
Gcavelookup["1999-ob-01"] = Gcavelookup["205"]
Gcavelookup["1999-ob-03"] = Gcavelookup["226"]
Gcavelookup["1999-ob-04"] = Gcavelookup["227"]
Gcavelookup["2000-01"] = Gcavelookup["231"]
Gcavelookup["2000-03"] = Gcavelookup["214"]
Gcavelookup["2000-04"] = Gcavelookup["220"]
Gcavelookup["2000-05"] = Gcavelookup["215"]
Gcavelookup["2000-06"] = Gcavelookup["216"]
Gcavelookup["2000-07"] = Gcavelookup["217"]
Gcavelookup["2000-09"] = Gcavelookup["234"]
Gcavelookup["2000-aa-01"] = Gcavelookup["250"]
Gcavelookup["2001-04"] = Gcavelookup["239"]
Gcavelookup["2001-05"] = Gcavelookup["243"]
Gcavelookup["2002-01"] = Gcavelookup["249"]
Gcavelookup["2002-02"] = Gcavelookup["234"]
Gcavelookup["2002-04"] = Gcavelookup["242"]
Gcavelookup["2002-05"] = Gcavelookup["294"]
Gcavelookup["2003-01"] = Gcavelookup["256"]
Gcavelookup["2003-02"] = Gcavelookup["248"]
Gcavelookup["2003-03"] = Gcavelookup["247"]
Gcavelookup["2003-04"] = Gcavelookup["241"]
Gcavelookup["2003-05"] = Gcavelookup["246"]
Gcavelookup["2003-06"] = Gcavelookup["161"]
Gcavelookup["2003-08"] = Gcavelookup["240"]
Gcavelookup["2003-09"] = Gcavelookup["245"]
Gcavelookup["2003-10"] = Gcavelookup["244"]
Gcavelookup["2004-01"] = Gcavelookup["269"]
Gcavelookup["2004-03"] = Gcavelookup["270"]
Gcavelookup["2004-11"] = Gcavelookup["251"]
Gcavelookup["2004-12"] = Gcavelookup["161"]
Gcavelookup["2004-15"] = Gcavelookup["253"]
Gcavelookup["2004-19"] = Gcavelookup["254"]
Gcavelookup["2004-20"] = Gcavelookup["255"]
Gcavelookup["2005-04"] = Gcavelookup["204"]
Gcavelookup["2005-05"] = Gcavelookup["264"]
Gcavelookup["2005-07"] = Gcavelookup["257"]
Gcavelookup["2006-08"] = Gcavelookup["285"]
Gcavelookup["2006-09"] = Gcavelookup["298"]
Gcavelookup["2007-71"] = Gcavelookup["271"]
Gcavelookup["2010-01"] = Gcavelookup["263"]
Gcavelookup["2010-03"] = Gcavelookup["293"]
Gcavelookup["2011-01"] = Gcavelookup["292"]
Gcavelookup["2012-dd-05"] = Gcavelookup["286"]
Gcavelookup["2012-ns-13"] = Gcavelookup["292"]
Gcavelookup["2014-neo-01"] = Gcavelookup["273"]
Gcavelookup["2014-sd-01"] = Gcavelookup["274"]
Gcavelookup["2014-ms-14"] = Gcavelookup["287"]
Gcavelookup["2015-mf-06"] = Gcavelookup["288"]
Gcavelookup["2016-jb-01"] = Gcavelookup["289"]
Gcavelookup["2017-pw-01"] = Gcavelookup["277"]
Gcavelookup["2018-dm-07"] = Gcavelookup["359"] # NB this is 1626
Gcavelookup["2017_cucc_24"] = Gcavelookup["291"] # note _ not -
Gcavelookup["2017_cucc_23"] = Gcavelookup["295"] # note _ not -
Gcavelookup["2017_cucc_28"] = Gcavelookup["290"] # note _ not -
Gcavelookup["bs17"] = Gcavelookup["283"]
Gcavelookup["1976/b11"] = Gcavelookup["198"]
Gcavelookup["1976/b8"] = Gcavelookup["197"]
Gcavelookup["1976/b9"] = Gcavelookup["190"]
Gcavelookup["b11"] = Gcavelookup["1976/b11"]
Gcavelookup["b8"] = Gcavelookup["1976/b8"]
Gcavelookup["b9"] = Gcavelookup["1976/b9"]
# also possibly done already
slug = cave.slug().lower()
checkcaveid(cave,slug)
Gcavelookup["2011-01-bs30"] = Gcavelookup["190"]
Gcavelookup["bs30"] = Gcavelookup["190"]
Gcavelookup["87"] = Gcavelookup["190"]
Gcavelookup["2011-01"] = Gcavelookup["190"]
Gcavelookup["quarriesd"] = Gcavelookup["2002-08"]
Gcavelookup["2002-x11"] = Gcavelookup["2005-08"]
Gcavelookup["2002-x12"] = Gcavelookup["2005-07"]
Gcavelookup["2002-x13"] = Gcavelookup["2005-06"]
Gcavelookup["2002-x14"] = Gcavelookup["2005-05"]
Gcavelookup["kh"] = Gcavelookup["161"]
Gcavelookup["161-kh"] = Gcavelookup["161"]
Gcavelookup["204-steinBH"] = Gcavelookup["204"]
Gcavelookup["stonebridge"] = Gcavelookup["204"]
Gcavelookup["hauchhole"] = Gcavelookup["234"]
Gcavelookup["hauch"] = Gcavelookup["234"]
Gcavelookup["234-hauch"] = Gcavelookup["234"]
Gcavelookup["tunnocks"] = Gcavelookup["258"]
Gcavelookup["balcony"] = Gcavelookup["264"]
Gcavelookup["balkon"] = Gcavelookup["264"]
Gcavelookup["fgh"] = Gcavelookup["290"]
Gcavelookup["gsh"] = Gcavelookup["291"]
# These might also create more duplicate entries
aliases =[
("1987-02", "267"),
("1990-01", "171"),
("1990-02", "172"),
("1990-03", "173"),
("1990-04", "174"),
("1990-05", "175"),
("1990-06", "176"),
("1990-07", "177"),
("1990-08", "178"),
("1990-09", "179"),
("1990-10", "180"),
("1990-11", "181"),
("1990-12", "182"),
("1990-13", "183"),
("1990-14", "184"),
("1990-18", "188"),
("1990-adam", "225"),
("1993-01", "200"),
("1996-02", "224"),
("1996-03", "223"),
("1996-04", "222"),
("1996wk2", "207"),
("1996wk3", "208"),
("1996wk5", "219"),
("1996wk6", "218"),
("1996wk8", "209"),
("1996wk11", "268"),
("96wk11", "268"),
("1998-01", "201"),
("1998-03", "210"),
("1999-03", "204"),
("1999-04", "230"),
("1999-10", "162"),
("1999-bo-01", "205"),
("1999-ob-03", "226"),
("1999-ob-04", "227"),
("2000-01", "231"),
("2000-03", "214"),
("2000-04", "220"),
("2000-05", "215"),
("2000-06", "216"),
("2000-07", "217"),
("2000-09", "234"),
("2000-aa-01", "250"),
("2001-04", "239"),
("2001-05", "243"),
("2002-01", "249"),
("2002-02", "234"),
("2002-04", "242"),
("2002-05", "294"),
("2003-01", "256"),
("2003-02", "248"),
("2003-03", "247"),
("2003-04", "241"),
("2003-05", "246"),
("2003-06", "161"),
("2003-08", "240"),
("2003-09", "245"),
("2003-10", "244"),
("2004-01", "269"),
("2004-03", "270"),
("2004-11", "251"),
("2004-12", "161"),
("2004-15", "253"),
("2004-19", "254"),
("2004-20", "255"),
("2005-04", "204"),
("2005-05", "264"),
("2005-07", "257"),
("2006-08", "285"),
("2006-09", "298"),
("2007-71", "271"),
("2010-01", "263"),
("2010-03", "293"),
("2011-01", "292"),
("2012-dd-05", "286"),
("2012-ns-13", "292"),
("2014-neo-01", "273"),
("2014-sd-01", "274"),
("2014-ms-14", "287"),
("2015-mf-06", "288"),
("2016-jb-01", "289"),
("2017-pw-01", "277"),
("2018-dm-07", "359"), # NB this is 1626
("2017_cucc_24", "291"), # note _ not -
("2017_cucc_23", "295"), # note _ not -
("2017_cucc_28", "290"), # note _ not -
("bs17", "283"),
Gcavelookup["homecoming"] = Gcavelookup["2018-dm-07"]
Gcavelookup["heimkommen"] = Gcavelookup["2018-dm-07"]
Gcavelookup["99ob02"] = Gcavelookup["1999-ob-02"]
#Gcavelookup["1626-354"] = Gcavelookup["354"]
("1976/b11", "198"),
("1976/b8", "197"),
("1976/b9", "190"),
("b11", "1976/b11"),
("b8", "1976/b8"),
("b9", "1976/b9"),
("2011-01-bs30", "190"),
("bs30", "190"),
("2011-01", "190"),
("quarriesd", "2002-08"),
("2002-x11", "2005-08"),
("2002-x12", "2005-07"),
("2002-x13", "2005-06"),
("2002-x14", "2005-05"),
("kh", "161"),
("161-kh", "161"),
("204-steinBH", "204"),
("stonebridge", "204"),
("hauchhole", "234"),
("hauch", "234"),
("234-hauch", "234"),
("tunnocks", "258"),
("balcony", "264"),
("balkon", "264"),
("fgh", "290"),
("gsh", "291"),
("homecoming", "2018-dm-07"),
("heimkommen", "2018-dm-07"),
("99ob02", "1999-ob-02"),
("1626-354", "354"),
]
for i in aliases:
if i[1] in Gcavelookup:
if i[0] in Gcavelookup:
# already set by a different method, but is it the same cave?
if Gcavelookup[i[0]] == Gcavelookup[i[1]]:
pass
else:
Gcave_count[i[0]] += 1
Gcavelookup[i[0]] = Gcavelookup[i[1]]
else:
message = f" * Coding or cave existence mistake, cave for id '{i[1]}' does not exist. Expecting to set alias '{i[0]}' to it"
#print(message)
DataIssue.objects.create(parser='caves', message=message)
except:
raise
addmore = {}
for id in Gcavelookup:
addmore[id.replace("-","_")] = Gcavelookup[id]
@ -751,21 +772,19 @@ def GetCaveLookup():
Gcavelookup = {**addmore, **Gcavelookup}
addmore ={}
for id in Gcavelookup:
if not Gcavelookup[id]:
pass
elif Gcavelookup[id].kataster_number:
addmore[id] = Gcavelookup[id].kataster_number
elif Gcavelookup[id].unofficial_number:
addmore[id] = Gcavelookup[id].unofficial_number.lower()
# with open("cave-lookup.json", 'w') as f: # no permissions on server by default
# json.dump(addmore, f)
# for id in Gcavelookup:
# if not Gcavelookup[id]:
# pass
# elif Gcavelookup[id].kataster_number:
# addmore[id] = Gcavelookup[id].kataster_number
# elif Gcavelookup[id].unofficial_number:
# addmore[id] = Gcavelookup[id].unofficial_number.lower()
for c in Gcave_count:
if Gcave_count[c] > 1:
message = " ** Duplicate cave id: {}:{}:{}".format(Gcave_count[c], Gcavelookup[c], c)
#print(message)
#DataIssue.objects.create(parser='caves', message=message)
message = f" ** Duplicate cave id count={Gcave_count[c]} id:'{Gcavelookup[c]}' cave __str__:'{c}'"
print(message)
DataIssue.objects.create(parser='caves', message=message)
# logdataissues[Gcavelookup[c]]=message # pending troggle-wide issues logging system
return Gcavelookup