From 3577d8cb68df0ccf852cb89f7a6f8377477d18d8 Mon Sep 17 00:00:00 2001
From: Philip Sargent
Date: Mon, 25 Jul 2022 02:58:13 +0300
Subject: [PATCH] big rewrite of cave alias lookup system

---
NOTE(review): line boundaries in this patch were restored from the +/- markers
and agree with the hunk headers and the diffstat below. Leading whitespace
inside the hunks (and the exact position of blank context lines) was
reconstructed from Python block structure - confirm against
core/models/caves.py, or apply with `git apply --ignore-whitespace`.

 core/models/caves.py | 345 +++++++++++++++++++++++--------------------
 1 file changed, 182 insertions(+), 163 deletions(-)

diff --git a/core/models/caves.py b/core/models/caves.py
index 8d1f8ca..fdfca8e 100644
--- a/core/models/caves.py
+++ b/core/models/caves.py
@@ -576,9 +576,23 @@ def GetCaveLookup():
 
     Does NOT detect duplicates! Needs fixing.
     Needs to be a proper funciton that raises an exception if there is a duplicate.
-    OR we could set it to return None if there are duplictes, and require the caller to
-    fall back on doing the actual database query it wants rathe rthna using this cache shortcut
+    OR we could set it to return None if there are duplicates, and require the caller to
+    fall back on doing the actual database query it wants rather thna using this cache shortcut
     """
+    def checkcaveid(cave, id):
+        global Gcavelookup
+        if id not in Gcavelookup:
+            Gcavelookup[id] = cave
+            Gcave_count[id] += 1
+        else:
+            if cave == Gcavelookup[id]:
+                pass # same id, same cave
+            else: # same id but different cave
+                message = f" - Warning: same alias id '{id:3}' for two caves '{Gcavelookup[id]}' and '{cave}'. Removing this shorthand alias entirely."
+                Gcavelookup.pop(id)
+                print(message)
+                DataIssue.objects.create(parser='caves', message=message)
+
     global Gcavelookup
     if Gcavelookup:
         return Gcavelookup
@@ -588,161 +602,168 @@ def GetCaveLookup():
 
     for cave in Cave.objects.all():
         key = cave.official_name.lower()
-        Gcavelookup[key] = cave
-        Gcave_count[key] += 1
+        if key != "" and key != "unamed" and key != "unnamed":
+            Gcavelookup[key] = cave
+            Gcave_count[key] += 1
         if cave.kataster_number:
-            Gcavelookup[cave.kataster_number] = cave # DUPLICATE as we have 1623-55 and 1626-55
-            Gcavelookup[f'{cave.area}-{cave.kataster_number}'] = cave
-            Gcave_count[cave.kataster_number] += 1
-            Gcave_count[f'{cave.area}-{cave.kataster_number}'] += 1
+            checkcaveid(cave,cave.kataster_number) # we do expect 1623/55 and 1626/55 to cause a warning message
+
+        # the rest of these are 'nice to have' but may validly already be set
         if cave.unofficial_number:
-            Gcavelookup[cave.unofficial_number.lower()] = cave
-            Gcave_count[cave.unofficial_number.lower()] += 1
+            unoffn = cave.unofficial_number.lower()
+            checkcaveid(cave,unoffn)
+
         if cave.filename:
-            # this is the slug - usually..
-            Gcavelookup[cave.filename.replace(".html","").lower()] = cave
-            Gcave_count[cave.filename.replace(".html","").lower()] += 1
+            # this is the slug - usually.. but usually done as as f'{cave.area}-{cave.kataster_number}'
+            fn = cave.filename.replace(".html","").lower()
+            checkcaveid(cave,fn)
+
         if cave.slug():
-            slug = cave.slug()
-            Gcavelookup[slug.lower()] = cave
-            Gcave_count[slug.lower()] += 1
-    # These are exact matches! edit to check for prefix only!
-    # mostly taken from expoweb/noinfo/cave-number-index
-    # and Becka's email of 25 may 2020 on new kataster numbers
-
-    # These might alse create more duplicate entries, so re-write it to check
-    # skip any missing keys as this gets called during tests when the database is not loaded
-    try:
-        Gcavelookup["1987-02"] = Gcavelookup["267"]
-        Gcavelookup["1990-01"] = Gcavelookup["171"]
-        Gcavelookup["1990-02"] = Gcavelookup["172"]
-        Gcavelookup["1990-03"] = Gcavelookup["173"]
-        Gcavelookup["1990-04"] = Gcavelookup["174"]
-        Gcavelookup["1990-05"] = Gcavelookup["175"]
-        Gcavelookup["1990-06"] = Gcavelookup["176"]
-        Gcavelookup["1990-07"] = Gcavelookup["177"]
-        Gcavelookup["1990-08"] = Gcavelookup["178"]
-        Gcavelookup["1990-09"] = Gcavelookup["179"]
-        Gcavelookup["1990-10"] = Gcavelookup["180"]
-        Gcavelookup["1990-11"] = Gcavelookup["181"]
-        Gcavelookup["1990-12"] = Gcavelookup["182"]
-        Gcavelookup["1990-13"] = Gcavelookup["183"]
-        Gcavelookup["1990-14"] = Gcavelookup["184"]
-        Gcavelookup["1990-18"] = Gcavelookup["188"]
-        Gcavelookup["1990-adam"] = Gcavelookup["225"]
-        Gcavelookup["1993-01"] = Gcavelookup["200"]
-        Gcavelookup["1996-02"] = Gcavelookup["224"]
-        Gcavelookup["1996-03"] = Gcavelookup["223"]
-        Gcavelookup["1996-04"] = Gcavelookup["222"]
-        Gcavelookup["1996wk2"] = Gcavelookup["207"]
-        Gcavelookup["1996wk3"] = Gcavelookup["208"]
-        Gcavelookup["1996wk5"] = Gcavelookup["219"]
-        Gcavelookup["1996wk6"] = Gcavelookup["218"]
-        Gcavelookup["1996wk8"] = Gcavelookup["209"]
-        Gcavelookup["1996wk11"] = Gcavelookup["268"]
-        Gcavelookup["96wk11"] = Gcavelookup["268"]
-        Gcavelookup["1998-01"] = Gcavelookup["201"]
-        Gcavelookup["1998-03"] = Gcavelookup["210"]
-        Gcavelookup["1999-03"] = Gcavelookup["204"]
-        Gcavelookup["1999-04"] = Gcavelookup["230"]
-        Gcavelookup["1999-10"] = Gcavelookup["162"]
-        Gcavelookup["1999-bo-01"] = Gcavelookup["205"]
-        Gcavelookup["1999-ob-01"] = Gcavelookup["205"]
-        Gcavelookup["1999-ob-03"] = Gcavelookup["226"]
-        Gcavelookup["1999-ob-04"] = Gcavelookup["227"]
-        Gcavelookup["2000-01"] = Gcavelookup["231"]
-        Gcavelookup["2000-03"] = Gcavelookup["214"]
-        Gcavelookup["2000-04"] = Gcavelookup["220"]
-        Gcavelookup["2000-05"] = Gcavelookup["215"]
-        Gcavelookup["2000-06"] = Gcavelookup["216"]
-        Gcavelookup["2000-07"] = Gcavelookup["217"]
-        Gcavelookup["2000-09"] = Gcavelookup["234"]
-        Gcavelookup["2000-aa-01"] = Gcavelookup["250"]
-        Gcavelookup["2001-04"] = Gcavelookup["239"]
-        Gcavelookup["2001-05"] = Gcavelookup["243"]
-        Gcavelookup["2002-01"] = Gcavelookup["249"]
-        Gcavelookup["2002-02"] = Gcavelookup["234"]
-        Gcavelookup["2002-04"] = Gcavelookup["242"]
-        Gcavelookup["2002-05"] = Gcavelookup["294"]
-        Gcavelookup["2003-01"] = Gcavelookup["256"]
-        Gcavelookup["2003-02"] = Gcavelookup["248"]
-        Gcavelookup["2003-03"] = Gcavelookup["247"]
-        Gcavelookup["2003-04"] = Gcavelookup["241"]
-        Gcavelookup["2003-05"] = Gcavelookup["246"]
-        Gcavelookup["2003-06"] = Gcavelookup["161"]
-        Gcavelookup["2003-08"] = Gcavelookup["240"]
-        Gcavelookup["2003-09"] = Gcavelookup["245"]
-        Gcavelookup["2003-10"] = Gcavelookup["244"]
-        Gcavelookup["2004-01"] = Gcavelookup["269"]
-        Gcavelookup["2004-03"] = Gcavelookup["270"]
-        Gcavelookup["2004-11"] = Gcavelookup["251"]
-        Gcavelookup["2004-12"] = Gcavelookup["161"]
-        Gcavelookup["2004-15"] = Gcavelookup["253"]
-        Gcavelookup["2004-19"] = Gcavelookup["254"]
-        Gcavelookup["2004-20"] = Gcavelookup["255"]
-        Gcavelookup["2005-04"] = Gcavelookup["204"]
-        Gcavelookup["2005-05"] = Gcavelookup["264"]
-        Gcavelookup["2005-07"] = Gcavelookup["257"]
-        Gcavelookup["2006-08"] = Gcavelookup["285"]
-        Gcavelookup["2006-09"] = Gcavelookup["298"]
-        Gcavelookup["2007-71"] = Gcavelookup["271"]
-        Gcavelookup["2010-01"] = Gcavelookup["263"]
-        Gcavelookup["2010-03"] = Gcavelookup["293"]
-        Gcavelookup["2011-01"] = Gcavelookup["292"]
-        Gcavelookup["2012-dd-05"] = Gcavelookup["286"]
-        Gcavelookup["2012-ns-13"] = Gcavelookup["292"]
-        Gcavelookup["2014-neo-01"] = Gcavelookup["273"]
-        Gcavelookup["2014-sd-01"] = Gcavelookup["274"]
-        Gcavelookup["2014-ms-14"] = Gcavelookup["287"]
-        Gcavelookup["2015-mf-06"] = Gcavelookup["288"]
-        Gcavelookup["2016-jb-01"] = Gcavelookup["289"]
-        Gcavelookup["2017-pw-01"] = Gcavelookup["277"]
-        Gcavelookup["2018-dm-07"] = Gcavelookup["359"] # NB this is 1626
-        Gcavelookup["2017_cucc_24"] = Gcavelookup["291"] # note _ not -
-        Gcavelookup["2017_cucc_23"] = Gcavelookup["295"] # note _ not -
-        Gcavelookup["2017_cucc_28"] = Gcavelookup["290"] # note _ not -
-        Gcavelookup["bs17"] = Gcavelookup["283"]
-
-        Gcavelookup["1976/b11"] = Gcavelookup["198"]
-        Gcavelookup["1976/b8"] = Gcavelookup["197"]
-        Gcavelookup["1976/b9"] = Gcavelookup["190"]
-        Gcavelookup["b11"] = Gcavelookup["1976/b11"]
-        Gcavelookup["b8"] = Gcavelookup["1976/b8"]
-        Gcavelookup["b9"] = Gcavelookup["1976/b9"]
+            # also possibly done already
+            slug = cave.slug().lower()
+            checkcaveid(cave,slug)
 
-        Gcavelookup["2011-01-bs30"] = Gcavelookup["190"]
-        Gcavelookup["bs30"] = Gcavelookup["190"]
-        Gcavelookup["87"] = Gcavelookup["190"]
-        Gcavelookup["2011-01"] = Gcavelookup["190"]
-
-        Gcavelookup["quarriesd"] = Gcavelookup["2002-08"]
-        Gcavelookup["2002-x11"] = Gcavelookup["2005-08"]
-        Gcavelookup["2002-x12"] = Gcavelookup["2005-07"]
-        Gcavelookup["2002-x13"] = Gcavelookup["2005-06"]
-        Gcavelookup["2002-x14"] = Gcavelookup["2005-05"]
-
-        Gcavelookup["kh"] = Gcavelookup["161"]
-        Gcavelookup["161-kh"] = Gcavelookup["161"]
-        Gcavelookup["204-steinBH"] = Gcavelookup["204"]
-        Gcavelookup["stonebridge"] = Gcavelookup["204"]
-        Gcavelookup["hauchhole"] = Gcavelookup["234"]
-        Gcavelookup["hauch"] = Gcavelookup["234"]
-        Gcavelookup["234-hauch"] = Gcavelookup["234"]
-        Gcavelookup["tunnocks"] = Gcavelookup["258"]
-        Gcavelookup["balcony"] = Gcavelookup["264"]
-        Gcavelookup["balkon"] = Gcavelookup["264"]
-        Gcavelookup["fgh"] = Gcavelookup["290"]
-        Gcavelookup["gsh"] = Gcavelookup["291"]
+    # These might alse create more duplicate entries
+    aliases =[
+        ("1987-02", "267"),
+        ("1990-01", "171"),
+        ("1990-02", "172"),
+        ("1990-03", "173"),
+        ("1990-04", "174"),
+        ("1990-05", "175"),
+        ("1990-06", "176"),
+        ("1990-07", "177"),
+        ("1990-08", "178"),
+        ("1990-09", "179"),
+        ("1990-10", "180"),
+        ("1990-11", "181"),
+        ("1990-12", "182"),
+        ("1990-13", "183"),
+        ("1990-14", "184"),
+        ("1990-18", "188"),
+        ("1990-adam", "225"),
+        ("1993-01", "200"),
+        ("1996-02", "224"),
+        ("1996-03", "223"),
+        ("1996-04", "222"),
+        ("1996wk2", "207"),
+        ("1996wk3", "208"),
+        ("1996wk5", "219"),
+        ("1996wk6", "218"),
+        ("1996wk8", "209"),
+        ("1996wk11", "268"),
+        ("96wk11", "268"),
+        ("1998-01", "201"),
+        ("1998-03", "210"),
+        ("1999-03", "204"),
+        ("1999-04", "230"),
+        ("1999-10", "162"),
+        ("1999-bo-01", "205"),
+        ("1999-ob-03", "226"),
+        ("1999-ob-04", "227"),
+        ("2000-01", "231"),
+        ("2000-03", "214"),
+        ("2000-04", "220"),
+        ("2000-05", "215"),
+        ("2000-06", "216"),
+        ("2000-07", "217"),
+        ("2000-09", "234"),
+        ("2000-aa-01", "250"),
+        ("2001-04", "239"),
+        ("2001-05", "243"),
+        ("2002-01", "249"),
+        ("2002-02", "234"),
+        ("2002-04", "242"),
+        ("2002-05", "294"),
+        ("2003-01", "256"),
+        ("2003-02", "248"),
+        ("2003-03", "247"),
+        ("2003-04", "241"),
+        ("2003-05", "246"),
+        ("2003-06", "161"),
+        ("2003-08", "240"),
+        ("2003-09", "245"),
+        ("2003-10", "244"),
+        ("2004-01", "269"),
+        ("2004-03", "270"),
+        ("2004-11", "251"),
+        ("2004-12", "161"),
+        ("2004-15", "253"),
+        ("2004-19", "254"),
+        ("2004-20", "255"),
+        ("2005-04", "204"),
+        ("2005-05", "264"),
+        ("2005-07", "257"),
+        ("2006-08", "285"),
+        ("2006-09", "298"),
+        ("2007-71", "271"),
+        ("2010-01", "263"),
+        ("2010-03", "293"),
+        ("2011-01", "292"),
+        ("2012-dd-05", "286"),
+        ("2012-ns-13", "292"),
+        ("2014-neo-01", "273"),
+        ("2014-sd-01", "274"),
+        ("2014-ms-14", "287"),
+        ("2015-mf-06", "288"),
+        ("2016-jb-01", "289"),
+        ("2017-pw-01", "277"),
+        ("2018-dm-07", "359"), # NB this is 1626
+        ("2017_cucc_24", "291"), # note _ not -
+        ("2017_cucc_23", "295"), # note _ not -
+        ("2017_cucc_28", "290"), # note _ not -
+        ("bs17", "283"),
 
-        Gcavelookup["homecoming"] = Gcavelookup["2018-dm-07"]
-        Gcavelookup["heimkommen"] = Gcavelookup["2018-dm-07"]
-        Gcavelookup["99ob02"] = Gcavelookup["1999-ob-02"]
-
-        #Gcavelookup["1626-354"] = Gcavelookup["354"]
+        ("1976/b11", "198"),
+        ("1976/b8", "197"),
+        ("1976/b9", "190"),
+        ("b11", "1976/b11"),
+        ("b8", "1976/b8"),
+        ("b9", "1976/b9"),
+
+        ("2011-01-bs30", "190"),
+        ("bs30", "190"),
+        ("2011-01", "190"),
+
+        ("quarriesd", "2002-08"),
+        ("2002-x11", "2005-08"),
+        ("2002-x12", "2005-07"),
+        ("2002-x13", "2005-06"),
+        ("2002-x14", "2005-05"),
+
+        ("kh", "161"),
+        ("161-kh", "161"),
+        ("204-steinBH", "204"),
+        ("stonebridge", "204"),
+        ("hauchhole", "234"),
+        ("hauch", "234"),
+        ("234-hauch", "234"),
+        ("tunnocks", "258"),
+        ("balcony", "264"),
+        ("balkon", "264"),
+        ("fgh", "290"),
+        ("gsh", "291"),
+
+        ("homecoming", "2018-dm-07"),
+        ("heimkommen", "2018-dm-07"),
+        ("99ob02", "1999-ob-02"),
+        ("1626-354", "354"),
+    ]
+
+    for i in aliases:
+        if i[1] in Gcavelookup:
+            if i[0] in Gcavelookup:
+                # already set by a different method, but is it the same cave?
+                if Gcavelookup[i[0]] == Gcavelookup[i[1]]:
+                    pass
+                else:
+                    Gcave_count[i[0]] += 1
+            Gcavelookup[i[0]] = Gcavelookup[i[1]]
+        else:
+            message = f" * Coding or cave existence mistake, cave for id '{i[1]}' does not exist. Expecting to set alias '{i[0]}' to it"
+            #print(message)
+            DataIssue.objects.create(parser='caves', message=message)
 
-    except:
-        raise
-
     addmore = {}
     for id in Gcavelookup:
         addmore[id.replace("-","_")] = Gcavelookup[id]
@@ -751,21 +772,19 @@ def GetCaveLookup():
     Gcavelookup = {**addmore, **Gcavelookup}
 
     addmore ={}
-    for id in Gcavelookup:
-        if not Gcavelookup[id]:
-            pass
-        elif Gcavelookup[id].kataster_number:
-            addmore[id] = Gcavelookup[id].kataster_number
-        elif Gcavelookup[id].unofficial_number:
-            addmore[id] = Gcavelookup[id].unofficial_number.lower()
-    # with open("cave-lookup.json", 'w') as f: # no permissions on server by default
-    #     json.dump(addmore, f)
-
+    # for id in Gcavelookup:
+    #     if not Gcavelookup[id]:
+    #         pass
+    #     elif Gcavelookup[id].kataster_number:
+    #         addmore[id] = Gcavelookup[id].kataster_number
+    #     elif Gcavelookup[id].unofficial_number:
+    #         addmore[id] = Gcavelookup[id].unofficial_number.lower()
+
     for c in Gcave_count:
         if Gcave_count[c] > 1:
-            message = " ** Duplicate cave id: {}:{}:{}".format(Gcave_count[c], Gcavelookup[c], c)
-            #print(message)
-            #DataIssue.objects.create(parser='caves', message=message)
+            message = f" ** Duplicate cave id count={Gcave_count[c]} id:'{Gcavelookup[c]}' cave __str__:'{c}'"
+            print(message)
+            DataIssue.objects.create(parser='caves', message=message)
             # logdataissues[Gcavelookup[c]]=message # pending troggle-wide issues logging system
 
     return Gcavelookup