From 11756fa0bb4d59185be3b6457782e7ff165baa78 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Sun, 14 Jul 2024 21:39:56 +0200 Subject: [PATCH] relax duplicate alias removal: assume 1623 area --- core/models/caves.py | 241 +++++-------------------------------------- parsers/logbooks.py | 2 +- 2 files changed, 27 insertions(+), 216 deletions(-) diff --git a/core/models/caves.py b/core/models/caves.py index b63df2e..c03b169 100644 --- a/core/models/caves.py +++ b/core/models/caves.py @@ -475,6 +475,17 @@ def GetCaveLookup(): OR we could set it to return None if there are duplicates, and require the caller to fall back on doing the actual database query it wants rather than using this cache shortcut """ + def bad_alias(a,k): + # this is an error + if a.lower() in Gcavelookup: + Gcavelookup[key] = Gcavelookup[a.lower()] + message = f" - Warning, capitalisation error in alias list. cave for id '{a}' does not exist but {a.lower()} does." + print(message) + DataIssue.objects.update_or_create(parser="aliases", message=message) + else: + message = f" * Coding or cave existence mistake, cave for id '{a}' does not exist. Expecting to set key alias '{k}' to it" + DataIssue.objects.update_or_create(parser="aliases", message=message) + duplicates = {} @@ -487,6 +498,9 @@ def GetCaveLookup(): if cave == Gcavelookup[id]: pass # same id, same cave else: # same id but different cave, e.g. 122 => 1623-122 and 1626-122 + # We want to keep the 1623- and get rid of the other one + if cave.areacode == "1623": + Gcavelookup[id] = cave duplicates[id] = 1 global Gcavelookup @@ -540,211 +554,11 @@ def GetCaveLookup(): # On reset, these aliases only work if the cave already properly exists with an entry in :expoweb:/cave_data/ # but as the aliases are recomputed repeatedly, eventually they work on PENDING caves too - - # oldaliases are NOT USED. We are reading from the files instead now. Pending deletion.. - oldaliases = [ - ("1987-02", "1623-267"), - ("1990-01", "1623-171"), - ("1990-02", "1623-172"), - ("1990-03", "1623-173"), - ("1990-04", "1623-174"), - ("1990-05", "1623-175"), - ("1990-06", "1623-176"), - ("1990-07", "1623-177"), - ("1990-08", "1623-178"), - ("1990-09", "1623-179"), - ("1990-10", "1623-180"), - ("1990-11", "1623-181"), - ("1990-12", "1623-182"), - ("1990-13", "1623-183"), - ("1990-14", "1623-184"), - ("1990-18", "1623-188"), - ("1990-adam", "1623-225"), - ("1993-01", "1623-200"), - ("1996-02", "1623-224"), - ("1996-03", "1623-223"), - ("1996-04", "1623-222"), - ("1996wk2", "1623-207"), - ("1996wk3", "1623-208"), - ("1996wk5", "1623-219"), - ("1996wk6", "1623-218"), - ("1996wk8", "1623-209"), - ("1996wk11", "1623-268"), - ("96wk11", "1623-268"), - ("1998-01", "1623-201"), - ("1998-03", "1623-210"), - ("1999-03", "1623-204"), - ("1999-04", "1623-230"), - ("1999-10", "1623-162"), - ("1999-bo-01", "1623-205"), - ("1999-ob-03", "1623-226"), - ("1999-ob-04", "1623-227"), - ("99ob02", "1999-ob-02"), # exists? pending - ("1623-99ob02", "1999-ob-02"), - ("gassischacht", "1623-259"), - ("1623-gassischacht", "1623-259"), - ("2007-gassischacht", "1623-259"), - ("2000-03", "1623-214"), - ("2000-04", "1623-220"), - ("2000-05", "1623-215"), - ("2000-06", "1623-216"), - ("2000-07", "1623-217"), - ("2000-09", "1623-234"), - ("2000-aa-01", "1623-250"), - ("2001-04", "1623-239"), - ("2001-05", "1623-243"), - ("2002-01", "1623-249"), - ("2002-02", "1623-234"), - ("2002-04", "1623-242"), - ("2002-05", "1623-294"), - ("quarriesd", "1623-2002-08"), - ("1623-quarriesd", "1623-2002-08"), - ("2002-08", "1623-2002-08"), - ("2003-01", "1623-256"), - ("2003-02", "1623-248"), - ("2003-03", "1623-247"), - ("2003-04", "1623-241"), - ("2003-05", "1623-246"), - ("2003-06", "1623-161"), - ("2003-08", "1623-240"), - ("2003-09", "1623-245"), - ("2003-10", "1623-244"), - ("2004-01", "1623-269"), - ("2004-03", "1623-270"), - ("2004-11", "1623-251"), - ("2004-12", "1623-161"), - ("2004-15", "1623-253"), - ("2004-19", "1623-254"), - ("2004-20", "1623-255"), - ("2005-04", "1623-204"), - ("2005-05", "1623-264"), - ("2005-07", "1623-257"), - ("2006-08", "1623-285"), - ("2006-09", "1623-298"), - ("2007-71", "1623-271"), - ("2010-01", "1623-263"), - ("2010-03", "1623-293"), - ("2012-70", "1623-296"), - ("1623-2012-70", "1623-296"), - ("2012-dd-05", "1623-286"), - ("2012-dd-08", "1623-297"), - # ("2011-01", "1623-292"), seems to be a mistake - ("2012-dd-05", "1623-286"), - ("2012-0w-01", "2012-ow-01"), # catch the typo: zero for 'O' - ("2012-ns-13", "1623-292"), - ("2014-neo-01", "1623-273"), - ("2014-sd-01", "1623-274"), - ("2014-ms-14", "1623-287"), - ("2015-mf-06", "1623-288"), - ("2016-jb-01", "1623-289"), - ("2016-01", "1623-2012-ns-07"), - ("2016-03", "1623-2012-ns-12"), - ("2016-04", "1623-2012-ns-10"), - ("2017-pw-01", "1623-277"), - ("2017_cucc_24", "1623-291"), # note _ not - - ("2017_cucc_23", "1623-295"), # note _ not - - ("2017_cucc_28", "1623-290"), # note _ not - - ("2013-cucc-03", "1623-2013-03"), - ("2018-ps-01", "1623-114"), - ("bs17", "1623-283"), - ("1976/b11", "1623-198"), # / in name with crash url resolution, bad idea, fix in original doc - ("1976/b8", "1623-197"), # / in name with crash url resolution, bad idea, fix in original doc - ("1976/b9", "1623-190"), # / in name with crash url resolution, bad idea, fix in original doc - ("1976-b11", "1623-198"), - ("1976-b8", "1623-197"), - ("1976-b9", "1623-190"), - ("b11", "1623-198"), - ("b8", "1623-197"), - ("b9", "1623-190"), - ("2011-01-bs30", "1623-190"), - ("bs30", "1623-190"), - ("2011-01", "1623-190"), - ("2002-x11", "1623-2005-08"), - ("2002-x12", "2005-07"), - ("2002-x13", "1623-2005-06"), - ("2002-x14", "2005-05"), - - # various funnies to cope with cave names used in logbooks - ("kh", "1623-161"), - ("161-kh", "1623-161"), - ("204-steinBH", "1623-204"), - ("stonebridge", "1623-204"), - ("hauchhole", "1623-234"), - ("hauch", "1623-234"), - ("234-hauch", "1623-234"), - ("tunnocks", "1623-258"), - ("balcony", "1623-264"), - ("balkon", "1623-264"), - ("fgh", "1623-290"), - ("fishface", "1623-290"), - ("gsh", "1623-291"), - ("1623-2023-lc-01", "1623-318"), - ("tempest", "1623-2023-lc-01"), - -# from the git output after Becka's changes, used to construct this list.. - # rename caves-1623/{2023-ASH-15/2023-ASH-15.svx => 303/303.svx} (94%) - # rename caves-1623/{2023-mg-02/2023-mg-02.svx => 304/304.svx} (90%) - # rename caves-1623/{2023-mg-01/2023-mg-01.svx => 305/305.svx} (94%) - # rename caves-1623/{2023-ASH-17/2023-ASH-17.svx => 306/306.svx} (94%) - # rename caves-1623/{2023-ASH-05/2023-ASH-05.svx => 307/307.svx} (89%) - # rename caves-1623/{2023-ASH-08/2023-ASH-08.svx => 308/308.svx} (93%) - # rename caves-1623/{2023-ASH-09/2023-ASH-09.svx => 309/309.svx} (94%) - # rename caves-1623/{2023-mg-04/2023-mg-04.svx => 310/310.svx} (91%) - # rename caves-1623/{2023-ASH-16/2023-ASH-16.svx => 311/311.svx} (93%) - # rename caves-1623/{2023-RAWDB-02/2023-RAWDB-02.svx => 312/312.svx} (92%) - # rename caves-1623/{2023-RAWDB-01/2023-RAWDB-01.svx => 313/313.svx} (92%) - # rename caves-1623/{2023-kt-02/2023-kt-02.svx => 314/314.svx} (95%) - # rename caves-1623/{2023-jss-01 => 315}/2023-jss-01_trip1.svx (100%) - # rename caves-1623/{2023-jss-01 => 315}/2023-jss-01_trip2.svx (100%) - # rename caves-1623/{2023-jss-01/2023-jss-01.svx => 315/315.svx} (68%) - # rename caves-1623/{2023-kt-01/2023-kt-01.svx => 316/316.svx} (92%) - ("2023-ASH-15", "1623-303"), - ("2023-mg-02", "1623-304"), - ("2023-mg-01", "1623-305"), - ("2023-ASH-17", "1623-306"), - ("2023-ASH-05", "1623-307"), - ("2023-ASH-08", "1623-308"), - ("2023-ASH-09", "1623-309"), - ("2023-mg-04", "1623-310"), - ("2023-ASH-16", "1623-311"), - ("2023-RAWDB-02", "1623-312"), - ("2023-RAWDB-01", "1623-313"), - ("2023-kt-02", "1623-314"), - ("2023-jss-01", "1623-315"), - ("2023-kt-01", "1623-316"), - - # 1626 - ("langgustl", "1626-354"), - ("2018-dm-07", "1626-359"), - ("1626-2018-dm-07", "1626-359"), - ("homecoming", "2018-dm-07"), - ("heimkommen", "2018-dm-07"), - ("Heimkehr", "2018-dm-07"), - ("hc", "2018-dm-07"), - ("loveshack", "1626-2018-ad-03"), - ("crushed-garlic", "1626-2018-ad-03"), - - # Renaming cave ids which end in a letter NB targets should be LOWER CASE for this code - ("2002-XX", "1623-2002-fb-01"), - ("2002-X09B", "1623-2002-xb09"), - ("2007-neu", "1623-2007-neu-01"), - ("BuzzardHole", "1626-2023-bz-01"), - ("2023-BuzzardHole", "1626-2023-bz-01"), - ("1626-2023-BuzzardHole", "1626-2023-bz-01"), - ("1626-2023-buzzardhole","1626-2023-bz-01"), - ("Rentner","1623-rnt-01"), - ("Blaubeer","1623-blb-01"), - ("Haldenlock","1623-hld-01"), - ("Juttahoehle","1623-jtt-01"), - ("Loutotihoehle","1626-loutoti-01"), - ("Casino","1626-casino-01"), - ("Upside-down","1626-upside-down-01"), - - - ] for key, alias in aliases: - if alias in Gcavelookup: + if not alias in Gcavelookup: + bad_alias(alias, key) + else: if key in Gcavelookup: # already set by a different method, but is it the same cave? if Gcavelookup[key] == Gcavelookup[alias]: @@ -756,15 +570,7 @@ def GetCaveLookup(): DataIssue.objects.create(parser="alias", message=message) # Gcave_count[key] += 1 Gcavelookup[key] = Gcavelookup[alias] - else: - if alias.lower() in Gcavelookup: - Gcavelookup[key] = Gcavelookup[alias.lower()] - message = f" - Warning, capitalisation error in alias list. cave for id '{alias}' does not exist but {alias.lower()} does." - print(message) - DataIssue.objects.update_or_create(parser="aliases", message=message) - else: - message = f" * Coding or cave existence mistake, cave for id '{alias}' does not exist. Expecting to set alias '{key}' to it" - DataIssue.objects.update_or_create(parser="aliases", message=message) + addmore = {} for id in Gcavelookup: @@ -783,9 +589,14 @@ def GetCaveLookup(): for d in duplicates: # if an alias resolves to 2 or more caves, remove it as an alias # NOTE such an alisas is restored, assuming a 1623 area, when parsing Wallets - but only wallets. - Gcavelookup.pop(d) - Gcave_count.pop(d) # so should not get a duplicate msg below.. - ldup.append(d) + #print(f"{Gcavelookup[d]=} {Gcave_count[d]=}") + if Gcavelookup[d].areacode == "1623": + # then leave it, treat as OK + pass + else: + Gcavelookup.pop(d) + Gcave_count.pop(d) # so should not get a duplicate msg below.. + ldup.append(d) if ldup: message = f" - Ambiguous aliases being removed: {ldup}" print(message) diff --git a/parsers/logbooks.py b/parsers/logbooks.py index d5580a1..dabc701 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -59,7 +59,7 @@ LOGBOOK_PARSER_SETTINGS = { LOGBOOKS_DIR = "years" # subfolder of settings.EXPOWEB ENTRIES = { - "2024": 3, + "2024": 20, "2023": 86, "2022": 94, "2019": 55,