2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-21 23:01:52 +00:00

relax duplicate alias removal: assume 1623 area

This commit is contained in:
Philip Sargent 2024-07-14 21:39:56 +02:00
parent ee5b36a33c
commit 11756fa0bb
2 changed files with 27 additions and 216 deletions

View File

@ -475,6 +475,17 @@ def GetCaveLookup():
OR we could set it to return None if there are duplicates, and require the caller to
fall back on doing the actual database query it wants rather than using this cache shortcut
"""
def bad_alias(a, k):
    """Record a DataIssue for an alias whose target cave id is not in Gcavelookup.

    a: the target cave id named in the alias list, which was not found.
    k: the alias key that was meant to be mapped onto that cave.

    If the lower-cased form of `a` is present in Gcavelookup, the alias `k`
    is pointed at that entry and a capitalisation warning is printed and
    recorded. Otherwise only a DataIssue is recorded and Gcavelookup is
    left unchanged.
    """
    # this is an error
    lowered = a.lower()
    if lowered in Gcavelookup:
        # Use the parameter `k` rather than relying on a variable called
        # `key` happening to exist in the enclosing scope with the same value.
        Gcavelookup[k] = Gcavelookup[lowered]
        message = f" - Warning, capitalisation error in alias list. cave for id '{a}' does not exist but {a.lower()} does."
        print(message)
        DataIssue.objects.update_or_create(parser="aliases", message=message)
    else:
        message = f" * Coding or cave existence mistake, cave for id '{a}' does not exist. Expecting to set key alias '{k}' to it"
        DataIssue.objects.update_or_create(parser="aliases", message=message)
duplicates = {}
@ -487,6 +498,9 @@ def GetCaveLookup():
if cave == Gcavelookup[id]:
pass # same id, same cave
else: # same id but different cave, e.g. 122 => 1623-122 and 1626-122
# We want to keep the 1623- and get rid of the other one
if cave.areacode == "1623":
Gcavelookup[id] = cave
duplicates[id] = 1
global Gcavelookup
@ -540,211 +554,11 @@ def GetCaveLookup():
# On reset, these aliases only work if the cave already properly exists with an entry in :expoweb:/cave_data/
# but as the aliases are recomputed repeatedly, eventually they work on PENDING caves too
# oldaliases are NOT USED. We are reading from the files instead now. Pending deletion..
oldaliases = [
("1987-02", "1623-267"),
("1990-01", "1623-171"),
("1990-02", "1623-172"),
("1990-03", "1623-173"),
("1990-04", "1623-174"),
("1990-05", "1623-175"),
("1990-06", "1623-176"),
("1990-07", "1623-177"),
("1990-08", "1623-178"),
("1990-09", "1623-179"),
("1990-10", "1623-180"),
("1990-11", "1623-181"),
("1990-12", "1623-182"),
("1990-13", "1623-183"),
("1990-14", "1623-184"),
("1990-18", "1623-188"),
("1990-adam", "1623-225"),
("1993-01", "1623-200"),
("1996-02", "1623-224"),
("1996-03", "1623-223"),
("1996-04", "1623-222"),
("1996wk2", "1623-207"),
("1996wk3", "1623-208"),
("1996wk5", "1623-219"),
("1996wk6", "1623-218"),
("1996wk8", "1623-209"),
("1996wk11", "1623-268"),
("96wk11", "1623-268"),
("1998-01", "1623-201"),
("1998-03", "1623-210"),
("1999-03", "1623-204"),
("1999-04", "1623-230"),
("1999-10", "1623-162"),
("1999-bo-01", "1623-205"),
("1999-ob-03", "1623-226"),
("1999-ob-04", "1623-227"),
("99ob02", "1999-ob-02"), # exists? pending
("1623-99ob02", "1999-ob-02"),
("gassischacht", "1623-259"),
("1623-gassischacht", "1623-259"),
("2007-gassischacht", "1623-259"),
("2000-03", "1623-214"),
("2000-04", "1623-220"),
("2000-05", "1623-215"),
("2000-06", "1623-216"),
("2000-07", "1623-217"),
("2000-09", "1623-234"),
("2000-aa-01", "1623-250"),
("2001-04", "1623-239"),
("2001-05", "1623-243"),
("2002-01", "1623-249"),
("2002-02", "1623-234"),
("2002-04", "1623-242"),
("2002-05", "1623-294"),
("quarriesd", "1623-2002-08"),
("1623-quarriesd", "1623-2002-08"),
("2002-08", "1623-2002-08"),
("2003-01", "1623-256"),
("2003-02", "1623-248"),
("2003-03", "1623-247"),
("2003-04", "1623-241"),
("2003-05", "1623-246"),
("2003-06", "1623-161"),
("2003-08", "1623-240"),
("2003-09", "1623-245"),
("2003-10", "1623-244"),
("2004-01", "1623-269"),
("2004-03", "1623-270"),
("2004-11", "1623-251"),
("2004-12", "1623-161"),
("2004-15", "1623-253"),
("2004-19", "1623-254"),
("2004-20", "1623-255"),
("2005-04", "1623-204"),
("2005-05", "1623-264"),
("2005-07", "1623-257"),
("2006-08", "1623-285"),
("2006-09", "1623-298"),
("2007-71", "1623-271"),
("2010-01", "1623-263"),
("2010-03", "1623-293"),
("2012-70", "1623-296"),
("1623-2012-70", "1623-296"),
("2012-dd-05", "1623-286"),
("2012-dd-08", "1623-297"),
# ("2011-01", "1623-292"), seems to be a mistake
("2012-dd-05", "1623-286"),
("2012-0w-01", "2012-ow-01"), # catch the typo: zero for 'O'
("2012-ns-13", "1623-292"),
("2014-neo-01", "1623-273"),
("2014-sd-01", "1623-274"),
("2014-ms-14", "1623-287"),
("2015-mf-06", "1623-288"),
("2016-jb-01", "1623-289"),
("2016-01", "1623-2012-ns-07"),
("2016-03", "1623-2012-ns-12"),
("2016-04", "1623-2012-ns-10"),
("2017-pw-01", "1623-277"),
("2017_cucc_24", "1623-291"), # note _ not -
("2017_cucc_23", "1623-295"), # note _ not -
("2017_cucc_28", "1623-290"), # note _ not -
("2013-cucc-03", "1623-2013-03"),
("2018-ps-01", "1623-114"),
("bs17", "1623-283"),
("1976/b11", "1623-198"), # / in name with crash url resolution, bad idea, fix in original doc
("1976/b8", "1623-197"), # / in name with crash url resolution, bad idea, fix in original doc
("1976/b9", "1623-190"), # / in name with crash url resolution, bad idea, fix in original doc
("1976-b11", "1623-198"),
("1976-b8", "1623-197"),
("1976-b9", "1623-190"),
("b11", "1623-198"),
("b8", "1623-197"),
("b9", "1623-190"),
("2011-01-bs30", "1623-190"),
("bs30", "1623-190"),
("2011-01", "1623-190"),
("2002-x11", "1623-2005-08"),
("2002-x12", "2005-07"),
("2002-x13", "1623-2005-06"),
("2002-x14", "2005-05"),
# various funnies to cope with cave names used in logbooks
("kh", "1623-161"),
("161-kh", "1623-161"),
("204-steinBH", "1623-204"),
("stonebridge", "1623-204"),
("hauchhole", "1623-234"),
("hauch", "1623-234"),
("234-hauch", "1623-234"),
("tunnocks", "1623-258"),
("balcony", "1623-264"),
("balkon", "1623-264"),
("fgh", "1623-290"),
("fishface", "1623-290"),
("gsh", "1623-291"),
("1623-2023-lc-01", "1623-318"),
("tempest", "1623-2023-lc-01"),
# from the git output after Becka's changes, used to construct this list..
# rename caves-1623/{2023-ASH-15/2023-ASH-15.svx => 303/303.svx} (94%)
# rename caves-1623/{2023-mg-02/2023-mg-02.svx => 304/304.svx} (90%)
# rename caves-1623/{2023-mg-01/2023-mg-01.svx => 305/305.svx} (94%)
# rename caves-1623/{2023-ASH-17/2023-ASH-17.svx => 306/306.svx} (94%)
# rename caves-1623/{2023-ASH-05/2023-ASH-05.svx => 307/307.svx} (89%)
# rename caves-1623/{2023-ASH-08/2023-ASH-08.svx => 308/308.svx} (93%)
# rename caves-1623/{2023-ASH-09/2023-ASH-09.svx => 309/309.svx} (94%)
# rename caves-1623/{2023-mg-04/2023-mg-04.svx => 310/310.svx} (91%)
# rename caves-1623/{2023-ASH-16/2023-ASH-16.svx => 311/311.svx} (93%)
# rename caves-1623/{2023-RAWDB-02/2023-RAWDB-02.svx => 312/312.svx} (92%)
# rename caves-1623/{2023-RAWDB-01/2023-RAWDB-01.svx => 313/313.svx} (92%)
# rename caves-1623/{2023-kt-02/2023-kt-02.svx => 314/314.svx} (95%)
# rename caves-1623/{2023-jss-01 => 315}/2023-jss-01_trip1.svx (100%)
# rename caves-1623/{2023-jss-01 => 315}/2023-jss-01_trip2.svx (100%)
# rename caves-1623/{2023-jss-01/2023-jss-01.svx => 315/315.svx} (68%)
# rename caves-1623/{2023-kt-01/2023-kt-01.svx => 316/316.svx} (92%)
("2023-ASH-15", "1623-303"),
("2023-mg-02", "1623-304"),
("2023-mg-01", "1623-305"),
("2023-ASH-17", "1623-306"),
("2023-ASH-05", "1623-307"),
("2023-ASH-08", "1623-308"),
("2023-ASH-09", "1623-309"),
("2023-mg-04", "1623-310"),
("2023-ASH-16", "1623-311"),
("2023-RAWDB-02", "1623-312"),
("2023-RAWDB-01", "1623-313"),
("2023-kt-02", "1623-314"),
("2023-jss-01", "1623-315"),
("2023-kt-01", "1623-316"),
# 1626
("langgustl", "1626-354"),
("2018-dm-07", "1626-359"),
("1626-2018-dm-07", "1626-359"),
("homecoming", "2018-dm-07"),
("heimkommen", "2018-dm-07"),
("Heimkehr", "2018-dm-07"),
("hc", "2018-dm-07"),
("loveshack", "1626-2018-ad-03"),
("crushed-garlic", "1626-2018-ad-03"),
# Renaming cave ids which end in a letter NB targets should be LOWER CASE for this code
("2002-XX", "1623-2002-fb-01"),
("2002-X09B", "1623-2002-xb09"),
("2007-neu", "1623-2007-neu-01"),
("BuzzardHole", "1626-2023-bz-01"),
("2023-BuzzardHole", "1626-2023-bz-01"),
("1626-2023-BuzzardHole", "1626-2023-bz-01"),
("1626-2023-buzzardhole","1626-2023-bz-01"),
("Rentner","1623-rnt-01"),
("Blaubeer","1623-blb-01"),
("Haldenlock","1623-hld-01"),
("Juttahoehle","1623-jtt-01"),
("Loutotihoehle","1626-loutoti-01"),
("Casino","1626-casino-01"),
("Upside-down","1626-upside-down-01"),
]
for key, alias in aliases:
if alias in Gcavelookup:
if not alias in Gcavelookup:
bad_alias(alias, key)
else:
if key in Gcavelookup:
# already set by a different method, but is it the same cave?
if Gcavelookup[key] == Gcavelookup[alias]:
@ -756,15 +570,7 @@ def GetCaveLookup():
DataIssue.objects.create(parser="alias", message=message)
# Gcave_count[key] += 1
Gcavelookup[key] = Gcavelookup[alias]
else:
if alias.lower() in Gcavelookup:
Gcavelookup[key] = Gcavelookup[alias.lower()]
message = f" - Warning, capitalisation error in alias list. cave for id '{alias}' does not exist but {alias.lower()} does."
print(message)
DataIssue.objects.update_or_create(parser="aliases", message=message)
else:
message = f" * Coding or cave existence mistake, cave for id '{alias}' does not exist. Expecting to set alias '{key}' to it"
DataIssue.objects.update_or_create(parser="aliases", message=message)
addmore = {}
for id in Gcavelookup:
@ -783,9 +589,14 @@ def GetCaveLookup():
for d in duplicates:
# if an alias resolves to 2 or more caves, remove it as an alias
# NOTE such an alias is restored, assuming a 1623 area, when parsing Wallets - but only wallets.
Gcavelookup.pop(d)
Gcave_count.pop(d) # so should not get a duplicate msg below..
ldup.append(d)
#print(f"{Gcavelookup[d]=} {Gcave_count[d]=}")
if Gcavelookup[d].areacode == "1623":
# then leave it, treat as OK
pass
else:
Gcavelookup.pop(d)
Gcave_count.pop(d) # so should not get a duplicate msg below..
ldup.append(d)
if ldup:
message = f" - Ambiguous aliases being removed: {ldup}"
print(message)

View File

@ -59,7 +59,7 @@ LOGBOOK_PARSER_SETTINGS = {
LOGBOOKS_DIR = "years" # subfolder of settings.EXPOWEB
ENTRIES = {
"2024": 3,
"2024": 20,
"2023": 86,
"2022": 94,
"2019": 55,