2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-21 23:01:52 +00:00

Cleaning auto Cave creation from survex file detection

This commit is contained in:
Philip Sargent 2023-09-10 02:06:38 +03:00
parent 54136721b8
commit 327b1923b0
4 changed files with 233 additions and 192 deletions

View File

@ -527,7 +527,8 @@ def GetCaveLookup():
Gcavelookup[key] = cave
Gcave_count[key] += 1
if cave.kataster_number:
checkcaveid(cave, cave.kataster_number) # we do expect 1623/55 and 1626/55 to cause a warning message
# NOTE this will set an alias for "145" not "1623-145"
checkcaveid(cave, cave.kataster_number) # we do expect 1623/55 and 1626/55 to cause clash, removed below
# the rest of these are 'nice to have' but may validly already be set
if cave.unofficial_number:
@ -546,151 +547,176 @@ def GetCaveLookup():
# These might also create more duplicate entries
# Yes, this should be set in, and imported from, settings.py
# yes we should move to always using the 1623- prefix too.
# On reset, these aliases only work if the cave already properly exists with an entry in :expoweb:/cave_data/
# but as the aliases are recomputed repeatedly, eventually they work on PENDING caves too
aliases = [
("1987-02", "267"),
("1990-01", "171"),
("1990-02", "172"),
("1990-03", "173"),
("1990-04", "174"),
("1990-05", "175"),
("1990-06", "176"),
("1990-07", "177"),
("1990-08", "178"),
("1990-09", "179"),
("1990-10", "180"),
("1990-11", "181"),
("1990-12", "182"),
("1990-13", "183"),
("1990-14", "184"),
("1990-18", "188"),
("1990-adam", "225"),
("1993-01", "200"),
("1996-02", "224"),
("1996-03", "223"),
("1996-04", "222"),
("1996wk2", "207"),
("1996wk3", "208"),
("1996wk5", "219"),
("1996wk6", "218"),
("1996wk8", "209"),
("1996wk11", "268"),
("96wk11", "268"),
("1998-01", "201"),
("1998-03", "210"),
("1999-03", "204"),
("1999-04", "230"),
("1999-10", "162"),
("1999-bo-01", "205"),
("1999-ob-03", "226"),
("1999-ob-04", "227"),
("2000-01", "231"),
("2000-03", "214"),
("2000-04", "220"),
("2000-05", "215"),
("2000-06", "216"),
("2000-07", "217"),
("2000-09", "234"),
("2000-aa-01", "250"),
("2001-04", "239"),
("2001-05", "243"),
("2002-01", "249"),
("2002-02", "234"),
("2002-04", "242"),
("2002-05", "294"),
("2003-01", "256"),
("2003-02", "248"),
("2003-03", "247"),
("2003-04", "241"),
("2003-05", "246"),
("2003-06", "161"),
("2003-08", "240"),
("2003-09", "245"),
("2003-10", "244"),
("2004-01", "269"),
("2004-03", "270"),
("2004-11", "251"),
("2004-12", "161"),
("2004-15", "253"),
("2004-19", "254"),
("2004-20", "255"),
("2005-04", "204"),
("2005-05", "264"),
("2005-07", "257"),
("2006-08", "285"),
("2006-09", "298"),
("2007-71", "271"),
("2010-01", "263"),
("2010-03", "293"),
("2011-01", "292"),
("2012-dd-05", "286"),
("2012-ns-13", "292"),
("2014-neo-01", "273"),
("2014-sd-01", "274"),
("2014-ms-14", "287"),
("2015-mf-06", "288"),
("2016-jb-01", "289"),
("2017-pw-01", "277"),
("2018-dm-07", "359"), # NB this is 1626
("2017_cucc_24", "291"), # note _ not -
("2017_cucc_23", "295"), # note _ not -
("2017_cucc_28", "290"), # note _ not -
("bs17", "283"),
("1976/b11", "198"),
("1976/b8", "197"),
("1976/b9", "190"),
("1987-02", "1623-267"),
("1990-01", "1623-171"),
("1990-02", "1623-172"),
("1990-03", "1623-173"),
("1990-04", "1623-174"),
("1990-05", "1623-175"),
("1990-06", "1623-176"),
("1990-07", "1623-177"),
("1990-08", "1623-178"),
("1990-09", "1623-179"),
("1990-10", "1623-180"),
("1990-11", "1623-181"),
("1990-12", "1623-182"),
("1990-13", "1623-183"),
("1990-14", "1623-184"),
("1990-18", "1623-188"),
("1990-adam", "1623-225"),
("1993-01", "1623-200"),
("1996-02", "1623-224"),
("1996-03", "1623-223"),
("1996-04", "1623-222"),
("1996wk2", "1623-207"),
("1996wk3", "1623-208"),
("1996wk5", "1623-219"),
("1996wk6", "1623-218"),
("1996wk8", "1623-209"),
("1996wk11", "1623-268"),
("96wk11", "1623-268"),
("1998-01", "1623-201"),
("1998-03", "1623-210"),
("1999-03", "1623-204"),
("1999-04", "1623-230"),
("1999-10", "1623-162"),
("1999-bo-01", "1623-205"),
("1999-ob-03", "1623-226"),
("1999-ob-04", "1623-227"),
("99ob02", "1999-ob-02"), # exists? pending
("1623-99ob02", "1999-ob-02"),
("gassischacht", "1623-259"),
("1623-gassischacht", "1623-259"),
("2007-gassischacht", "1623-259"),
("2000-03", "1623-214"),
("2000-04", "1623-220"),
("2000-05", "1623-215"),
("2000-06", "1623-216"),
("2000-07", "1623-217"),
("2000-09", "1623-234"),
("2000-aa-01", "1623-250"),
("2001-04", "1623-239"),
("2001-05", "1623-243"),
("2002-01", "1623-249"),
("2002-02", "1623-234"),
("2002-04", "1623-242"),
("2002-05", "1623-294"),
("quarriesd", "1623-2002-08"),
("1623-quarriesd", "1623-2002-08"),
("2002-08", "1623-2002-08"),
("2003-01", "1623-256"),
("2003-02", "1623-248"),
("2003-03", "1623-247"),
("2003-04", "1623-241"),
("2003-05", "1623-246"),
("2003-06", "1623-161"),
("2003-08", "1623-240"),
("2003-09", "1623-245"),
("2003-10", "1623-244"),
("2004-01", "1623-269"),
("2004-03", "1623-270"),
("2004-11", "1623-251"),
("2004-12", "1623-161"),
("2004-15", "1623-253"),
("2004-19", "1623-254"),
("2004-20", "1623-255"),
("2005-04", "1623-204"),
("2005-05", "1623-264"),
("2005-07", "1623-257"),
("2006-08", "1623-285"),
("2006-09", "1623-298"),
("2007-71", "1623-271"),
("2010-01", "1623-263"),
("2010-03", "1623-293"),
# ("2011-01", "1623-292"), seems to be a mistake
("2012-dd-05", "1623-286"),
("2012-0w-01", "2012-ow-01"),
("2012-ns-13", "1623-292"),
("2014-neo-01", "1623-273"),
("2014-sd-01", "1623-274"),
("2014-ms-14", "1623-287"),
("2015-mf-06", "1623-288"),
("2016-jb-01", "1623-289"),
("2017-pw-01", "1623-277"),
("2017_cucc_24", "1623-291"), # note _ not -
("2017_cucc_23", "1623-295"), # note _ not -
("2017_cucc_28", "1623-290"), # note _ not -
("bs17", "1623-283"),
("1976/b11", "1623-198"),
("1976/b8", "1623-197"),
("1976/b9", "1623-190"),
("1976-b11", "1623-198"),
("1976-b8", "1623-197"),
("1976-b9", "1623-190"),
("b11", "1976/b11"),
("b8", "1976/b8"),
("b9", "1976/b9"),
("2011-01-bs30", "190"),
("bs30", "190"),
("2011-01", "190"),
("quarriesd", "2002-08"),
("2011-01-bs30", "1623-190"),
("bs30", "1623-190"),
("2011-01", "1623-190"),
("2002-x11", "2005-08"),
("2002-x12", "2005-07"),
("2002-x13", "2005-06"),
("2002-x13", "1623-2005-06"),
("2002-x14", "2005-05"),
("kh", "161"),
("161-kh", "161"),
("204-steinBH", "204"),
("stonebridge", "204"),
("hauchhole", "234"),
("hauch", "234"),
("234-hauch", "234"),
("tunnocks", "258"),
("balcony", "264"),
("balkon", "264"),
("fgh", "290"),
("fishface", "290"),
("gsh", "291"),
("kh", "1623-161"),
("161-kh", "1623-161"),
("204-steinBH", "1623-204"),
("stonebridge", "1623-204"),
("hauchhole", "1623-234"),
("hauch", "1623-234"),
("234-hauch", "1623-234"),
("tunnocks", "1623-258"),
("balcony", "1623-264"),
("balkon", "1623-264"),
("fgh", "1623-290"),
("fishface", "1623-290"),
("gsh", "1623-291"),
("tempest", "1623-2023-lc-01"),
("loveshack", "2023-pb-01"),
("1623-2023-kt-02", "2023-kt-02"),
#("1623-2023-jss-01", "2023-jss-01"),
# 1626 aliases
("langgustl", "1626-354"),
("2018-dm-07", "1626-359"),
("homecoming", "2018-dm-07"),
("heimkommen", "2018-dm-07"),
("Heimkehr", "2018-dm-07"),
("hc", "2018-dm-07"),
("99ob02", "1999-ob-02"),
("tempest", "2023-lc-01"),
]
for i in aliases:
if i[1] in Gcavelookup:
if i[0] in Gcavelookup:
for key, alias in aliases:
if alias in Gcavelookup:
if key in Gcavelookup:
# already set by a different method, but is it the same cave?
if Gcavelookup[i[0]] == Gcavelookup[i[1]]:
if Gcavelookup[key] == Gcavelookup[alias]:
pass
else:
Gcave_count[i[0]] += 1
Gcavelookup[i[0]] = Gcavelookup[i[1]]
# aliases wrong - these are different caves
message = f" - Alias list is mis-identifying different caves {key}:{Gcavelookup[key]} != {alias}:{Gcavelookup[alias]} "
print(message)
DataIssue.objects.create(parser="alias", message=message)
# Gcave_count[key] += 1
Gcavelookup[key] = Gcavelookup[alias]
else:
message = f" * Coding or cave existence mistake, cave for id '{i[1]}' does not exist. Expecting to set alias '{i[0]}' to it"
message = f" * Coding or cave existence mistake, cave for id '{alias}' does not exist. Expecting to set alias '{key}' to it"
print(message)
DataIssue.objects.create(parser="aliases", message=message)
addmore = {}
for id in Gcavelookup:
addmore[id.replace("-", "_")] = Gcavelookup[id]
addmore[id.replace("_", "-")] = Gcavelookup[id]
addmore[id.upper()] = Gcavelookup[id]
addmore[id.replace("-", "_")] = Gcavelookup[id]
addmore[id.replace("-", "_").upper()] = Gcavelookup[id]
addmore[id.replace("-", "_").lower()] = Gcavelookup[id]
addmore[id.replace("_", "-").upper()] = Gcavelookup[id]
addmore[id.replace("_", "-").lower()] = Gcavelookup[id]
Gcavelookup = {**addmore, **Gcavelookup}
addmore = {}
@ -698,7 +724,7 @@ def GetCaveLookup():
ldup = []
for d in duplicates:
Gcavelookup.pop(d)
Gcave_count.pop(d)
Gcave_count.pop(d) # so should not get a duplicate msg below..
ldup.append(d)
if ldup:
message = f" - Ambiguous aliases removed: {ldup}"

View File

@ -95,7 +95,7 @@ def is_cave(id):
print(f" - Failed to find cave object from id <{id}>")
if id.lower() != "unknown" and id != "":
print(f" - adding <{id}> to pendingcaves.txt list")
add_cave_to_pending_list(id)
add_cave_to_pending_list(id, f"Could not find id <{id}>")
return False
def fillblankothers(w):
@ -275,6 +275,11 @@ def cavewallets(request, caveid):
if str(fcave.slug()) == caveid:
# print(f' - Found one ! {z.walletname=} {zcaveid=}')
wallets.add(z)
elif f"1623-{cleanid}" in Gcavelookup: # special hack for all the old wallets which are 1623
fcave = Gcavelookup[f"1623-{cleanid}"]
if str(fcave.slug()) == caveid:
# print(f' - Found one ! {z.walletname=} {zcaveid=}')
wallets.add(z)
elif cleanid in ['surface', 'unknown', '']:
message = f" ! In {z.walletname} ignore '{cleanid}' "
print(message)
@ -284,7 +289,7 @@ def cavewallets(request, caveid):
message = f" ! In {z.walletname} there is an unrecognised cave name '{cleanid}', adding to pending list."
print(message)
DataIssue.objects.update_or_create(parser="scans", message=message, url=wurl)
add_cave_to_pending_list(cleanid)
add_cave_to_pending_list(cleanid, f"an unrecognised cave name in {z.walletname}")
manywallets = list(set(wallets))
for w in manywallets:

View File

@ -130,13 +130,13 @@ def get_area(areanum):
area = Area.objects.get(short_name="1627")
return area
def create_new_cave(svxpath):
def create_new_cave(svxpath, msg=None):
"""This is called only when a new survex file is edited online which has a path on the
:loser: repo which is not recognised as a known cave.
ALSO called by survex parser when it finds a cave it doesn't recognise
"""
# e.g. svxpath = "caves-1623/666/antig"
print(f"Create new cave at {svxpath}")
print(f"Create new cave at {svxpath} - {msg}")
#
survex_file = svxpath + ".svx"
parts = svxpath.split("/")
@ -149,7 +149,7 @@ def create_new_cave(svxpath):
url = f"{areanum}/{caveid}.html" # Note we are appending the .html as we are believe in backwards compatability.
#url = f"{areanum}/{a[5:]}.html" # This is original code, but a above is only defined as being 4 characters long, so it did not make sense and produced non unique urls
else:
print(f"WARNING: parsers/caves/create_new_cave called with svxpath '{svxpath}'. Surely it should start 'caves-162*'?")
print(f"WARNING: parsers/caves/create_new_cave called with svxpath '{svxpath}'. Surely it should start 'caves-162*'? {msg}")
areanum = "1623"
url = f"1623/{caveid}.html"
#url = f"1623/{k}.html" # This is original code, but a above is only defined as being 4 characters long, so it did not make sense and produced non unique urls
@ -157,17 +157,17 @@ def create_new_cave(svxpath):
k = f"{areanum}-{caveid}"
area = get_area(areanum)
caves = Cave.objects.filter(unofficial_number=caveid)
caves = Cave.objects.filter(unofficial_number=caveid, area =areanum)
if caves:
message = f" ! Already exists, caveid:{k} in area {areanum} {caves}"
message = f" ! Already exists, caveid:{k} in area {areanum} {caves} - {msg}"
DataIssue.objects.create(parser="caves", message=message)
print(message)
return caves[0]
try:
cave = do_pending_cave(k, caveid, url, area)
cave = do_pending_cave(k, caveid, url, area, msg)
except:
message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum} - {msg}"
DataIssue.objects.create(parser="caves", message=message)
print(message)
raise
@ -178,7 +178,7 @@ def create_new_cave(svxpath):
cave.save()
return cave
def do_pending_cave(k, caveid, url, area):
def do_pending_cave(k, caveid, url, area, msg=None):
"""
default for a PENDING cave, should be overwritten in the db later if a real cave of the same name exists
in expoweb/cave_data/1623-"k".html
@ -217,7 +217,7 @@ def do_pending_cave(k, caveid, url, area):
prime_suspect = survex_file
if prime_suspect:
survex_file = prime_suspect
# message = f" ! {k:14} Found a survex file which might be the right one: {survex_file}"
# message = f" ! {k:14} Found a survex file which might be the right one: {survex_file} - {msg}"
# DataIssue.objects.create(parser='caves', message=message, url=url)
# print(message)
return survex_file
@ -227,7 +227,7 @@ def do_pending_cave(k, caveid, url, area):
g = GetCaveLookup()
with transaction.atomic():
if slug in g:
message = f" ! {k:18} cave listed in pendingcaves.txt already exists."
message = f" ! {k:18} cave listed in pendingcaves.txt already exists. - {msg}"
DataIssue.objects.create(parser="caves", message=message, url=url)
print(message)
return
@ -267,11 +267,17 @@ def do_pending_cave(k, caveid, url, area):
default_note += (
f"to remove the line <br><var>{slug}</var><br> as it is no longer 'pending' but 'done. Well Done."
)
urltest = Cave.objects.filter(url=url)
if urltest:
message = f" ! Cave {urltest[0]} already exists with this url {url}. Can't create new cave {slug}"
DataIssue.objects.create(parser="caves", message=message, url=url)
print(message)
return urltest[0]
survex_file = get_survex_file(k)
cave = Cave(
unofficial_number=caveid,
unofficial_number=caveid.upper(),
underground_description="Pending cave write-up - No cave description created yet.",
survex_file=survex_file,
url=url,
@ -281,30 +287,21 @@ def do_pending_cave(k, caveid, url, area):
cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key.
cave.area.add(area)
cave.save()
message = f" ! {k:18} {cave.underground_description} url: {url}"
message = f" ! {k:18} Pending cave write-up url: {url} - {msg}"
DataIssue.objects.create(parser="caves", message=message, url=url)
print(message)
try: # Now create a cave slug ID
CaveSlug.objects.update_or_create(cave=cave, slug=slug, primary=False)
except:
message = f" ! {k:11s} PENDING CaveSLUG {slug} create failure"
message = f" ! {k:11s} PENDING CaveSLUG {slug} create failure - {msg}"
DataIssue.objects.create(parser="caves", message=message)
print(message)
else:
message = f" ! {k:11s} PENDING cave create failure"
message = f" ! {k:11s} PENDING cave create failure - {msg}"
DataIssue.objects.create(parser="caves", message=message)
print(message)
try:
# troggle is more robust against missing entrances now, not needed.
# set_dummy_entrance(k, slug, cave, msg="PENDING")
pass
except:
message = f" ! {k:11s} PENDING entrance + cave UNION create failure '{cave}' [{slug}] {k}"
# message = f" ! {k:11s} PENDING entrance + cave UNION create failure '{cave}' [{ent}]"
DataIssue.objects.create(parser="caves", message=message)
print(message)
return cave
def getXML(text, itemname, minItems=1, maxItems=None, context=""):
@ -730,7 +727,9 @@ def read_cave(filename, cave=None):
cave.save()
return cave
def add_cave_to_pending_list(id):
def add_cave_to_pending_list(id, msg=None):
message = f"On dev machine, adding to PENDING. - {msg}"
print(message)
fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt")
try:
if settings.DBSWITCH == "sqlite": # dev machine only

View File

@ -317,16 +317,16 @@ class LoadingSurvex:
insp = ""
callcount = 0
caverncount = 0
ignoreprefix = ["surface", "kataster", "fixedpts", "gpx", "deprecated"]
ignorenoncave = [
"caves-1623",
"caves-1623/2007-NEU",
"caves-1626",
"caves-1624",
"caves-1627",
"fixedpts/gps/gps00raw",
"",
]
ignoreprefix = ["surface", "kataster", "fixedpts", "gpx", "deprecated", "dummy_"]
# ignorenoncave = [
# "caves-1623",
# "caves-1623/2007-NEU",
# "caves-1626",
# "caves-1624",
# "caves-1627",
# "fixedpts/gps/gps00raw",
# "",
# ]
TREE = "tree"
ODDS = "oddments"
svxpass = TREE
@ -1165,42 +1165,60 @@ class LoadingSurvex:
def IdentifyCave(self, cavepath, svxid, depth):
"""Given a file path for a survex file, e.g. /1626/107/107.svx, or a survex-block path,
return the cave object
return the cave object
kataster
fixedpts/gps
and everything at top level, directly in caves-1623/ not in a subdir
NOTE self.caveslist is a superset of GCaveLookup, which already contains both uppercase and lowercase aliases
"""
path = cavepath.lower()
if path in self.caveslist: # primed with GCaveLookup
return self.caveslist[path]
if cavepath == "caves-1623/99ob02":
for key in self.caveslist:
cave = self.caveslist[key]
if type(cave) != Cave:
print(f"BAD CAVE TYPE '{key}' -- {type(cave)}'{cave}'")
for key in self.caveslist:
cave = self.caveslist[key]
print(f"{key} -- Cave<{cave}>")
for i in self.ignoreprefix:
if cavepath.lower().startswith(i):
message = (f" - {cavepath} starts with <ignoreprefix> (while creating '{svxid}.svx' )")
return False
if cavepath in self.caveslist: # primed with GCaveLookup
return self.caveslist[cavepath]
rx_svxcollection = re.compile(r"(?i)caves-(\d\d\d\d)/(.*)$")
# rx_cave = re.compile(r"(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)")
path_match = self.rx_cave.search(cavepath)
if path_match:
sluggy = f"{path_match.group(1)}-{path_match.group(2)}"
guesses = [sluggy.lower()] # full 1626-107 search, don;t use short-forms
for g in guesses:
if g in self.caveslist:
self.caveslist[cavepath] = self.caveslist[g] # set "caves-1626/107/107" as index to cave 1626-107
return self.caveslist[g]
cave = create_new_cave(cavepath) # uses the pending stuff to create pending cave descriptions
seek = [sluggy, sluggy.replace("1623-","")] # to catch '2023-kt-02' etc
for s in seek:
if s in self.caveslist:
self.caveslist[cavepath] = self.caveslist[s] # set "caves-1626/107/107" as index to cave 1626-107
return self.caveslist[s]
cave = create_new_cave(cavepath, f"Make cave found in survex file {svxid}") # uses the pending code to create pending cave descriptions
self.caveslist[cavepath] = cave
message = f" ! MAKING cave for {cavepath=} {svxid=}"
stash_data_issue(parser="survex", message=message, url=None, sb=(svxid))
if not cavepath.startswith("caves-1624") or cavepath.startswith("caves-1626"):
message = f"\n ! MAKING cave {sluggy} for {cavepath=} {svxid=} (not reporting this for 1624 or 1626)"
# stash_data_issue(parser="survex", message=message, url="/survexfile/{svxid}.svx", sb=(svxid))
if not (cavepath.startswith("caves-1624") or cavepath.startswith("caves-1626")):
print(message, file=sys.stderr)
return cave
else:
# isn't all this pointless...??
if self.is_it_already_pending(cavepath, svxid, depth): # but pending will already have been created as Cave objects
pass
path_match = rx_svxcollection.search(svxid)
if path_match:
message = f" ! Recognised survex file which is not a cave at {svxid=}"
# stash_data_issue(parser="survex", message=message, url=None, sb=(svxid))
# print(message, file=sys.stderr)
return False
else:
# It is too late to add it to the pending caves list here: they were already
# processed in parsers/caves.py. So we have to do a bespoke creation.
cave = create_new_cave(svxid)
message = f" ! Warning: cave identifier '{caveid}'or {id} (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pendingcaves.txt ? In '{includelabel}.svx' at depth:[{len(depth)}]."
message = f" ! ERROR: no cave at '{svxid}.svx - is not a known cavename format. "
print("\n" + message)
print("\n" + message, file=sys.stderr)
print(f"{self.pending}", end="", file=sys.stderr)
stash_data_issue(parser="survex", message=message, url=None, sb=(includelabel))
print(f' ! No regex (standard identifier) cave match for {cavepath.lower()}', file=sys.stderr)
return None
stash_data_issue(parser="survex", message=message, url="{svxid}.svx", sb=(svxid))
return None
def is_it_already_pending(self, headpath, includelabel, depth):
"""Ignore surface, kataser and gpx *include survex files"""
@ -1222,14 +1240,7 @@ class LoadingSurvex:
# print("\n"+message)
# print("\n"+message,file=sys.stderr)
return True
for i in self.ignoreprefix:
if headpath.startswith(i):
message = (
f" - {headpath} starts with <ignoreprefix> (while creating '{includelabel}' sfile & sdirectory)"
)
# print("\n"+message)
# print("\n"+message,file=sys.stderr)
return True
caveid = f"{headpath[6:10]}-{headpath[11:]}".upper()
if caveid in self.pending:
# Yes we didn't find this cave, but we know it is a pending one. So not an error.