2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-15 00:57:15 +00:00

detecting orphan cave ids and adding to pending list

This commit is contained in:
2023-08-02 18:23:04 +03:00
parent c76c09fced
commit a0f85454f8
3 changed files with 110 additions and 37 deletions

View File

@@ -229,7 +229,8 @@ def do_pending_cave(k, caveid, url, area):
print(message)
return
default_note = "_Survex file found in loser repo but no description in expoweb <br><br><br>\n"
default_note = "A reference has been found to this cave id in a survex file in the loser repo, or in a wallet metadata"
default_note += " in a JSON file in the drawings repo, but no Cave Description exists in expoweb (in /cave_data/)<br><br><br>\n"
default_note += "INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then "
default_note += '<br><br>\n\n - (0) look in the <a href="/noinfo/cave-number-index">cave number index</a> for notes on this cave, '
default_note += "<br><br>\n\n - (1) search in the survex file for the *ref to find a "
@@ -495,23 +496,47 @@ def read_cave(filename, cave=None):
What is Class CaveAndEntrance for?
"""
for e in entrances:
eslug = getXML(e, "entranceslug", maxItems=1, context=context)[0]
# if eslug.endswith(('a','b','c','d','e','f')):
# print(f"! Entrance {eslug}")
if eslug.endswith('a b'):
message = f' - Entrance has weird name slug:"{eslug}" cave:"{cave}" caveslug:"{slug}" filename:"cave_data/{filename}"'
DataIssue.objects.create(parser="xEntrances", message=message, url=f"{cave.area}/{cave.area}-{cave.url}_cave_edit/")
print(message)
letter = getXML(e, "letter", maxItems=1, context=context)[0]
if len(entrances) == 1 and not eslug: # may be empty: <entranceslug></entranceslug>
set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrance slug read from file")
msg="DUMMY: no entrance slug read from file, so assume textually same as cave slug"
set_dummy_entrance(slug[5:], slug, c, msg=msg)
print(f"! {msg}\n- {slug} {c}")
else:
try:
if eslug in entrances_xslug:
entrance = entrances_xslug[eslug]
else:
if eslug in entrances_xslug:
# print(f"eslug {eslug} found eslug in xslug cache ")
entrance = entrances_xslug[eslug]
else:
# print(f"eslug {eslug} looking up entrance ")
try:
entrance = Entrance.objects.get(slug=eslug)
entrances_xslug[eslug] = entrance
except:
message = f"! eslug {eslug} Abort entrance loading. Failed to find entrance in db"
DataIssue.objects.create(parser="xEntrances", message=message, url=f"{cave.area}/{cave.area}-{cave.url}_cave_edit/")
print(message)
return
if eslug != f"{entrance}":
message = f"eslug {eslug} using different entrance {entrance} to set CaveAndEntrance"
DataIssue.objects.create(parser="xEntrances", message=message, url=f"{cave.area}/{cave.area}-{cave.url}_cave_edit/")
print(message)
try:
CaveAndEntrance.objects.update_or_create(
cave=cave, entrance_letter=letter, entrance=entrance
)
except:
message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {e} letter:"{letter}" cave:"{cave}" filename:"cave_data/{filename}"'
DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.url}_edit/")
print(f"! Entrance setting failure {slug}")
message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} letter:"{letter}" cave:"{cave}" filename:"cave_data/{filename}"\n{e}'
DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.url}_cave_edit/")
print(message)
def reload_entrances():
"""For individual re-reading of a cave_data file when editing,
@@ -690,19 +715,26 @@ def read_cave(filename, cave=None):
cave.save()
return cave
def add_cave_to_pending_list(id):
    """Append a cave id, on its own line, to pendingcaves.txt in CAVEDESCRIPTIONS.

    The id is only recorded when the pending file already exists as a regular
    file; otherwise this function does nothing.
    """
    pending_path = Path(CAVEDESCRIPTIONS) / "pendingcaves.txt"
    if not pending_path.is_file():
        # No pending list present: do not create one here.
        return
    with pending_path.open("a") as pending_file:
        pending_file.write(f"{id}\n")
def readcaves():
"""Called from databaseReset mass importer.
Reads the xml-format HTML 'cave' files in the EXPOWEB repo, the survex files from the loser repo.
"""
# Pending is for those caves which do not have cave_data/1623-xxx.html XML files even though
# they exist and have surveys.
pending = set()
fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt")
if fpending.is_file():
with open(fpending, "r") as fo:
cids = fo.readlines()
for cid in cids:
pending.add(cid.strip().rstrip("\n").upper())
with transaction.atomic():
pending = set()
fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt")
if fpending.is_file():
with open(fpending, "r") as fo:
cids = fo.readlines()
for cid in cids:
pending.add(cid.strip().rstrip("\n").upper())
with transaction.atomic():
print(" - Deleting Caves and Entrances")
@@ -719,11 +751,13 @@ def readcaves():
Entrance.objects.all().delete()
except:
pass
# Clear the cave data issues and the caves as we are reloading
DataIssue.objects.filter(parser="areas").delete()
DataIssue.objects.filter(parser="caves").delete()
DataIssue.objects.filter(parser="caves ok").delete()
#DataIssue.objects.filter(parser="entrances").delete()
# Clear the cave data issues and the caves as we are reloading
DataIssue.objects.filter(parser="areas").delete()
DataIssue.objects.filter(parser="caves").delete()
DataIssue.objects.filter(parser="caves ok").delete()
#DataIssue.objects.filter(parser="entrances").delete()
#DataIssue.objects.filter(parser="xEntrances").delete()
with transaction.atomic():
area = get_area("1623")
@@ -731,11 +765,12 @@ def readcaves():
print(" - Reading Entrances from entrance descriptions xml files")
for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files
read_entrance(filename)
# WHy is this needed ? Without it, we lose these DataIssues!
ent_issues = DataIssue.objects.filter(parser="entrances")
print(f"__ We now have {len(ent_issues)} entrance DataIssues")
# Why is this needed ? Without it, we lose these DataIssues!
ent_issues = DataIssue.objects.filter(parser="entrances")
print(f" _ We now have {len(ent_issues)} entrance DataIssues")
with transaction.atomic():
print(" - Reading Caves from cave descriptions xml files")
for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files
if filename.endswith(".html"):