diff --git a/core/views/scans.py b/core/views/scans.py
index 83d6ac3..f7abc06 100644
--- a/core/views/scans.py
+++ b/core/views/scans.py
@@ -12,6 +12,7 @@ from troggle.core.models.wallets import Wallet
from troggle.core.models.troggle import DataIssue, Expedition, Person
from troggle.core.views.expo import getmimetype
from troggle.parsers.survex import set_walletdate
+from troggle.parsers.caves import add_cave_to_pending_list
# from troggle.parsers.people import GetPersonExpeditionNameLookup
# import parsers.surveys
@@ -85,8 +86,21 @@ def fillblankpeople(w):
# print(f' - {wp=} {nobody=}')
populatewallet(w)
+def is_cave(id):
+ """Report whether a cave id is a key in the lookup returned by GetCaveLookup().
+ The id is first stripped of leading/trailing quote, space and square-bracket characters.
+ Returns True if the cleaned id is found, otherwise False.
+ Side effect: when the cleaned id is not found and is neither empty nor "unknown"
+ (compared case-insensitively), it is passed to add_cave_to_pending_list().
+ """
+ Gcavelookup = GetCaveLookup()
+ # Remove any surrounding ' [ ] and space characters, e.g. left over from a stringified list
+ id = id.strip("' []'")
+ if id in Gcavelookup:
+ return True
+ else:
+ print(f" - Failed to find cave object from id <{id}>")
+ # Empty ids and the placeholder "unknown" are not recorded as pending caves
+ if id.lower() != "unknown" and id != "":
+ print(f" - adding <{id}> to pendingcaves.txt list")
+ add_cave_to_pending_list(id)
+ return False
def fillblankothers(w):
+ """This is on the way to having a many:many relationship between Caves and Wallets
+ """
if not w.walletdate:
set_walletdate(w)
@@ -98,14 +112,21 @@ def fillblankothers(w):
else:
if type(wcaveid) == list:
for i in wcaveid:
- if i in Gcavelookup:
- w.caveobj = Gcavelookup[i] # just sets it to the last one found. nasty. bug waiting to happen
- # print(f' - Found cave object from id {wcaveid}')
+ i = i.strip("' []'")
+ if is_cave(i):
+ w.caveobj = Gcavelookup[i] # just sets it to the last one found. nasty. bug waiting to happen
+ elif wcaveid.find(',') != -1:
+ # it's a list of cave ids as a string
+ ids = wcaveid.split(',')
+ for i in ids:
+ i = i.strip("' []'")
+ if is_cave(i):
+ w.caveobj = Gcavelookup[i] # just sets it to the last one found. nasty. bug waiting to happen
else:
- if wcaveid in Gcavelookup:
- w.caveobj = Gcavelookup[wcaveid]
- else:
- print(f" - Failed to find cave object from id {wcaveid}")
+ if is_cave(wcaveid):
+ w.caveobj = Gcavelookup[wcaveid.strip("' []'")]
+
+
def fixsurvextick(w, ticks):
@@ -216,7 +237,7 @@ def walletslistyear(request, year):
def cavewallets(request, caveid):
"""Returns all the wallets for just one cave"""
- print("-cavewalletsl")
+ print("-cavewallets")
Gcavelookup = GetCaveLookup()
if caveid in Gcavelookup:
@@ -233,8 +254,23 @@ def cavewallets(request, caveid):
for z in zilchwallets:
zcaveid = z.cave()
if zcaveid:
- cleanid = str(zcaveid).strip("'[]'")
- if cleanid in Gcavelookup:
+ cleanid = str(zcaveid).strip("' []'")
+
+ if cleanid.find(',') != -1:
+ # it's a list of cave ids
+ wurl = f"/walletedit/{z.walletname.replace('#',':')}"
+ message = f" ! In {z.walletname} we do not handle lists of cave ids yet '{cleanid}'"
+ print(message)
+ DataIssue.objects.update_or_create(parser="scans", message=message, url=wurl)
+
+ # it's a list of cave ids as a string. Identify any orphan caves hidden here
+ ids = cleanid.split(',')
+ for i in ids:
+ i = i.strip("' []'")
+ if is_cave(i):
+ fcave = Gcavelookup[i.strip("' []'")] # just sets it to the last one found. nasty. bug waiting to happen
+
+ elif cleanid in Gcavelookup:
fcave = Gcavelookup[cleanid]
if str(fcave.slug()) == caveid:
# print(f' - Found one ! {z.walletname=} {zcaveid=}')
@@ -245,9 +281,10 @@ def cavewallets(request, caveid):
pass
else:
wurl = f"/walletedit/{z.walletname.replace('#',':')}"
- message = f" ! In {z.walletname} there is an unrecognised cave name '{cleanid}' (out of {len(Gcavelookup):,} cave names and aliases)"
+ message = f" ! In {z.walletname} there is an unrecognised cave name '{cleanid}', adding to pending list."
print(message)
DataIssue.objects.update_or_create(parser="scans", message=message, url=wurl)
+ add_cave_to_pending_list(cleanid)
manywallets = list(set(wallets))
for w in manywallets:
@@ -277,7 +314,8 @@ def oldwallet(request, path):
def scansingle(request, path, file):
- """sends a single binary file to the user for display - browser decides how using mimetype"""
+ """sends a single binary file to the user for display - browser decides how using mimetype
+ This is very unsafe"""
try:
wallet = Wallet.objects.get(walletname=urlunquote(path))
singlescan = SingleScan.objects.get(wallet=wallet, name=file)
diff --git a/parsers/caves.py b/parsers/caves.py
index b4643c8..2bfd648 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -229,7 +229,8 @@ def do_pending_cave(k, caveid, url, area):
print(message)
return
- default_note = "_Survex file found in loser repo but no description in expoweb
\n"
+ default_note = "A reference has been found to this cave id in a survex file in the loser repo, or in a wallet metadata"
+ default_note += " in a JSON file in the drawings repo, but no Cave Description exists in expoweb (in /cave_data/)
\n"
default_note += "INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then "
default_note += '
\n\n - (0) look in the cave number index for notes on this cave, '
default_note += "
\n\n - (1) search in the survex file for the *ref to find a "
@@ -495,23 +496,47 @@ def read_cave(filename, cave=None):
What is Class CaveAndEntrance for?
"""
for e in entrances:
+
eslug = getXML(e, "entranceslug", maxItems=1, context=context)[0]
+ # if eslug.endswith(('a','b','c','d','e','f')):
+ # print(f"! Entrance {eslug}")
+ if eslug.endswith('a b'):
+ message = f' - Entrance has weird name slug:"{eslug}" cave:"{cave}" caveslug:"{slug}" filename:"cave_data/{filename}"'
+ DataIssue.objects.create(parser="xEntrances", message=message, url=f"{cave.area}/{cave.area}-{cave.url}_cave_edit/")
+ print(message)
+
letter = getXML(e, "letter", maxItems=1, context=context)[0]
if len(entrances) == 1 and not eslug: # may be empty:
- set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrance slug read from file")
+ msg="DUMMY: no entrance slug read from file, so assume textually same as cave slug"
+ set_dummy_entrance(slug[5:], slug, c, msg=msg)
+ print(f"! {msg}\n- {slug} {c}")
else:
- try:
- if eslug in entrances_xslug:
- entrance = entrances_xslug[eslug]
- else:
+ if eslug in entrances_xslug:
+ # print(f"eslug {eslug} found eslug in xslug cache ")
+ entrance = entrances_xslug[eslug]
+ else:
+ # print(f"eslug {eslug} looking up entrance ")
+ try:
entrance = Entrance.objects.get(slug=eslug)
entrances_xslug[eslug] = entrance
+ except:
+ message = f"! eslug {eslug} Abort entrance loading. Failed to find entrance in db"
+ DataIssue.objects.create(parser="xEntrances", message=message, url=f"{cave.area}/{cave.area}-{cave.url}_cave_edit/")
+ print(message)
+ return
+
+ if eslug != f"{entrance}":
+ message = f"eslug {eslug} using different entrance {entrance} to set CaveAndEntrance"
+ DataIssue.objects.create(parser="xEntrances", message=message, url=f"{cave.area}/{cave.area}-{cave.url}_cave_edit/")
+ print(message)
+ try:
CaveAndEntrance.objects.update_or_create(
cave=cave, entrance_letter=letter, entrance=entrance
)
except:
- message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {e} letter:"{letter}" cave:"{cave}" filename:"cave_data/{filename}"'
- DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.url}_edit/")
+ print(f"! Entrance setting failure {slug}")
+ message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} letter:"{letter}" cave:"{cave}" filename:"cave_data/{filename}"\n{e}'
+ DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.url}_cave_edit/")
print(message)
def reload_entrances():
"""For individual re-reading of a cave_data file when editing,
@@ -690,19 +715,26 @@ def read_cave(filename, cave=None):
cave.save()
return cave
+def add_cave_to_pending_list(id):
+ """Append the given cave id, followed by a newline, to pendingcaves.txt in CAVEDESCRIPTIONS.
+ Nothing is written if pendingcaves.txt does not already exist as a file.
+ No check is made here for the id already being present in the file.
+ """
+ fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt")
+ # Only append to an existing file; this function never creates pendingcaves.txt
+ if fpending.is_file():
+ with open(fpending, "a") as pend:
+ pend.write(f"{id}\n")
+
def readcaves():
"""Called from databaseReset mass importer.
Reads the xml-format HTML 'cave' files in the EXPOWEB repo, the survex files from the loser repo.
"""
# Pending is for those caves which do not have cave_data/1623-xxx.html XML files even though
# they exist and have surveys.
- pending = set()
- fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt")
- if fpending.is_file():
- with open(fpending, "r") as fo:
- cids = fo.readlines()
- for cid in cids:
- pending.add(cid.strip().rstrip("\n").upper())
+ with transaction.atomic():
+ pending = set()
+ fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt")
+ if fpending.is_file():
+ with open(fpending, "r") as fo:
+ cids = fo.readlines()
+ for cid in cids:
+ pending.add(cid.strip().rstrip("\n").upper())
with transaction.atomic():
print(" - Deleting Caves and Entrances")
@@ -719,11 +751,13 @@ def readcaves():
Entrance.objects.all().delete()
except:
pass
- # Clear the cave data issues and the caves as we are reloading
- DataIssue.objects.filter(parser="areas").delete()
- DataIssue.objects.filter(parser="caves").delete()
- DataIssue.objects.filter(parser="caves ok").delete()
- #DataIssue.objects.filter(parser="entrances").delete()
+
+ # Clear the cave data issues and the caves as we are reloading
+ DataIssue.objects.filter(parser="areas").delete()
+ DataIssue.objects.filter(parser="caves").delete()
+ DataIssue.objects.filter(parser="caves ok").delete()
+ #DataIssue.objects.filter(parser="entrances").delete()
+ #DataIssue.objects.filter(parser="xEntrances").delete()
with transaction.atomic():
area = get_area("1623")
@@ -731,11 +765,12 @@ def readcaves():
print(" - Reading Entrances from entrance descriptions xml files")
for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files
read_entrance(filename)
-
- # WHy is this needed ? Without it, we lose these DataIssues!
- ent_issues = DataIssue.objects.filter(parser="entrances")
- print(f"__ We now have {len(ent_issues)} entrance DataIssues")
+ # Why is this needed ? Without it, we lose these DataIssues!
+ ent_issues = DataIssue.objects.filter(parser="entrances")
+ print(f" _ We now have {len(ent_issues)} entrance DataIssues")
+
+ with transaction.atomic():
print(" - Reading Caves from cave descriptions xml files")
for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files
if filename.endswith(".html"):
diff --git a/parsers/survex.py b/parsers/survex.py
index bc1efd9..59278ed 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -2564,7 +2564,7 @@ def LoadSurvexBlocks():
DataIssue.objects.filter(parser="survexleg").delete()
DataIssue.objects.filter(parser="survexunits").delete()
DataIssue.objects.filter(parser="survex team").delete()
- DataIssue.objects.filter(parser="xEntrances").delete()
+ # DataIssue.objects.filter(parser="xEntrances").delete()
print(" - survex Data Issues flushed")
mem1 = get_process_memory()
print(f" - MEM:{mem1:7.2f} MB now ", file=sys.stderr)