import os
import re
import subprocess
import tempfile
import urllib
import zipfile
from pathlib import Path

import django
from bs4 import BeautifulSoup
from django.core.exceptions import MultipleObjectsReturned, ObjectDoesNotExist
from django.http import FileResponse, HttpResponse, HttpResponseNotFound, HttpResponseRedirect
from django.shortcuts import redirect, render
from django.template import loader
from django.urls import NoReverseMatch, reverse
from django.utils.safestring import mark_safe

import troggle.settings as settings
from troggle.core.forms import CaveForm, EntranceForm, EntranceLetterForm  # CaveAndEntranceFormSet,
from troggle.core.models.caves import Cave, CaveAndEntrance, Entrance, GetCaveLookup, get_cave_leniently
from troggle.core.models.logbooks import QM
from troggle.core.models.wallets import Wallet
from troggle.core.utils import current_expo, write_and_commit
from troggle.core.views import expo
from troggle.parsers.caves import read_cave, read_entrance
from troggle.settings import CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS

from .auth import login_required_if_public

"""Manages the complex procedures to assemble a cave description out of the components
Manages the use of cavern to parse survex files to produce 3d and pos files
"""

todo = """
- in getCaves() search GCavelookup first, which should raise a MultpleObjectsReturned exception if no duplicates

- Learn to use Django .select_related() and .prefetch_related() to speed things up
  especially on the big report pages
  https://zerotobyte.com/how-to-use-django-select-related-and-prefetch-related/

- Remove all the URL rewriting which is there because we have not yet edited all the caves to use
  our new (2023) standard addressing of /16xx/NNN/NNN.html where *all* caves are assumed to have their
  own directory 16xx/NNN/ even if they have no images to put in it.
"""

# def cavepagefwd(request, karea=None, subpath=None):
#     """archaic, just send to the caves list page
#     """
#     return redirect("/caves")


def get_cave_from_slug(caveslug):
    """Find the single Cave identified by a slug such as '1623-161' or '1623-2023-MM-02'.

    The first four characters are taken as the area code and everything after
    the separator as the cave number, which is matched against both
    kataster_number and unofficial_number within that area.

    Returns the Cave if exactly one distinct cave matches, otherwise None
    (nothing found, or the identifier is ambiguous).

    Needs refactoring. An older cross-check against Cave.objects.get(caveslug__slug=...)
    used to sit after the final return and could never run; it has been removed.
    """
    caves = []
    print(f"get_cave_from_slug(): {caveslug} ...")
    areacode = caveslug[:4]  # e.g. 1623
    cave_number = caveslug[5:]  # e.g. 161 or 2023-MM-02
    thisarea = Cave.objects.filter(areacode=areacode)

    caves_k = thisarea.filter(kataster_number=cave_number)
    if len(caves_k) == 1:
        caves.append(caves_k[0])
        print(f"get_cave_from_slug(): {caves_k=} {len(caves_k)=}")

    caves_id = thisarea.filter(unofficial_number=cave_number)
    if len(caves_id) == 1:
        # The same cave may match on both fields; that is not an ambiguity.
        if caves_id[0] not in caves:
            caves.append(caves_id[0])
        print(f"get_cave_from_slug(): {caves_id=} {len(caves_id)=}")

    if len(caves) > 1:
        print(f"get_cave_from_slug(): {caveslug} More than 1 \n{caves}")
        return None
    if not caves:
        print(f"get_cave_from_slug(): {caveslug} Nowt found..")
        return None

    cave = caves[0]
    print(f"get_cave_from_slug(): {caveslug} FOUND {cave}")
    return cave


def caveslugfwd(request, slug):
    """Redirect a slug-identified request to the cave's own url.

    This is ass backwards. It would be better style to have
    the slug-identified request be the master, and have
    other paths redirect to it, rather than what we have here.
    Pending a change where we remove cave.url as a field and have
    an explicit fixed convention instead.

    Renders the generic error page when the slug is empty or unknown.
    """
    if slug:
        Gcavelookup = GetCaveLookup()
        if slug in Gcavelookup:
            cave = Gcavelookup[slug]
            return redirect(f"/{cave.url}")

    # Empty slug, or a slug the lookup does not know: a view must still return a response.
    message = f"Failed to find cave from identifier given: {slug}."
    return render(request, "errors/generic.html", {"message": message})


def getCaves(cave_id):
    """Only gets called if a call to getCave() raises a MultipleObjects exception

    Returns a list of the caves whose kataster_number is cave_id, plus the cave
    the GetCaveLookup() dictionary holds for cave_id (if any). Returns an empty
    list if anything goes wrong.

    TO DO: search GCavelookup first, which should raise a MultpleObjectsReturned exception if there
    are duplicates"""
    try:
        caveset = set(Cave.objects.filter(kataster_number=cave_id))
        Gcavelookup = GetCaveLookup()  # dictionary makes strings to Cave objects
        if cave_id in Gcavelookup:
            caveset.add(Gcavelookup[cave_id])
        return list(caveset)
    except Exception:  # deliberate best-effort: callers only want a (possibly empty) list
        return []


def getCave(cave_id):
    """Returns a cave object when given a cave name or number. It is used by views including cavehref, ent, wallets and qm.

    Raises MultipleObjectsReturned if several caves share the kataster number,
    and ObjectDoesNotExist if no cave can be found by kataster number or in
    the GetCaveLookup() dictionary.

    TO DO: search GCavelookup first, which should raise a MultpleObjectsReturned exception if there
    are duplicates"""
    try:
        return Cave.objects.get(kataster_number=cave_id)
    except Cave.MultipleObjectsReturned as ex:
        raise MultipleObjectsReturned("Duplicate kataster number") from ex  # propagate this up
    except Cave.DoesNotExist as ex:
        Gcavelookup = GetCaveLookup()  # dictionary makes strings to Cave objects
        if cave_id in Gcavelookup:
            return Gcavelookup[cave_id]
        raise ObjectDoesNotExist("No cave found with this identifier in any id field") from ex  # propagate this up
    except Exception as ex:
        # e.g. a cave_id that cannot be used in the query at all
        raise ObjectDoesNotExist("No cave found with this identifier in any id field") from ex


def pad5(x):
    """Return the text of regex match x left-padded with zeros to at least 5 characters."""
    return x.group(0).zfill(5)


def padnumber(x):
    """Zero-pad every run of digits in string x so that string order follows numeric order."""
    return re.sub(r"\d+", pad5, x)


def numericalcmp(x, y):
    """Old-style three-way comparison (-1, 0, 1) of x and y on their zero-padded forms.

    Python 3 has no builtin cmp(), so the comparison is spelled out here.
    """
    padded_x = padnumber(x)
    padded_y = padnumber(y)
    return (padded_x > padded_y) - (padded_x < padded_y)


def caveKey(c):
    """This function goes into a lexicographic sort function, and the values are strings,
    but we want to sort numerically on kataster number before sorting on unofficial number.

    Kataster numbers get a prefix according to their magnitude, caves without
    one sort last under '9999.', and non-numeric kataster numbers are tagged
    with an '_ERROR' suffix.
    """
    if not c.kataster_number:
        return "9999." + c.unofficial_number
    try:
        number = int(c.kataster_number)
    except ValueError:
        return c.kataster_number + "_ERROR"
    if number >= 100:
        return "99." + c.kataster_number
    if number >= 10:
        return "9." + c.kataster_number
    return c.kataster_number


def getnotablecaves():
    """Return the Cave objects listed in settings.NOTABLECAVES1623 and settings.NOTABLECAVES1626.

    A number that does not identify exactly one cave in its area is reported
    on stdout and skipped.
    """
    notablecaves = []
    notable_by_area = (
        ("1623", settings.NOTABLECAVES1623),
        ("1626", settings.NOTABLECAVES1626),
    )
    for areacode, kataster_numbers in notable_by_area:
        for kataster_number in kataster_numbers:
            try:
                cave = Cave.objects.get(kataster_number=kataster_number, areacode=areacode)
            except (ObjectDoesNotExist, MultipleObjectsReturned, ValueError):
                print(
                    " ! Notable Caves: FAILED to get only one cave per kataster_number OR invalid number for: "
                    f"{areacode}-{kataster_number}"
                )
            else:
                notablecaves.append(cave)
    print(notablecaves)
    return notablecaves


def caveindex(request):
    """Render the index page listing the caves of each area, sorted with caveKey().

    Should use Django order-by for lazy sorting, not here. But only after we have a proper slug system in place for Caves
    """
    # allcaves = Cave.objects.all()
    # for c in allcaves:
    #     if c.entrances:
    #         pass
    caves1623 = sorted(Cave.objects.filter(areacode="1623"), key=caveKey)
    caves1624 = sorted(Cave.objects.filter(areacode="1624"), key=caveKey)
    caves1626 = sorted(Cave.objects.filter(areacode="1626"), key=caveKey)
    caves1627 = sorted(Cave.objects.filter(areacode="1627"), key=caveKey)
    return render(
        request,
        "caveindex.html",
        {
            "caves1623": caves1623,
            "caves1626": caves1626,
            "caves1627": caves1627,
            "caves1624": caves1624,
            "notablecaves": getnotablecaves(),
            "cavepage": True,
            "year": current_expo(),
        },
    )


def entranceindex(request):
    """Render the index page of all entrances, ordered by slug."""
    ents = Entrance.objects.all().order_by("slug")
    return render(
        request,
        "entranceindex.html",
        {"entrances": ents},
    )


def cave3d(request, cave_id=""):
    """This is used to create a download url in templates/cave.html if anyone wants to download the .3d file
    The caller template tries kataster first, then unofficial_number if that kataster number does not exist
    but only if Cave.survex_file is non-empty

    Responds with 404 if no cave matches cave_id, and with a page listing the
    candidates if the identifier is ambiguous.
    """
    try:
        cave = getCave(cave_id)
    except ObjectDoesNotExist:
        return HttpResponseNotFound()
    except MultipleObjectsReturned:
        # getCave() re-raises duplicates as the generic Django exception, so catch that one.
        # should really produce a better error message. This is a failure of ambiguous aliases probably.
        caves = getCaves(cave_id)
        return render(request, "svxcaveseveral.html", {"settings": settings, "caves": caves, "year": current_expo()})
    return file3d(request, cave, cave_id)


def file3d(request, cave, cave_id):
    """Produces a .3d file directly for download.
    survex_file should be in valid path format 'caves-1623/264/264.svx' but it might be mis-entered as simply '2012-ns-10.svx'

    Also the cave.survex_file may well not match the cave description path:
    e.g. it might be to the whole system 'smk-system.svx' instead of just for the specific cave.

    - If the expected .3d file corresponding to cave.survex_file is present, return it.
    - If the cave.survex_file exists, generate the 3d file, cache it and return it
    - Use the cave_id to guess what the 3d file might be and, if in the cache, return it
      (not implemented: cave_id is currently unused)

    There is a problem as the filename is stripped of all areacode information, so both 1624-161 and 1623-161
    have a file called 161.svx and return a file called "161.3d" which may get
    incorrectly cached by other software (i.e your browser)

    Returns an HttpResponse carrying the .3d file as an attachment, or an
    HttpResponseNotFound if no .3d file can be found or generated.
    """

    def runcavern(survexpath):
        """Run cavern on survexpath and report on stdout if no .3d file results.

        This has not yet been properly updated with respect to putting the .3d file in the same folder as the .svx filse
        as done in runcavern3d() in parsers/survex.py
        Needs testing.

        NOTE(review): cavern is told to write into settings.SURVEX_DATA, whereas the
        caller looks for the .3d file next to the .svx file; these only coincide
        for survex files at the top level of SURVEX_DATA.

        Raises OSError if cavern cannot be started.
        """
        if not survexpath.is_file():
            print(f" - - Regeneration skipped, no such survex file '{survexpath}'")
            return
        try:
            completed_process = subprocess.run(
                [settings.CAVERN, "--log", f"--output={settings.SURVEX_DATA}", f"{survexpath}"],
                capture_output=True,  # so that the diagnostics below have something to show
                text=True,
            )
        except OSError as ex:
            # propagate this to caller. There is no completed process to report on here.
            raise OSError(f"Failed to run cavern on '{survexpath}'") from ex

        op3d = (Path(settings.SURVEX_DATA) / Path(survexpath).name).with_suffix(".3d")
        op3dlog = Path(op3d.with_suffix(".log"))
        if not op3d.is_file():
            print(f" - - Regeneration FAILED\n - - from '{survexpath}'\n - - to '{op3d}'")
            print(" - - Regeneration stdout: ", completed_process.stdout)
            if op3dlog.is_file():
                print(" - - Regeneration cavern log output: ", op3dlog.read_text())

    def return3d(threedpath, cave):
        """Return threedpath as a download named after the cave slug, or a 404 if it is missing."""
        newfilename = cave.slug() + ".3d"  # add the "1623-" part of the filename effectively.
        if not threedpath.is_file():
            # NOTE(review): this message looks truncated; confirm the intended heading text.
            message = f'\npath: "{threedpath}"'
            return HttpResponseNotFound(message)
        response = HttpResponse(content=threedpath.read_bytes(), content_type="application/3d")
        response["Content-Disposition"] = f"attachment; filename={newfilename}"
        return response

    if not cave.survex_file:
        # Nothing to build paths from: Path(None) and Path("").with_suffix() would both raise.
        return HttpResponseNotFound("No survex file is recorded for this cave")

    survexname = Path(cave.survex_file).name  # removes directories ie 1623/161/161.svx -> 161.svx
    survexpath = Path(settings.SURVEX_DATA, cave.survex_file)
    threedname = Path(survexname).with_suffix(".3d")  # removes .svx, replaces with .3d AND DISCARDS PATH arrgh
    threedpath = survexpath.parent / threedname

    if threedpath.is_file():
        # Cached .3d exists: refresh it only if the survex source is newer.
        if survexpath.is_file() and os.path.getmtime(survexpath) > os.path.getmtime(threedpath):
            runcavern(survexpath)
        return return3d(threedpath, cave)

    if survexpath.is_file():
        runcavern(survexpath)
        return return3d(threedpath, cave)

    # Get here if cave.survex_file was set but did not correspond to a valid svx file
    # NOTE(review): both messages below look truncated; confirm the intended heading text.
    if survexpath.is_file():
        # a file, but invalid format
        message = f'\nCould not generate 3d file from "{survexpath}"'
    else:
        # we could try to guess that 'caves-1623/' is missing,... nah.
        message = f'\npath: "{survexpath}"'
    return HttpResponseNotFound(message)


def rendercave(request, cave, slug, cave_id=""):
    """Gets the data and files ready and then triggers Django to render the template.
    The resulting html contains urls which are dispatched independently, e.g. the 'download' link

    A non-public cave is shown as the 'nonpublic.html' page to anonymous users
    when settings.PUBLIC_SITE is set. Otherwise 'cave.html' is rendered; any
    exception raised while rendering is deliberately left to propagate.
    """
    if cave.non_public and settings.PUBLIC_SITE and not request.user.is_authenticated:
        return render(request, "nonpublic.html", {"instance": cave, "cavepage": True, "cave_id": cave_id})

    # print(f" ! rendercave: slug:'{slug}' survex file:'{cave.survex_file}'")
    try:
        svx3d = Path(cave.survex_file).stem
        svxstem = Path(settings.SURVEX_DATA) / Path(cave.survex_file)
        # print(f" ! rendercave: slug:'{slug}' '' ++ '{svxstem}'")
    except Exception:
        # e.g. survex_file is not set; the page is still rendered, just without survex data
        svx3d = ""
        svxstem = ""
        print(f" ! rendercave: slug:'{slug}' FAIL TO MANAGE survex file:'{cave.survex_file}'")

    # NOTE the template itself loads the 3d file using javascript before it loads anything else.
    # Django cannot see what this javascript is doing, so we need to ensure that the 3d file exists first.
    # So only do this render if a valid .3d file exists. TO BE DONE
    # see design docum in troggle/templates/cave.html
    # see rendercave() in troggle/core/views/caves.py
    templatefile = "cave.html"
    wallets = Wallet.objects.filter(caves=cave)
    if not cave_id:
        cave_id = slug  # cave.unofficial_number
    context = {
        "cave_editable": True,
        "settings": settings,
        "cave": cave,
        "cavepage": True,
        "cave_id": cave_id,
        "svxstem": str(svxstem),
        "svx3d": svx3d,
        "wallets": wallets,
        "year": current_expo(),
    }
    # Do not catch any exceptions here: propagate up to caller
    # crashes here with NoReverseMatch if url not set up for 'edit_cave' in urls.py
    return render(request, templatefile, context)
def cavepage(request, karea=None, subpath=None):
    """Displays a cave description page
    accessed by kataster area number specifically
    OR
    accessed by cave.url specifically set in data, e.g.
    "1623/000/000" <= cave-data/1623-000.html
    "1623/41/115.htm" <= cave-data/1623-115.html
    so we have to query the database to find the URL as we cannot rely on the url actually telling us the cave by inspection.

    NOTE that old caves have ".html" (or ".htm") in the URL as they used to be actual files. But since 2006 these URLs
    refer to virtual pages generated on the fly by troggle, so the".html" is confusing and redundant.
    But this confused Becka so it was re-instated. Thus creating more confusion for future generations...

    There are also A LOT OF URLS to e.g. /1623/161/l/rl89a.htm which are IMAGES and real html files
    in cave descriptions. These need to be handled HERE too (accident of history).

    Responds with the 'pagenotfound.html' page (status 404) when karea or
    subpath is missing.
    """
    if not karea or not subpath or subpath == "/":
        print(f"{karea=} {subpath=} ")
        return render(request, "pagenotfound.html", {"path": f"{karea}/{subpath}"}, status=404)

    # lack of validation for karea, 162x
    # subpath has an initial /
    kpath = karea + subpath
    # print(f" ! cavepage:'{kpath}' kataster area:'{karea}' rest of path:'{subpath}'")
    caves = Cave.objects.filter(url=kpath)
    if len(caves) == 1:
        cave = caves[0]
        return rendercave(request, cave, cave.slug())

    # From here on subpath has NO leading or trailing "/"
    subpath = subpath.strip("/")
    # re do all this using pathlib functions
    parts = subpath.split("/")
    caveid = parts[0].split(".")[0]  # e.g. 204.htm -> 204

    if len(parts) > 5:
        # recursive loop. break out of it.
        slug = f"{karea}-{caveid}"
        if cave := get_cave_from_slug(slug):  # walrus operator
            return redirect(f"/{cave.url}")
        return redirect("/caves")
        # epath = karea + subpath # e.g. 1623 /204
        # return expo.expopage(request, epath)

    # BUGGER the real problem is the the cave descript has embedded in it images like
    # src="110/entrance.jpeg and since the cave url is now /1623/110/110.html
    # the images try to load from /1623/110/110/entrance.jpeg and of course fail.
    # THIS IS A HORRIBLE HACK
    if len(parts) == 1:
        # simple filename, no folders in path,
        # either need to insert caveid OR leave as relative link as we are already "in" /1623/nn/
        # The separator before subpath is explicit because the leading "/" was stripped above;
        # without it the target was e.g. /1623/204204.htm, which redirected again without end.
        return redirect(f"/{karea}/{caveid}/{subpath}")

    if len(parts) > 2 and parts[0] == parts[1]:
        # e.g. i/204.jpg, but that's ok as we are already "in" /1623/nn/
        # double caveid: drop the first of the two
        epath = "/".join([karea, *parts[1:]])
        # print(f"{subpath=}\n {epath=}")
        return expo.expopage(request, epath)

    # if either the first two parts are not /caveid/caveid/
    # or the number of parts == 2,
    # print(f"2 {subpath=}")
    epath = karea + "/" + subpath
    return expo.expopage(request, epath)
@login_required_if_public
def edit_cave(request, path="", slug=None):
"""This is the form that edits all the cave data and writes out an XML file in the :expoweb: repo folder
The format for the file being saved is in templates/dataformat/cave.xml
Warning. This uses Django deep magic in the CaveForm processing.
It saves the data into into the database and into the html file, which it then commits to git.
We basically ignore the