"""Views for browsing, editing and processing survex (.svx) files.

Everything that views survexfiles but also displays data on a cave or caves when there is ambiguity
"""

import datetime
import difflib
import os
import re
import socket
import subprocess
from collections import namedtuple
from pathlib import Path

from django import forms
from django.db import models
from django.db.models import Q
from django.core.exceptions import MultipleObjectsReturned, ObjectDoesNotExist
from django.http import HttpResponse
from django.shortcuts import render
from django.views.decorators.csrf import ensure_csrf_cookie

import troggle.settings as settings
from troggle.core.models.logbooks import LogbookEntry
from troggle.core.models.caves import Cave
from troggle.core.models.survex import SurvexFile, SurvexBlock
from troggle.core.models.wallets import Wallet
from troggle.core.utils import only_commit
from troggle.parsers.survex import parse_one_file

todo = """- survexcavesingle is not properly producing any result for Homecoming, 1626-359, 2018-dm-07
even though there are dozens of surveys.

- REFACTOR the very impenetrable code for scanningsubdirectories, replace with modern python pathlib

- filter out the non-public caves from display UNLESS LOGGED IN

- Never actual uses the object for the survexfile, works entirely from the filepath! Make it check and validate

- the primary survex file in each cave directory should be in a configuration, not buried in the code...

- Save and re-parse an edited survexfile which already exists in the db, and update
all its dependencies (work in progress)
"""

# Root of the survex dataset; every survex path handled here is relative to it.
survexdatasetpath = Path(settings.SURVEX_DATA)

# Shown in the editor when the requested .svx file does not exist on disk yet.
survextemplatefile = """; *** THIS IS A TEMPLATE FILE NOT WHAT YOU MIGHT BE EXPECTING ***

*** DO NOT SAVE THIS FILE WITHOUT RENAMING IT !! ***
;[Stuff in square brackets is example text to be replaced with real data,
; removing the square brackets]

*begin [surveyname]

; stations linked into other surveys (or likely to)
*export [1 8 12 34]

; Cave:
; Area in cave/QM:
*title ""
*date [2040.07.04] ; <-- CHANGE THIS DATE
*team Insts [Fred Fossa]
*team Notes [Brenda Badger]
*team Pics [Luke Lynx]
*team Tape [Albert Aadvark]
*instrument [SAP #+Laser Tape/DistoX/Compass # ; Clino #]
; Calibration: [Where, readings]
*ref [2040#00] ; <-- CHANGE THIS TOO
; the #number is on the clear pocket containing the original notes

; if using a tape:
*calibrate tape +0.0 ; +ve if tape was too short, -ve if too long

; Centreline data
*data normal from to length bearing gradient ignoreall
[ 1 2 5.57 034.5 -12.8 ]

;-----------
;recorded station details (leave commented out)
;(NP=Nail Polish, LHW/RHW=Left/Right Hand Wall)
;Station Left Right Up Down Description
;[Red] nail varnish markings
[;1 0.8 0 5.3 1.6 ; NP on boulder. pt 23 on foo survey ]
[;2 0.3 1.2 6 1.2 ; NP '2' LHW ]
[;3 1.3 0 3.4 0.2 ; Rock on floor - not refindable ]

;LRUDs arranged into passage tubes
;new *data command for each 'passage',
;repeat stations and adjust numbers as needed
*data passage station left right up down
;[ 1 0.8 0 5.3 1.6 ]
;[ 2 0.3 1.2 6 1.2 ]
*data passage station left right up down
;[ 1 1.3 1.5 5.3 1.6 ]
;[ 3 2.4 0 3.4 0.2 ]

;-----------
;Question Mark List
;(leave commented-out)
; The nearest-station is the name of the survey and station which are nearest to
; the QM. The resolution-station is either '-' to indicate that the QM hasn't
; been checked; or the name of the survey and station which push that QM. If a
; QM doesn't go anywhere, set the resolution-station to be the same as the
; nearest-station. Include any relevant details of how to find or push the QM in
; the textual description.
;Serial number grade(A/B/C/X) nearest-station resolution-station description
;[ QM1 A surveyname.3 - description of QM ]
;[ QM2 B surveyname.5 - description of QM ]

;------------
;Cave description
;(leave commented-out)
;freeform text describing this section of the cave

*end [surveyname]
"""

# Progress chatter that cavern writes into its .log file; stripped before the log is shown.
_CAVERN_PROGRESS_LINES = (
    "Removing trailing traverses...\n\n",
    "Concatenating traverses...\n\n",
    "Simplifying network...\n\n",
    "Calculating network...\n\n",
    "Calculating traverses...\n\n",
    "Calculating trailing traverses...\n\n",
    "Calculating statistics...\n\n",
)

# Files which must never be listed for a given cave directory, keyed by directory name.
# These may get outdated as data gets tidied up. This should be configuration, not code!
_IGNORED_SVX_BY_CAVEDIR = {
    "204": frozenset({"skel.svx", "template.svx", "204withents.svx"}),
    "136": frozenset({"136-noents.svx"}),
    "115": frozenset({"115cufix.svx", "115fix.svx"}),
}

# (directory name, file stem) pairs where the primary survex file is not named after the directory.
_PRIMARY_SVX_EXCEPTIONS = frozenset({("resurvey2005", "145-2005"), ("cucc", "cu115")})

_PLAINTEXT = "text/plain; charset=utf-8"


def get_survexfile(filename):
    """Return the SurvexFile database object whose path is `filename`.

    Returns False (not None) when no such object exists yet, e.g. for a new survex
    file which has not been parsed into the database; callers test the result for
    truthiness. When several objects share the path, the first one is returned and
    the duplicates are reported on stdout.
    """
    refs = list(SurvexFile.objects.filter(path=filename))  # evaluate the query exactly once
    if not refs:
        # new survex file, not created in db yet
        return False
    if len(refs) > 1:
        # OK this is due to a bug in the import file parsing, whoops. Now fixed ?!
        print("BUG - to be fixed in the survex parser - not critical..")
        print(f"Number of SurvexFile objects found: {len(refs)}")
        for s in refs:
            print (s.path, s.survexdirectory, s.cave)
    return refs[0]


def _run_cavern(survex_file):
    """Run the cavern executable on `<survex_file>.svx`, in that file's own directory.

    `survex_file` is the path relative to the survex dataset, without extension.
    An empty .err (closure error) file left behind by cavern is deleted.
    Failure to launch cavern is reported on stdout, not raised.
    """
    svxpath = survexdatasetpath / (survex_file + ".svx")
    try:
        # An argument list (no shell) because the filename ultimately comes from the URL, and
        # cwd= instead of os.chdir() because the working directory is shared process-wide.
        subprocess.run([settings.CAVERN, "--log", os.fspath(svxpath)], cwd=svxpath.parent, check=False)
    except OSError as e:
        print(f" ! Error running {settings.CAVERN} on {svxpath}: {e}")

    errpath = survexdatasetpath / (survex_file + ".err")
    if errpath.is_file() and errpath.stat().st_size == 0:
        errpath.unlink()  # delete empty closure error file


class SvxForm(forms.Form):
    """Two-pane form, upper half is the raw survex file, lower half (with green background)
    is the output : of running 'cavern' on the survex file, of running a 'difference', of
    checking that there are no square brackets left.
    """

    dirname = forms.CharField(widget=forms.TextInput(attrs={"readonly": True}))
    filename = forms.CharField(widget=forms.TextInput(attrs={"readonly": True}))
    datetime = forms.DateTimeField(widget=forms.TextInput(attrs={"readonly": True}))
    outputtype = forms.CharField(widget=forms.TextInput(attrs={"readonly": True}))
    code = forms.CharField(widget=forms.Textarea(attrs={"cols": 140, "rows": 36}))
    # NOTE(review): a model field has no meaning on a Form. As written this attribute is
    # simply a truthy placeholder until GetDiscCode() overwrites it with False (template
    # shown) - svx() relies on that truthiness to allow processing. Left unchanged on purpose.
    survexfile = models.ForeignKey(SurvexFile, blank=True, null=True, on_delete=models.SET_NULL)  # 1:1 ?

    template = False  # set True once the template has been served instead of a real file

    def GetDiscCode(self):
        """Return the text of the survex file on disc, or the template if there is no file.

        Side effects: sets self.template and self.survexfile when the template is
        returned; otherwise looks up self.survexfile if it is currently falsy.
        """
        fname = survexdatasetpath / (self.data["filename"] + ".svx")
        if not fname.is_file():
            print(">>> >>> WARNING - svx file not found, showing TEMPLATE SVX", fname, flush=True)
            self.template = True
            self.survexfile = False
            return survextemplatefile
        if not self.survexfile:
            self.survexfile = get_survexfile(self.data["filename"])
        try:
            # newline="" keeps the line endings exactly as stored on disc
            with open(fname, "r", encoding="utf8", newline="") as fin:
                return fin.read()
        except UnicodeDecodeError:
            # hack. Old files are not all utf8; iso-8859-1 can decode any byte sequence.
            with open(fname, "r", encoding="iso-8859-1", newline="") as fin:
                return fin.read()

    def DiffCode(self, rcode):
        """Return the non-blank lines of a unified diff from the disc file to `rcode`."""
        code = self.GetDiscCode()
        difftext = difflib.unified_diff(code.splitlines(), rcode.splitlines())
        return [diffline.strip() for diffline in difftext if not re.match(r"\s*$", diffline)]

    def SaveCode(self, rcode):
        """Write `rcode` to the survex file, commit it to git and re-parse it.

        A file which does not exist yet is only accepted once all the template's
        square brackets are gone and it contains a matching *begin/*end pair.
        Returns a message for the user: an error description, or a string starting
        with "SAVED" on success.
        """
        fname = survexdatasetpath / (self.data["filename"] + ".svx")
        if not fname.is_file():
            if re.search(r"\[|\]", rcode):
                errmsg = "Error: remove all []s from the text.\nEverything inside [] are only template guidance.\n\n"
                errmsg += "All [] must be edited out and replaced with real data before you can save this file.\n"
                return errmsg
            mbeginend = re.search(r"(?s)\*begin\s+(\w+).*?\*end\s+(\w+)", rcode)
            if not mbeginend:
                return "Error: no begin/end block here"
            if mbeginend.group(1) != mbeginend.group(2):
                return "Error: mismatching begin/end labels"

        try:
            # Create new survex folders if needed
            fname.parent.mkdir(parents=True, exist_ok=True)
            with open(fname, "w", encoding="utf8", newline="\n") as fout:
                # javascript seems to insert CRLF on WSL1 whatever you say. So fix that:
                fout.write(rcode.replace("\r", ""))
                fout.write("\n")
        except PermissionError:
            return (
                "CANNOT save this file.\nPERMISSIONS incorrectly set on server for this file. Ask a nerd to fix this."
            )

        if socket.gethostname() == "expo":
            comment = f"Online survex edit: {self.data['filename']}.svx"
        else:
            comment = f"Online survex edit: {self.data['filename']}.svx on dev machine '{socket.gethostname()}' "
        only_commit(fname, comment)
        parse_one_file(self.data["filename"])
        return "SAVED and committed to git (if there were differences)"

    def Process(self):
        """Run cavern on the saved survex file and return its log, minus progress chatter."""
        print(">>>>....\n....Processing\n")
        survex_file = self.data["filename"]
        _run_cavern(survex_file)

        logpath = survexdatasetpath / (survex_file + ".log")
        try:
            log = logpath.read_text(encoding="utf8")
        except FileNotFoundError:
            return f"ERROR: cavern did not produce a log file {logpath}"
        # Do NOT cut everything before "Survey contains": that would omit any ERROR MESSAGES.
        for s in _CAVERN_PROGRESS_LINES:
            log = log.replace(s, "")
        return log


@ensure_csrf_cookie
def svx(request, survex_file):
    """Displays a single survex file in an textarea window (using a javascript online editor to enable
    editing) with buttons which allow SAVE, check for DIFFerences from saved, and RUN (which runs the
    cavern executable and displays the output below the main textarea).
    Requires CSRF to be set up correctly, and requires permission to write to the filesystem.

    Originally the non-existence of difflist was used as a marker to say that the file had been saved
    and that thus there were no differences. This is inadequate, as a new file which has not been saved
    also has no difflist.

    Needs refactoring. Too many piecemeal edits and odd state dependencies.
    """
    warning = False

    # get the basic data from the file given in the URL
    dirname = os.path.split(survex_file)[0]  # replace with proper pathlib function..
    dirname += "/"
    nowtime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    outputtype = "normal"
    form = SvxForm({"filename": survex_file, "dirname": dirname, "datetime": nowtime, "outputtype": outputtype})

    difflist = []
    logmessage = ""
    message = ""

    if request.method == "POST":  # If the form has been submitted...
        rform = SvxForm(request.POST)
        # cleaned_data only exists once the form has been validated
        if rform.is_valid():  # (how do we check it against the filename and users?)
            rcode = rform.cleaned_data["code"]
            outputtype = rform.cleaned_data["outputtype"]  # used by CodeMirror ajax I think
            difflist = form.DiffCode(rcode)
            sfile = form.survexfile

            # "revert" needs no action: without "code" in form.data the disc version is reloaded below
            if "process" in rform.data:
                if difflist:
                    message = "SAVE FILE FIRST"
                    form.data["code"] = rcode
                elif sfile:
                    logmessage = form.Process()
                    if logmessage:
                        message = f"OUTPUT FROM PROCESSING\n{logmessage}"
                else:
                    message = "SAVE VALID FILE FIRST"
                    form.data["code"] = rcode
            if "save" in rform.data:
                if request.user.is_authenticated:
                    message = form.SaveCode(rcode)
                else:
                    message = "You do not have authority to save this file. Please log in."
                # Always keep showing the submitted text. (This used to be guarded by
                # message != "SAVED", which no message SaveCode returns can ever equal.)
                form.data["code"] = rcode
            if "diff" in rform.data:
                print("Differences: ")
                form.data["code"] = rcode

    # GET, also fall-through after POST-specific handling
    svxfile = get_survexfile(survex_file)

    if "code" not in form.data:
        form.data["code"] = form.GetDiscCode()
        if form.template:
            warning = True
    if not difflist:
        if svxfile:
            difflist.append("No differences from last saved file.")
        else:
            difflist.append("No differences from last saved file (or from initial template).")
    if message:
        difflist.insert(0, message)

    svxincludes = re.findall(r"(?i)\*include\s+(\S+)", form.data["code"] or "")

    # collect all the survex blocks which actually have a valid date
    svxblocks = []
    if svxfile:
        try:
            svxblocks = svxfile.survexblock_set.filter(date__isnull=False).order_by('date')
        except Exception:
            # some survexfiles just *include files and have no blocks themselves
            svxblocks = []
    elif not difflist:
        difflist = ["Survex file does not exist yet"]

    events = events_on_dates(svxblocks)

    vmap = {
        "settings": settings,
        "warning": warning,
        "has_3d": (Path(survexdatasetpath) / Path(survex_file + ".3d")).is_file(),
        "title": survex_file,
        "svxblocks": svxblocks,
        "svxincludes": svxincludes,
        "difflist": difflist,
        "logmessage": logmessage,
        "form": form,
        "events": events,
    }

    if outputtype == "ajax":  # used by CodeMirror ajax I think
        return render(request, "svxfiledifflistonly.html", vmap)

    return render(request, "svxfile.html", vmap)


SameDateEvents = namedtuple('SameDateEvents', ['trips', 'svxfiles', 'wallets', 'blocks'])


def events_on_dates(svxblocks):
    """Returns a dictionary indexed by date, in the order the dates first occur in `svxblocks`.

    For each date there is a SameDateEvents named tuple of: logbook entries (trips),
    survexfiles (NB files, not blocks), wallets, and the names of those of the given
    blocks which carry that date.
    """
    # group block names by date in one pass; a dict deduplicates but maintains date order
    blocks_by_date = {}
    for b in svxblocks:
        blocks_by_date.setdefault(b.date, []).append(b.name)

    events = {}
    for date, blocks in blocks_by_date.items():
        trips = LogbookEntry.objects.filter(date=date)
        svxfiles = SurvexFile.objects.filter(survexblock__date=date).distinct()
        # https://stackoverflow.com/questions/739776/how-do-i-do-an-or-filter-in-a-django-query
        wallets = Wallet.objects.filter(Q(survexblock__date=date) | Q(walletdate=date)).distinct()
        events[date] = SameDateEvents(trips=trips, svxfiles=svxfiles, wallets=wallets, blocks=blocks)
    return events


# The cavern running function. This is NOT where it is run inside the form! see SvxForm.Process() for that
def process(survex_file):
    """This runs cavern only where a .3d, .log or .err file is requested."""
    _run_cavern(survex_file)


def _plaintext_file_response(filepath):
    """Serve the file at `filepath` as plain text, or a 404 text message if it is missing."""
    if not filepath.is_file():
        return HttpResponse(f"File not found: {filepath}", content_type=_PLAINTEXT, status=404)
    # bytes are passed through untouched, so a mis-encoded file cannot raise a decoding error here
    return HttpResponse(filepath.read_bytes(), content_type=_PLAINTEXT)


def threed(request, survex_file):
    """Serve the .3d file if it exists; otherwise run cavern and serve the resulting .log file."""
    filepath3d = survexdatasetpath / (survex_file + ".3d")
    if filepath3d.is_file():
        return HttpResponse(filepath3d.read_bytes(), content_type="application/x-aven")
    process(survex_file)  # should not need to do this if it already exists, as it should.
    return _plaintext_file_response(survexdatasetpath / (survex_file + ".log"))


def svxlog(request, survex_file):
    """Used for rendering .log files from survex outputtype"""
    filepathlog = survexdatasetpath / (survex_file + ".log")
    if not filepathlog.is_file():
        process(survex_file)
    return _plaintext_file_response(filepathlog)  # default would be "text/html; charset=utf-8"


def err(request, survex_file):
    """Re-run cavern and serve the .err (closure errors) file, or say that there were none."""
    filepatherr = survexdatasetpath / (survex_file + ".err")
    # A missing .err file is probably missing because it was empty, but re-run anyway.
    process(survex_file)
    if filepatherr.is_file():
        return _plaintext_file_response(filepatherr)
    return HttpResponse(
        f"No closure errors. \nEmpty {filepatherr} file produced. \nSee the .log file.",
        content_type=_PLAINTEXT,
    )


def identifycavedircontents(gcavedir):
    """Find the sub-directories and survex files directly inside the cave directory `gcavedir`.

    Returns (subdirs, subsvx): the names of the non-hidden sub-directories, and the
    stems of the .svx files, sorted, with the primary survex file (if one is found)
    moved to the front. A file counts as primary when its stem equals the directory
    name (ignoring case), starts with "all", or is a listed exception; a stem
    starting with "all" takes precedence. Other primary candidates are not listed.

    This should be in a configuration, not buried in the code...
    """
    name = os.path.split(gcavedir)[1]
    ignored = _IGNORED_SVX_BY_CAVEDIR.get(name, frozenset())
    subdirs = []
    subsvx = []
    primesvx = None

    # sorted() so that the choice between competing candidates does not depend on the
    # arbitrary order in which the operating system lists the directory
    for f in sorted(os.listdir(gcavedir)):
        if f in ignored:
            continue
        if os.path.isdir(os.path.join(gcavedir, f)):
            if not f.startswith("."):
                subdirs.append(f)
            continue
        if not f.endswith(".svx"):
            continue  # .3d .log .err .txt etc.

        nf = f[:-4]
        is_all = nf.startswith("all")
        if not (is_all or nf.lower() == name.lower() or (name, nf) in _PRIMARY_SVX_EXCEPTIONS):
            subsvx.append(nf)
        elif is_all or not primesvx:
            primesvx = nf

    subsvx.sort()
    if primesvx:
        subsvx.insert(0, primesvx)
    return subdirs, subsvx


def get_survexareapath(area):
    """Return the directory holding the cave directories of `area`, e.g. <dataset>/caves-1623."""
    return survexdatasetpath / f"caves-{area}"


# direct local non-database browsing through the svx file repositories
# every time the page is viewed! Should cache this.
def survexcaveslist(request):
    """This reads the entire list of caves in the Loser repo directory and produces a complete report.
    It can find caves which have not yet been properly registered in the system by Databasereset.py because
    someone may have uploaded the survex files without doing the rest of the integration process.

    It uses identifycavedircontents() to classify each cave directory as a single-file cave,
    a multi-file cave, or a cave with sub-directories.
    """
    # TO DO - filter out the non-public caves from display UNLESS LOGGED IN
    onefilecaves = []
    multifilecaves = []
    subdircaves = []
    fnumlist = []

    for area in ["1623", "1626", "1624", "1627"]:
        cavesdir = get_survexareapath(area)
        if not cavesdir.is_dir():
            print(f" ! Survex area directory not found: {cavesdir}")
            continue
        # sort by leading number, largest first; names with no leading number count as 0
        fnumlist += sorted((area, -int(re.match(r"\d*", f).group(0) or "0"), f) for f in os.listdir(cavesdir))

    # go through the list and identify the contents of each cave directory
    for area, _num, cavedir in fnumlist:
        # This all assumes that the first .svx file has the same name as the cave name,
        # which usually but not always true. e.g. caves-1623/78/allkaese.svx not caves-1623/78/78.svx
        # Still fails for loutitohoehle etc even though this is set correctly when the pending cave is created
        gcavedir = os.path.join(get_survexareapath(area), cavedir)
        if cavedir.startswith(".") or not os.path.isdir(gcavedir):
            continue

        subdirs, subsvx = identifycavedircontents(gcavedir)
        # check_cave_registered(area, cavedir) used to be called here, but its result was
        # discarded: it only cost database queries on every page view.
        survdirobj = [(f"caves-{area}/{cavedir}/{svxname}", svxname) for svxname in subsvx]

        if subdirs:
            # caves with subdirectories
            subsurvdirs = []
            for subdir in subdirs:
                _dsubdirs, dsubsvx = identifycavedircontents(os.path.join(gcavedir, subdir))
                lsurvdirobj = [(f"caves-{area}/{cavedir}/{subdir}/{svxname}", svxname) for svxname in dsubsvx]
                if lsurvdirobj:  # skip empty sub directories
                    # the list deliberately includes the first item too
                    subsurvdirs.append((subdir, lsurvdirobj[0], lsurvdirobj[0:]))
            if survdirobj:
                subdircaves.append((cavedir, (survdirobj[0], survdirobj[1:]), subsurvdirs))
            else:
                print(f" ! Subdirectory containing empty subdirectory {subdirs} in {gcavedir}")
        elif len(survdirobj) > 1:
            # multifile caves
            multifilecaves.append((survdirobj[0], cavedir, survdirobj[1:]))
        elif len(survdirobj) == 1:
            # single file caves
            onefilecaves.append(survdirobj[0])

    return render(
        request,
        "svxfilecavelist.html",
        {
            "settings": settings,
            "onefilecaves": onefilecaves,
            "multifilecaves": multifilecaves,
            "subdircaves": subdircaves,
        },
    )


def survexcavesingle(request, survex_cave):
    """parsing all the survex files of a single cave and showing that it's consistent and can find all
    the files and people. Should explicitly fix the kataster number thing.
    kataster numbers are not unique across areas. This used to be a db constraint but we need to manage
    this ourselves as we don't want the parser aborting with an error message.

    Lookup order: kataster number; then a survex file of that exact name; then the
    unofficial number, also trying '-' and '_' swapped or removed.

    Should use getCave() from models_caves
    """
    sc = survex_cave
    not_found = ("errors/svxcavesingle404.html", {"settings": settings, "cave": sc})

    try:
        cave = Cave.objects.get(kataster_number=sc)  # This may not be unique.
    except MultipleObjectsReturned:
        caves = Cave.objects.filter(kataster_number=sc)
        return render(request, "svxcaveseveral.html", {"settings": settings, "caves": caves})
    except ObjectDoesNotExist:
        cave = None
    except Exception:
        return render(request, *not_found)
    if cave is not None:
        return render(request, "svxcavesingle.html", {"settings": settings, "cave": cave})

    # can get here if the survex file is in a directory labelled with unofficial number not kataster number.
    # maybe - and _ mixed up, or CUCC-2017- instead of 2017-CUCC-, or CUCC2015DL01 . Let's not get carried away..

    # or it might be an exact search for a specific survex file but just missing the '.svx'.
    if (Path(survexdatasetpath) / Path(survex_cave + ".svx")).is_file():
        return svx(request, survex_cave)

    variants = [sc, sc.replace("-", "_"), sc.replace("_", "-"), sc.replace("-", ""), sc.replace("_", "")]
    for unoff in dict.fromkeys(variants):  # same order, without repeating identical queries
        try:
            cave = Cave.objects.get(unofficial_number=unoff)
        except ObjectDoesNotExist:
            continue  # next attempt in for loop
        except MultipleObjectsReturned:
            caves = Cave.objects.filter(unofficial_number=unoff)
            return render(request, "svxcaveseveral.html", {"settings": settings, "caves": caves})
        # return on first one we find
        return render(request, "svxcavesingle.html", {"settings": settings, "cave": cave})

    return render(request, *not_found)


def check_cave_registered(area, survex_cave):
    """Checks whether a cave has been properly registered when it is found in the Loser repo
    This should really be called by databaseReset not here in a view
    Currently Caves are only registered if they are listed in :expoweb: settings.CAVEDESCRIPTIONS
    so we need to add in any more here.

    Returns str(cave) for the Cave matched by kataster number (when several share the
    number, the one whose str() is "<area>-<survex_cave>"), or for the Cave matched by
    unofficial number provided it has a kataster number; otherwise None.

    This function runs but does not seem to be used?!
    A serious bodge anyway.
    """
    try:
        cave = Cave.objects.get(kataster_number=survex_cave)
        return str(cave)
    except MultipleObjectsReturned:
        wanted = area + "-" + survex_cave
        for c in Cave.objects.filter(kataster_number=survex_cave):
            if str(c) == wanted:
                return str(c)  # just get the first that matches
        return None  # many returned but none in correct area
    except ObjectDoesNotExist:
        pass

    try:
        cave = Cave.objects.get(unofficial_number=survex_cave)  # should be unique!
    except ObjectDoesNotExist:
        return None
    return str(cave) if cave.kataster_number else None