import datetime
import json
import operator
import re
from functools import reduce
from pathlib import Path
from urllib.parse import urljoin

from django.conf import settings
from django.db import models
from django.urls import reverse

from troggle.core.models.troggle import DataIssue
from troggle.core.models.caves import get_cave_leniently

# from troggle.core.models.survex import SurvexBlock
# from troggle.core.models.troggle import DataIssue # circular import. Hmm

YEAR_RANGE = (1975, 2050)

# dd-mm-yy or dd-mm-yyyy, tested AFTER "." and "/" have been normalised to "-".
# The digit look-arounds stop it matching the tail of an ISO-ish date such as
# "2023-7-14" (which would otherwise be misread as day 23, month 7, year 14).
_EURO_DATE_RE = re.compile(r"(?<!\d)(\d{1,2})-(\d{1,2})-(\d{2,4})(?!\d)")
# yyyy-m-d where the month and/or day has only a single digit
_LOOSE_ISO_DATE_RE = re.compile(r"(\d{4})-(\d{1,2})-(\d{1,2})")


def make_valid_date(walletname, date):
    """Take whatever garbage some fool has typed in and try to make it into a valid ISO-format date.

    walletname is only used in the warning message.
    date is the raw value from the wallet JSON; "." and "/" separators are accepted.

    Returns a datetime.date, or None if the value could not be understood.
    Diagnostics are printed, not raised.
    """
    if date is None:
        return None
    datestr = str(date).replace(".", "-").replace("/", "-")

    try:
        return datetime.date.fromisoformat(datestr)
    except ValueError:
        pass

    # Could be in std euro format e.g. 14/07/2023 (now 14-07-2023 after normalisation)
    match = _EURO_DATE_RE.search(datestr)
    if match:
        d, m, y = (int(g) for g in match.groups())
        if y < 100:
            # two-digit year: assume this century
            y += 2000
        try:
            samedate = datetime.date(y, m, d)
        except ValueError:
            print(f"! - Fail, tried to decompose date in dd/mm/yyyy format but failed: {datestr=} ")
            return None
        print(f"- - Warning, not in ISO format. '{datestr=}' but we tried to cope: {samedate.isoformat()} {walletname}")
        return samedate

    # probably a single digit day number or month number
    match = _LOOSE_ISO_DATE_RE.search(datestr)
    if match:
        y, m, d = (int(g) for g in match.groups())
        try:
            samedate = datetime.date(y, m, d)
        except ValueError:
            print(f"! - Fail, tried to decompose date in yyyy-mm-d or yyy-m-dd format but failed: {datestr=} ")
            return None
        print(f"- - Warning, 1 digit only for month or day '{datestr=}' but we coped: {samedate.isoformat()} ")
        return samedate

    if datestr and datestr != "None":
        print(f"! - Failed to understand date, none of our tricks worked {datestr=} ")
    return None


archaic_wallets = [
    '1984AndysNotebook',
    '1984BrownLandscapeNotebook',
    '1989LUSS',
    '1989surveybook',
    '1990Surveybookkh',
    '1991surveybook',
    '1992-94Surveybookkh',
    '1994',
    '1995-96kh',
    '1996-1999NotKHbook',
    '1997-99kh',
    'loosepages',
]


class Wallet(models.Model):
    """We do not keep the JSON values in the database, we query them afresh each time,
    but we may change this if we need to do a Django query on e.g. person name.

    ManyToMany field uses modern Django: a hidden Class, unlike CaveAndEntrances which is explicit and visible.

    We parse all the JSON on initial reset/import but we only keep data on the Wallet objects
    that we need for indexing: people, caves and year. So that we can quickly make reports
    on e.g. all wallets for a particular cave.

    All the other fields in the JSON are parsed and loaded from file only dynamically,
    when a report is being generated, but since this only happens for a subset of wallets
    (e.g. for a specific year) the speed penalty is fine. Indeed it might be faster overall
    as db operations in django are a bit slow. This other data is the stuff which generates
    the tick-lists.

    A trick to minimize the number of times we hit the file to load JSON data is to use an
    attribute on every wallet object called 'JSONdata', but this is not part of the schema
    and there is no corresponding field in the db. The attribute 'JSONdata' only lives for as
    long as the ephemeral python wallet object. We use this to cache the JSON data so that
    queries of several different things, e.g. 'name', or 'survexnotrequired', do not
    repeatedly re-read the JSON which has not changed.
    """

    fpath = models.CharField(max_length=200)
    walletname = models.CharField(max_length=200)
    walletdate = models.DateField(blank=True, null=True)
    walletyear = models.DateField(blank=True, null=True)
    caves = models.ManyToManyField("Cave", related_name="wallets")
    persons = models.ManyToManyField("Person", related_name="wallets")

    class Meta:
        ordering = ("walletname",)

    @staticmethod
    def input_to_list(stuff):
        """With wallets we often have to deal with either a list object (from valid JSON parsing)
        or a string which may or may not also be a list, but munged by user error on a form etc.

        This function returns a list, either the JSON list, or a list with a single object in it,
        or a list of strings. It silently absorbs empty strings and consumes odd quotes and
        square brackets.

        Always returns a list, even if it is an empty list.
        """
        if isinstance(stuff, list):
            return [o for o in stuff if o]  # drop empty strings and None
        if isinstance(stuff, str):
            # Strip list punctuation left over from a stringified list, e.g. "['a', 'b']".
            # We deliberately do NOT rewrite "/" or " " inside the items: this helper is
            # also used for survex file paths, where "/" is significant.
            cleaned = (
                s.strip("[] ").replace("'", "").replace('"', "").strip("[] ")
                for s in stuff.split(",")
            )
            return [s for s in cleaned if s]
        if stuff:
            return [stuff]  # single object, not a string, but now in a list.
        return []

    def get_absolute_url(self):
        """URL of the single-wallet page, with '#' percent-encoded."""
        # we do not use URL_ROOT any more.
        return reverse("singlewallet", kwargs={"path": self.walletname.replace("#", "%23")})

    def get_url(self):
        """URL of the wallet edit page, with '#' replaced by ':'."""
        return f"/walletedit/{self.walletname}".replace('#', ':')

    def get_json(self):
        """Read the JSON file for the wallet and return it as a dict, or None if unavailable.

        The result is cached on the python instance as self.JSONdata, so the file is read
        once per instance, not once per query.

        Side effects: if the JSON date can be parsed, sets self.walletdate, saves the object
        and rewrites waldata["date"] in ISO format. Problems are recorded as DataIssue rows.

        This repeats a lot of stuff done in the initial parsing job: traversing the file system.
        Needs to be refactored as special handling of subdirectories is duplicated.
        """
        if hasattr(self, "JSONdata"):
            return self.JSONdata

        scans_path = Path(settings.SCANS_ROOT)
        wurl = self.get_url()
        # :drawings: walletjson/2022/2022#01/contents.json
        # fpath = /mnt/d/EXPO/expofiles/surveyscans/1999/1999#02
        fp = Path(self.fpath)
        if fp.name in archaic_wallets:
            return None

        if fp.parent.parent.parent.parent == scans_path:
            # Subfolder two deep
            wname = fp.parent.parent.name
            wyear = fp.parent.parent.parent.name
        else:
            wname = fp.name
            wyear = fp.parent.name

        try:
            int(wyear)  # validity check only
        except ValueError:
            message = f"! 3 or more deep subfolder detected?:\n {fp.parent.name=}\n {fp.name=} \n {self.fpath=} {wurl=}"
            print(message)
            # this is a hack, work down from /surveyscans instead. To be fixed
            wname = fp.parent.parent.parent.name
            wyear = fp.parent.parent.parent.parent.name

        # Old-style wallets (len(wyear) != 4 or len(wname) != 6) have no contents.json,
        # but returning None for them here ruined all the tick-list displays, so we carry on.

        jsonfile = Path(settings.DRAWINGS_DATA, "walletjson") / wyear / wname / "contents.json"
        if not jsonfile.is_file():
            message = f"! {jsonfile} is not a file:\n {wyear=} (should be eg. '2035')\n {wname=} (should be eg. '2035#13')\n {self.fpath=}"
            print(message)
            if wname not in archaic_wallets:
                message = f"! {jsonfile} is not a file:\n {wyear=} (should be eg. '2023')\n {wname=} (should be eg. '2023#13')\n {self.fpath=}"
                print(message)
                DataIssue.objects.update_or_create(parser="wallets", message=message, url=wurl)
            return None

        try:
            with open(jsonfile) as json_f:
                waldata = json.load(json_f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError
            message = f"! {str(self.walletname)} Failed to load {jsonfile} JSON file"
            print(message)
            # NB DataIssue must be the module-level import here: a function-local import
            # anywhere in this method would make the name local and unbound at this point.
            DataIssue.objects.update_or_create(parser="wallets", message=message, url=wurl)
            return None

        if waldata["date"]:
            thisdate = make_valid_date(self.walletname, waldata["date"])
            if thisdate:
                self.walletdate = thisdate
                self.save()
                waldata["date"] = thisdate.isoformat()
            elif waldata["date"] != "None":
                message = f"! {str(self.walletname)} Date format not ISO {waldata['date']}. Failed to load from {jsonfile} JSON file"
                DataIssue.objects.update_or_create(parser="wallets", message=message, url=wurl)

        self.JSONdata = waldata
        return waldata

    def check_survexlist(self):
        """Record a DataIssue for every listed survex file which does not exist on disc.

        Also normalises waldata["survex file"] to a list, in the cached JSON data.
        """
        wurl = self.get_url()
        if not (waldata := self.get_json()):  # WALRUS
            return None

        if waldata["survex file"]:
            waldata["survex file"] = Wallet.input_to_list(waldata["survex file"])
            for sx in waldata["survex file"]:
                # this logic appears in several places, inc get_ticks(). and wallets_edit.py Refactor.
                if sx != "":
                    if Path(sx).suffix.lower() != ".svx":
                        sx = sx + ".svx"
                    if not (Path(settings.SURVEX_DATA) / sx).is_file():
                        message = f"{self} Survex file {sx} was not found in LOSER repo"
                        DataIssue.objects.update_or_create(parser="wallets", message=message, url=wurl)
                        # check using field on the cave whether it has been renamed...

    def allcaves(self):
        """Called when parsing importing all data.
        Called on all new wallets, but before the survex files are parsed.

        Adds every cave named in the JSON which can be identified to self.caves.
        """
        if not (jsondata := self.get_json()):  # WALRUS
            return None
        for i in Wallet.input_to_list(jsondata["cave"]):
            try:
                caveobject = get_cave_leniently(i)
                if caveobject:
                    self.caves.add(caveobject)
            except Exception as e:
                # best effort: one bad cave id must not stop the import
                print(f"FAIL adding cave to wallet.caves '{i}' {e}")

    def year(self):
        """This gets the year syntactically without opening and reading the JSON.

        Returns the year as a string, or None if the walletname is not of the form
        'yyyy#...' or the year is outside YEAR_RANGE.
        Side effect: sets self.walletyear and saves the object when the year is valid.
        """
        if len(self.walletname) < 5:
            return None
        if self.walletname[4] != "#":
            return None
        try:
            year = int(self.walletname[0:4])
        except ValueError:
            return None

        ymin, ymax = YEAR_RANGE
        if year < ymin or year > ymax:
            return None
        self.walletyear = datetime.date(year, 1, 1)
        self.save()
        return str(year)

    # Yes this is horribly, horribly inefficient, esp. for a page that have date, people and cave in it
    def date(self):
        """Return the wallet date, reading all the JSON data if it is not already set.

        Returns self.walletdate if already set. Otherwise parses the JSON date: on success
        self.walletdate is set to the ISO-format string; in either case the object is saved
        and self.walletdate returned. Returns None if there is no JSON or no date in it.
        """
        if self.walletdate:
            return self.walletdate
        if not (jsondata := self.get_json()):  # WALRUS
            return None

        datestr = jsondata["date"]
        if not datestr:
            return None

        datestr = datestr.replace(".", "-")
        # try the whole string first, then just the first 10 characters (yyyy-mm-dd)
        for candidate in (datestr, datestr[:10]):
            try:
                self.walletdate = datetime.date.fromisoformat(candidate).isoformat()
                break
            except ValueError:
                continue
        self.save()
        return self.walletdate

    def people(self):
        """The 'people' value from the wallet JSON, or None if there is no JSON."""
        if not (jsondata := self.get_json()):  # WALRUS
            return None
        return jsondata["people"]

    def cave(self):
        """The 'cave' value from the wallet JSON, or None if there is no JSON."""
        if not (jsondata := self.get_json()):  # WALRUS
            return None
        return jsondata["cave"]

    def name(self):
        """The 'name' value from the wallet JSON, or None if there is no JSON."""
        if not (jsondata := self.get_json()):  # WALRUS
            return None
        return jsondata["name"]

    def survexfiles(self):
        """The 'survex file' value from the wallet JSON as a list, or None if there is no JSON."""
        if not (jsondata := self.get_json()):  # WALRUS
            return None
        return Wallet.input_to_list(jsondata["survex file"])

    def get_fnames(self):
        '''Filenames without the suffix, i.e. without the ".jpg"

        Non-file entries (e.g. subdirectories) are listed with their name wrapped in '-'.
        On error the list holds a single explanatory message instead.
        '''
        dirpath = Path(settings.SCANS_ROOT, self.fpath)  # does nowt as fpath is a rooted path already
        if not self.fpath:
            return [f"Incorrect path to wallet contents: '{self.fpath}'"]
        if not dirpath.is_dir():
            return ["No uploaded scans or incorrect path to wallet contents"]

        files = []
        try:
            for f in dirpath.iterdir():
                stem = Path(f.name).stem
                files.append(stem if f.is_file() else f"-{stem}-")
        except FileNotFoundError:
            files.append("FileNotFoundError")
        return files

    def fixsurvextick(self, tick):
        """Return 'seagreen' if any survex block attached to this wallet has a survexfile,
        otherwise return tick unchanged."""
        # blocks = SurvexBlock.objects.filter(scanswallet = self)
        for b in self.survexblock_set.all():
            if b.survexfile:
                # if any exist in db, no check for validity or a real file. Refactor.
                return "seagreen"  # slightly different shade of green
        return tick

    def get_ticks(self):
        """Reads all the JSON data and sets the colour of the completion tick for each condition.

        Returns a dict keyed by the one-letter codes S, C, Q, N, P, E, T, W
        (Survex, Cave description, QMs, Notes, Plan, Elevation, Tunnel/Therion, Website).
        All are 'darkgrey' if there is no JSON data for the wallet.
        """
        waldata = self.get_json()
        if not waldata:
            return dict.fromkeys("SCQNPETW", "darkgrey")

        ticks = {}

        # Initially, are there any required survex files present ?
        # Note that we can't set the survexblock here on the wallet as that info is only available while parsing the survex file
        survexok = "red"
        ticks["S"] = "red"
        if waldata["survex not required"]:
            survexok = "green"
            ticks["S"] = "green"
        elif waldata["survex file"]:
            if not isinstance(waldata["survex file"], list):
                # a string also is a sequence type, so do it this way
                waldata["survex file"] = [waldata["survex file"]]
            ngood = 0
            nbad = 0
            ticks["S"] = "purple"
            for sx in waldata["survex file"]:
                # this logic appears in several places, inc uploads.py). Refactor.
                if sx != "":
                    if Path(sx).suffix.lower() != ".svx":
                        sx = sx + ".svx"
                    if (Path(settings.SURVEX_DATA) / sx).is_file():
                        ngood += 1
                    else:
                        nbad += 1
            if nbad == 0 and ngood >= 1:  # all valid
                ticks["S"] = "green"
            elif nbad >= 1 and ngood >= 1:  # some valid, some invalid
                ticks["S"] = "orange"
            elif nbad >= 1 and ngood == 0:  # all bad
                ticks["S"] = "red"
            elif nbad == 0 and ngood == 0:  # list of blank strings
                ticks["S"] = "red"
            else:
                ticks["S"] = "fuchsia"  # have fun working out what this means

        # Cave Description
        ticks["C"] = "green" if waldata["description written"] else survexok

        # QMs
        ticks["Q"] = "green" if waldata["qms written"] else survexok
        wallet_year = self.year()  # call once: year() saves the object each time
        if not wallet_year:
            ticks["Q"] = "darkgrey"
        elif int(wallet_year) < 2015:
            ticks["Q"] = "lightgrey"

        if 'notes not required' not in waldata:
            waldata['notes not required'] = False

        # Notes, Plan, Elevation
        files = self.get_fnames()

        # Notes required
        notes_scanned = any(f.startswith("note") or f.endswith("notes") for f in files)
        notes_required = not (notes_scanned or waldata["notes not required"])
        ticks["N"] = "red" if notes_required else "green"
        # print(f"{self.walletname} {ticks['N'].upper()} {notes_scanned=} {notes_required=} {waldata['notes not required']=}")

        # Plan drawing required
        plan_scanned = any(f.startswith("plan") or f.endswith("plan") for f in files)
        plan_drawing_required = not (plan_scanned or waldata["plan drawn"] or waldata["plan not required"])
        ticks["P"] = "red" if plan_drawing_required else "green"

        # Elev drawing required
        elev_scanned = any(f.startswith("elev") or f.endswith(("elev", "elevation")) for f in files)
        elev_drawing_required = not (elev_scanned or waldata["elev drawn"] or waldata["elev not required"])
        ticks["E"] = "red" if elev_drawing_required else "green"

        # if electronic, don't require P or E
        if waldata["electronic survey"]:
            # ticks["N"] = "green"
            ticks["P"] = "green"
            ticks["E"] = "green"
            # ticks["T"] = "green" # No, this does not mean it has been 'tunneled' properly

        # Tunnel / Therion
        if elev_drawing_required or plan_drawing_required:
            ticks["T"] = "red"
        else:
            ticks["T"] = "green"

        # Website
        ticks["W"] = "green" if waldata["website updated"] else "red"

        return ticks

    def __str__(self):
        return "[" + str(self.walletname) + " (Wallet)]"