2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-22 07:11:52 +00:00
troggle/core/models/wallets.py

479 lines
19 KiB
Python

import datetime
import json
import operator
import re
from functools import reduce
from pathlib import Path
from urllib.parse import urljoin
from django.conf import settings
from django.db import models
from django.urls import reverse
from troggle.core.models.troggle import DataIssue
from troggle.core.models.caves import get_cave_leniently
# from troggle.core.models.survex import SurvexBlock
# from troggle.core.models.troggle import DataIssue # circular import. Hmm
# Inclusive (earliest, latest) bounds on the year that a wallet name may start with.
# Wallet.year() returns None for any wallet whose leading year falls outside this range.
YEAR_RANGE = (1975, 2050)
def make_valid_date(walletname, date):
    """Coerce a hand-typed wallet date into a ``datetime.date``.

    Dots and slashes are treated as dashes, then three interpretations are
    tried in turn:

    1. a strict ISO date (``2023-07-14``);
    2. ISO order with a one-digit month or day (``2023-7-4``);
    3. European day-month-year order with a 2- or 4-digit year
       (``14/07/2023``, ``14.7.23``); 2-digit years are taken to be 20xx.

    Args:
        walletname: only used in the warning printed for European-order dates.
        date: the raw value from the wallet JSON; may be None or empty.

    Returns:
        A ``datetime.date``, or None when the input is empty or cannot be
        understood. Problems are reported with ``print``; nothing is raised.
    """
    if not date:  # None or "" : nothing to parse
        return None

    # Normalise separators first so that every pattern below only has to cope with "-".
    datestr = str(date).strip().replace(".", "-").replace("/", "-")
    try:
        return datetime.date.fromisoformat(datestr)
    except ValueError:
        pass

    # ISO order, but probably a single digit day number or month number
    match = re.search(r'(\d{4})-(\d{1,2})-(\d{1,2})', datestr)
    if match:
        y, m, d = (int(g) for g in match.groups())
        try:
            samedate = datetime.date(y, m, d)
        except ValueError:
            print(f"! - Fail, tried to decompose date in yyyy-mm-d or yyy-m-dd format but failed: {datestr=} ")
            return None
        print(f"- - Warning, 1 digit only for month or day '{datestr=}' but we coped: {samedate.isoformat()} ")
        return samedate

    # Could be in std euro format e.g. 14/07/2023 (the slashes are dashes by now).
    # The lookarounds stop this matching a fragment of a longer run of digits.
    match = re.search(r'(?<!\d)(\d{1,2})-(\d{1,2})-(\d{4}|\d{2})(?!\d)', datestr)
    if match:
        d, m, y = (int(g) for g in match.groups())
        if y < 100:  # only expand genuine 2-digit years; 1999 must stay 1999
            y = y + 2000
        try:
            samedate = datetime.date(y, m, d)
        except ValueError:
            print(f"! - Fail, tried to decompose date in dd/mm/yyyy format but failed: {datestr=} ")
            return None
        print(f"- - Warning, not in ISO format. '{datestr=}' but we tried to cope: {samedate.isoformat()} {walletname}")
        return samedate

    if datestr != "None":  # a literal "None" is a common placeholder, not worth a warning
        print(f"! - Failed to understand date, none of our tricks worked {datestr=} ")
    return None
# Folder names of old-style wallets. Wallet.get_json() returns None for a wallet whose
# folder name is in this list without looking for a contents.json, and it does not record
# a DataIssue when the contents.json of one of these names is missing.
archaic_wallets = [
'1984AndysNotebook',
'1984BrownLandscapeNotebook',
'1989LUSS',
'1989surveybook',
'1990Surveybookkh',
'1991surveybook',
'1992-94Surveybookkh',
'1994',
'1995-96kh',
'1996-1999NotKHbook',
'1997-99kh',
'loosepages',
]
class Wallet(models.Model):
    """A survey wallet: a folder of scanned notes plus a contents.json of metadata.

    We do not keep the JSON values in the database, we query them afresh each time,
    but we may change this if we need to do a Django query on e.g. personame.

    ManyToMany field uses modern Django: a hidden class, unlike CaveAndEntrances which
    is explicit and visible.

    We parse all the JSON on initial reset/import but we only keep data on the Wallet
    objects that we need for indexing: people, caves and year, so that we can quickly
    make reports on e.g. all wallets for a particular cave.

    All the other fields in the JSON are parsed and loaded from file only dynamically,
    when a report is being generated, but since this only happens for a subset of wallets
    (e.g. for a specific year) the speed penalty is fine. Indeed it might be faster overall
    as db operations in django are a bit slow. This other data is the stuff which
    generates the tick-lists.

    A trick to minimize the number of times we hit the file to load JSON data is to use an
    attribute on every wallet object called 'JSONdata', but this is not part of the schema
    and there is no corresponding field in the db. 'JSONdata' only lives for as long as the
    ephemeral python wallet object. We use this to cache the JSON data so that queries of
    several different things, e.g. 'name', or 'survexnotrequired', do not repeatedly
    re-read the JSON which has not changed.
    """

    fpath = models.CharField(max_length=200)
    walletname = models.CharField(max_length=200)
    walletdate = models.DateField(blank=True, null=True)
    walletyear = models.DateField(blank=True, null=True)
    caves = models.ManyToManyField("Cave", related_name="wallets")
    persons = models.ManyToManyField("Person", related_name="wallets")

    class Meta:
        ordering = ("walletname",)

    @staticmethod
    def input_to_list(stuff):
        """Return *stuff* as a list, whatever shape it arrived in.

        With wallets we often have to deal with either a list object (from valid JSON
        parsing) or a string which may or may not also be a list, but munged by user
        error on a form etc.

        - a list is returned without its empty/None members;
        - a string is split on commas; each piece has surrounding square brackets,
          spaces and any quote characters removed, and empty pieces are dropped;
        - any other truthy object is wrapped in a one-element list.

        Always returns a list, even if it is an empty list.
        """
        if isinstance(stuff, list):
            return [o for o in stuff if o]  # not an empty string, None
        if isinstance(stuff, str):
            # The original loop also replaced "/" with "-" and " " with "_", but it assigned
            # the result to the loop variable so nothing was ever changed. Those two
            # replacements are deliberately NOT applied here: this function is used on
            # survex file paths, which are joined onto SURVEX_DATA and so need their "/".
            pieces = []
            for s in stuff.split(","):
                s = s.strip("[] ").replace("'", "").replace('"', "").strip("[] ")
                if s:
                    pieces.append(s)
            return pieces
        if stuff:
            return [stuff]  # single object, not a string, but now in a list.
        return []

    @staticmethod
    def _record_issue(message, url):
        """Store *message* as a DataIssue for the "wallets" parser, linked to *url*."""
        # Imported here, as the original code did, to prevent an import cycle problem.
        from troggle.core.models.troggle import DataIssue

        DataIssue.objects.update_or_create(parser="wallets", message=message, url=url)

    @staticmethod
    def _with_svx_suffix(sx):
        """Return the survex filename *sx* with ".svx" appended unless it already ends so."""
        if Path(sx).suffix.lower() != ".svx":
            return sx + ".svx"
        return sx

    def _json_field(self, key):
        """Return the wallet JSON value for *key*, or None if there is no JSON or no such key."""
        if not (jsondata := self.get_json()):
            return None
        return jsondata.get(key)

    def get_absolute_url(self):
        """URL of the single-wallet page; '#' in the wallet name is percent-encoded."""
        # we do not use URL_ROOT any more.
        return reverse("singlewallet", kwargs={"path": self.walletname.replace("#", "%23")})

    def get_url(self):
        """URL of the wallet edit page; '#' in the wallet name is written as ':'."""
        return f"/walletedit/{self.walletname}".replace('#', ':')

    def get_json(self):
        """Load, cache and return the wallet's contents.json as a dict.

        The file is read once per python instance: the result is cached on
        ``self.JSONdata`` and returned directly on later calls.

        Side effects: if the JSON holds a date that make_valid_date() can understand,
        ``self.walletdate`` is set, the wallet is saved, and the "date" entry of the
        returned dict is rewritten in ISO format. Problems are recorded as DataIssues.

        This repeats a lot of stuff done in the initial parsing job: traversing the file
        system. Needs to be refactored as special handling of subdirectories is duplicated.

        Returns:
            The parsed JSON, or None for archaic wallets and when the file is missing or
            cannot be parsed (None results are not cached).
        """
        if hasattr(self, "JSONdata"):
            return self.JSONdata

        scans_path = Path(settings.SCANS_ROOT)
        wurl = self.get_url()
        # :drawings: walletjson/2022/2022#01/contents.json
        # fpath = /mnt/d/EXPO/expofiles/surveyscans/1999/1999#02
        fp = Path(self.fpath)
        if fp.name in archaic_wallets:
            return None

        if fp.parent.parent.parent.parent == scans_path:
            # fpath is a subfolder two deep inside the wallet folder
            wname = fp.parent.parent.name
            wyear = fp.parent.parent.parent.name
        else:
            wname = fp.name
            wyear = fp.parent.name

        try:
            int(wyear)  # only checking that the folder we took for the year looks like a number
        except ValueError:
            message = f"! 3 or more deep subfolder detected?:\n {fp.parent.name=}\n {fp.name=} \n {self.fpath=} {wurl=}"
            print(message)
            # this is a hack, work down from /surveyscans instead. To be fixed
            wname = fp.parent.parent.parent.name
            wyear = fp.parent.parent.parent.parent.name

        # Old-style wallets (wyear not 4 chars, or wname not 6 chars) have no contents.json,
        # but returning None for them here ruined all the tick-list displays, so we carry on.

        jsonfile = Path(settings.DRAWINGS_DATA, "walletjson") / wyear / wname / "contents.json"
        if not jsonfile.is_file():
            message = f"! {jsonfile} is not a file:\n {wyear=} (should be eg. '2035')\n {wname=} (should be eg. '2035#13')\n {self.fpath=}"
            print(message)
            if wname not in archaic_wallets:
                message = f"! {jsonfile} is not a file:\n {wyear=} (should be eg. '2023')\n {wname=} (should be eg. '2023#13')\n {self.fpath=}"
                print(message)
                self._record_issue(message, wurl)
            return None

        with open(jsonfile) as json_f:
            try:
                waldata = json.load(json_f)
            except (ValueError, OSError, RecursionError):
                # ValueError covers both malformed JSON and undecodable bytes
                message = f"! {str(self.walletname)} Failed to load {jsonfile} JSON file"
                print(message)
                self._record_issue(message, wurl)
                return None

        rawdate = waldata.get("date")
        if rawdate:
            thisdate = make_valid_date(self.walletname, rawdate)
            if thisdate:
                self.walletdate = thisdate
                self.save()
                waldata["date"] = thisdate.isoformat()
            elif rawdate != "None":
                # a literal "None" is just an unset date, not a formatting error
                message = f"! {str(self.walletname)} Date format not ISO {rawdate}. Failed to load from {jsonfile} JSON file"
                self._record_issue(message, wurl)

        self.JSONdata = waldata
        return waldata

    def check_survexlist(self):
        """Record a DataIssue for every survex file named in the wallet that does not exist.

        Side effect: the "survex file" entry of the cached JSON is replaced by its
        list form (see input_to_list).
        """
        wurl = self.get_url()
        if not (waldata := self.get_json()):  # WALRUS
            return None
        if not waldata.get("survex file"):
            return None

        waldata["survex file"] = Wallet.input_to_list(waldata["survex file"])
        for sx in waldata["survex file"]:
            # this logic appears in several places, inc get_ticks(). and wallets_edit.py Refactor.
            if sx == "":
                continue
            sx = self._with_svx_suffix(sx)
            if not (Path(settings.SURVEX_DATA) / sx).is_file():
                message = f"{self} Survex file {sx} was not found in LOSER repo"
                self._record_issue(message, wurl)
                # check using <kataster> field on the cave whether it has been renamed...
        return None

    def allcaves(self):
        """Link this wallet to every cave named in its JSON "cave" entry.

        Called when parsing importing all data. Called on all new wallets, but before
        the survex files are parsed. Names which cannot be resolved are reported with
        print() and skipped.
        """
        if not (jsondata := self.get_json()):  # WALRUS
            return None
        for i in Wallet.input_to_list(jsondata.get("cave")):
            try:
                caveobject = get_cave_leniently(i)
                if caveobject:
                    self.caves.add(caveobject)
            except Exception:
                # best effort: one bad cave name must not stop the others being linked
                print(f"FAIL adding cave to wallet.caves '{i}'")
        return None

    def year(self):
        """Return the wallet's year as a string, taken from a name of the form 'YYYY#…'.

        This gets the year syntactically without opening and reading the JSON.

        Side effect: on success ``self.walletyear`` is set to 1st January of that
        year and the wallet is saved.

        Returns:
            The 4-digit year as a string, or None if the name does not have that form
            or the year is outside YEAR_RANGE.
        """
        name = self.walletname
        if len(name) < 5 or name[4] != "#":
            return None
        try:
            year = int(name[0:4])
        except ValueError:  # e.g. "abcd#01"
            return None
        ymin, ymax = YEAR_RANGE
        if year < ymin or year > ymax:
            return None
        self.walletyear = datetime.date(year, 1, 1)
        self.save()
        return str(year)

    # Yes this is horribly, horribly inefficient, esp. for a page that have date, people and cave in it
    def date(self):
        """Return the wallet date, reading all the JSON data if it is not already known.

        The JSON date is accepted if it is in ISO format, or if its first 10 characters
        are (dots are treated as dashes).

        Side effect: whenever the JSON has a non-empty date the wallet is saved,
        with ``self.walletdate`` updated if the date could be parsed.

        Returns:
            A ``datetime.date`` (or whatever ``self.walletdate`` already holds), or
            None if there is no usable date.
        """
        if self.walletdate:
            return self.walletdate
        if not (jsondata := self.get_json()):  # WALRUS
            return None
        datestr = jsondata.get("date")
        if not datestr:
            return None

        datestr = datestr.replace(".", "-")
        for candidate in (datestr, datestr[:10]):
            try:
                # Keep a date object, the same type get_json() stores and the db returns.
                self.walletdate = datetime.date.fromisoformat(candidate)
                break
            except ValueError:
                continue
        self.save()
        return self.walletdate

    def people(self):
        """Return the "people" entry of the wallet JSON, or None."""
        return self._json_field("people")

    def cave(self):
        """Return the "cave" entry of the wallet JSON, or None."""
        return self._json_field("cave")

    def name(self):
        """Return the "name" entry of the wallet JSON, or None."""
        return self._json_field("name")

    def survexfiles(self):
        """Return the wallet's survex files as a list, or None if there is no JSON."""
        if not (jsondata := self.get_json()):  # WALRUS
            return None
        filelist = Wallet.input_to_list(jsondata.get("survex file"))
        # print(f"'{self} {jsondata['survex file']}' => {filelist}")
        return filelist

    def get_fnames(self):
        """List what is in the wallet folder: filenames without the suffix, i.e. without the ".jpg".

        Entries which are not plain files are listed as "-name-". If the folder cannot
        be listed, the returned list holds a single explanatory message instead.
        """
        files = []
        if not self.fpath:
            files.append(f"Incorrect path to wallet contents: '{self.fpath}'")
            return files

        dirpath = Path(settings.SCANS_ROOT, self.fpath)  # does nowt as fpath is a rooted path already
        if not dirpath.is_dir():
            files.append("No uploaded scans or incorrect path to wallet contents")
            return files

        try:
            for f in dirpath.iterdir():
                if f.is_file():
                    files.append(Path(f.name).stem)
                else:
                    files.append(f"-{Path(f.name).stem}-")
        except FileNotFoundError:
            files.append("FileNotFoundError")
        return files

    def fixsurvextick(self, tick):
        """Return "seagreen" if any survex block of this wallet has a survexfile, else *tick*."""
        # blocks = SurvexBlock.objects.filter(scanswallet = self)
        # if any exist in db, no check for validity or a real file. Refactor.
        if any(b.survexfile for b in self.survexblock_set.all()):
            return "seagreen"  # slightly different shade of green
        return tick

    def get_ticks(self):
        """Reads all the JSON data and sets the colour of the completion tick for each condition.

        Returns:
            A dict mapping each of "S" (survex), "C" (cave description), "Q" (QMs),
            "N" (notes), "P" (plan), "E" (elevation), "T" (tunnel/therion) and
            "W" (website) to a colour name. Every tick is "darkgrey" when the wallet
            has no readable JSON.

        Side effects: the cached JSON gets a list-valued "survex file" entry and a
        default of False for 'notes not required'.
        """
        waldata = self.get_json()
        if not waldata:
            return {key: "darkgrey" for key in "SCQNPETW"}

        ticks = {}

        # Initially, are there any required survex files present ?
        # Note that we can't set the survexblock here on the wallet as that info is only
        # available while parsing the survex file
        survexok = "red"
        ticks["S"] = "red"
        if waldata.get("survex not required"):
            survexok = "green"
            ticks["S"] = "green"
        elif waldata.get("survex file"):
            # Same normalisation as check_survexlist(), so a comma-separated string
            # is treated as several files rather than one oddly-named file.
            waldata["survex file"] = Wallet.input_to_list(waldata["survex file"])
            ngood = 0
            nbad = 0
            for sx in waldata["survex file"]:
                # this logic appears in several places, inc uploads.py). Refactor.
                if sx == "":
                    continue
                if (Path(settings.SURVEX_DATA) / self._with_svx_suffix(sx)).is_file():
                    ngood += 1
                else:
                    nbad += 1
            if ngood and not nbad:  # all valid
                ticks["S"] = "green"
            elif ngood and nbad:  # some valid, some invalid
                ticks["S"] = "orange"
            else:  # all bad, or nothing but blanks
                ticks["S"] = "red"

        # Cave Description
        ticks["C"] = "green" if waldata.get("description written") else survexok

        # QMs
        # NOTE(review): indentation was lost in the copy this was restored from; the year
        # override is assumed to apply only when QMs have not been written - confirm.
        if waldata.get("qms written"):
            ticks["Q"] = "green"
        else:
            ticks["Q"] = survexok
            wallet_year = self.year()  # call once: year() saves the wallet each time
            if not wallet_year:
                ticks["Q"] = "darkgrey"
            elif int(wallet_year) < 2015:
                ticks["Q"] = "lightgrey"

        # JSON without this key is treated as "notes are required"
        waldata.setdefault("notes not required", False)

        # Notes, Plan, Elevation
        files = self.get_fnames()

        # Notes required
        notes_scanned = any(f.startswith("note") or f.endswith("notes") for f in files)
        notes_required = not (notes_scanned or waldata["notes not required"])
        ticks["N"] = "red" if notes_required else "green"
        # print(f"{self.walletname} {ticks['N'].upper()} {notes_scanned=} {notes_required=} {waldata['notes not required']=}")

        # Plan drawing required
        plan_scanned = any(f.startswith("plan") or f.endswith("plan") for f in files)
        plan_drawing_required = not (
            plan_scanned or waldata.get("plan drawn") or waldata.get("plan not required")
        )
        ticks["P"] = "red" if plan_drawing_required else "green"

        # Elev drawing required
        elev_scanned = any(
            f.startswith("elev") or f.endswith("elev") or f.endswith("elevation") for f in files
        )
        elev_drawing_required = not (
            elev_scanned or waldata.get("elev drawn") or waldata.get("elev not required")
        )
        ticks["E"] = "red" if elev_drawing_required else "green"

        # if electronic, don't require P or E
        if waldata.get("electronic survey"):
            # ticks["N"] = "green"
            ticks["P"] = "green"
            ticks["E"] = "green"
            # ticks["T"] = "green" # No, this does not mean it has been 'tunneled' properly

        # Tunnel / Therion
        if elev_drawing_required or plan_drawing_required:
            ticks["T"] = "red"
        else:
            ticks["T"] = "green"

        # Website
        ticks["W"] = "green" if waldata.get("website updated") else "red"

        return ticks

    def __str__(self):
        return "[" + str(self.walletname) + " (Wallet)]"