troggle/core/models/wallets.py

import datetime
import json
import operator
import re
from functools import reduce
from pathlib import Path
from urllib.parse import urljoin

from django.conf import settings
from django.db import models
from django.urls import reverse

from troggle.core.models.troggle import DataIssue
from troggle.core.models.caves import get_cave_leniently

# from troggle.core.models.survex import SurvexBlock
# from troggle.core.models.troggle import DataIssue # circular import. Hmm

# Inclusive (earliest, latest) years that Wallet.year() accepts when it reads the year
# from the first four characters of a wallet name; anything outside gives None.
YEAR_RANGE = (1975, 2050)

def make_valid_date(walletname, date):
    """Take whatever garbage some fool has typed in and try to make it into a valid date.

    The separators '.' and '/' are first normalised to '-'. The following forms are then
    tried, in this order:
      1. strict ISO, e.g. 2023-07-14
      2. ISO order but with a one-digit month or day, e.g. 2023-7-4
      3. European day-month-year, e.g. 14/07/2023 or 14.07.23
         (a two-digit year is taken to mean 20yy; a four-digit year is used as typed)

    walletname: only used to label the warning printed when a European date is accepted.
    date: the raw value from the wallet JSON. None, "" and the string "None" give None quietly.

    Returns a datetime.date, or None if the value cannot be understood (a message is printed).
    """
    if date is None:
        return None
    datestr = str(date).strip().replace(".", "-").replace("/", "-")
    if not datestr or datestr == "None":
        return None

    try:
        return datetime.date.fromisoformat(datestr)
    except ValueError:
        pass  # not strict ISO, fall through to the more tolerant patterns

    # ISO order, but probably a single digit day number or month number.
    # This is tried before the European pattern so that e.g. '2023-7-14' is never
    # misread as day 23, month 7, year 14.
    match = re.search(r'(\d{4})-(\d{1,2})-(\d{1,2})', datestr)
    if match:
        y, m, d = (int(g) for g in match.groups())
        try:
            samedate = datetime.date(y, m, d)
        except ValueError:
            print(f"! - Fail, tried to decompose date in yyyy-mm-d or yyy-m-dd format but failed: {datestr=}  ")
            return None
        print(f"- - Warning, 1 digit only for month or day '{datestr=}' but we coped: {samedate.isoformat()} ")
        return samedate

    # Could be in std euro format e.g. 14/07/2023 (the slashes are already dashes by now).
    # The lookarounds stop the pattern from matching in the middle of a longer run of digits.
    match = re.search(r'(?<!\d)(\d{1,2})-(\d{1,2})-(\d{4}|\d{2})(?!\d)', datestr)
    if match:
        d, m, y = (int(g) for g in match.groups())
        if y < 100:
            # only a two-digit year needs the century added
            y = y + 2000
        try:
            samedate = datetime.date(y, m, d)
        except ValueError:
            print(f"! - Fail, tried to decompose date in dd/mm/yyyy format but failed: {datestr=}  ")
            return None
        print(f"- - Warning, not in ISO format. '{datestr=}' but we tried to cope: {samedate.isoformat()} {walletname}")
        return samedate

    print(f"! - Failed to understand date, none of our tricks worked {datestr=}  ")
    return None

# Folder names of old-style wallets (survey books, notebooks, loose pages).
# Wallet.get_json() returns None for these without looking for a contents.json file.
archaic_wallets = [
    "1984AndysNotebook",
    "1984BrownLandscapeNotebook",
    "1989LUSS",
    "1989surveybook",
    "1990Surveybookkh",
    "1991surveybook",
    "1992-94Surveybookkh",
    "1994",
    "1995-96kh",
    "1996-1999NotKHbook",
    "1997-99kh",
    "loosepages",
]
class Wallet(models.Model):
    """We do not keep the JSON values in the database, we query them afresh each time,
    but we may change this if we need to do a Django query on e.g. personame

    ManyToMany field uses modern Django: a hidden Class, unlike CaveAndEntrances which is explict and visible.

    We parse all the JSON on initial reset/import but we only keep data on the Wallet objects that we need for
    indexing: people, caves and year. So that we can quickly make reports on e.g. all wallets for a particular cave.
    All the other fields in the JSON are parsed and loaded from file only dynamiclally, when a report is beging generated,
    but since this only happens for a subset of wallets (e.g. for a specific year) the speed penalty is fine. Indeed it
    might be faster overall as db operations in django are a bit slow.
    This other data is the stuff which generates the tick-lists.

    A trick to minimize the number of times we hit the file to load JSON data is to use a field on every wallet object
    called 'JSONdata', but this is not part of the schema and there is no corresponding field in the db. The property
    'JSONdata' only lives for as long as the ephemeral python wallet object. We use this to cache the JSON data so that
    queries of several different things, e.g. 'name', or 'survexnotrequired', do not repeatedly re-read the JSON
    which has not changed.
    """

    fpath = models.CharField(max_length=200)
    walletname = models.CharField(max_length=200)
    walletdate = models.DateField(blank=True, null=True)
    walletyear = models.DateField(blank=True, null=True)
    caves =  models.ManyToManyField("Cave", related_name="wallets")
    persons = models.ManyToManyField("Person", related_name="wallets")

    class Meta:
        ordering = ("walletname",)

    @staticmethod
    def input_to_list(stuff):
        """Normalise a wallet JSON value into a list. Always returns a list, even if it is empty.

        With wallets we often have to deal with either a list object (from valid JSON parsing)
        or a string which may or may not also be a list, but munged by user error on a form etc.

        - list: returned as a new list with empty items ("" or None) dropped.
        - str:  split on commas; each piece has surrounding square brackets and spaces
                stripped and any quote characters removed; empty pieces are dropped.
        - anything else that is truthy: wrapped in a single-item list.
        - anything falsy (None, 0, ...): gives [].

        '/' and embedded spaces are deliberately left alone: survex file paths are passed
        through this function and are later joined onto the survex data directory, so
        rewriting them here would break those lookups.
        """
        if isinstance(stuff, list):
            return [item for item in stuff if item]  # not an empty string, None
        if isinstance(stuff, str):
            cleaned = []
            for piece in stuff.split(","):
                # the second strip catches brackets/spaces exposed once the quotes have gone
                piece = piece.strip("[] ").replace("'", "").replace('"', "").strip("[] ")
                if piece:
                    cleaned.append(piece)
            return cleaned
        if stuff:
            return [stuff]  # single object, not a string, but now in a list.

        return []


    def get_absolute_url(self):
        """Return the URL of the 'singlewallet' view for this wallet, with each '#' in the name written as %23."""
        # we do not use URL_ROOT any more.
        escaped_name = self.walletname.replace("#", "%23")
        return reverse("singlewallet", kwargs={"path": escaped_name})

    def get_url(self):
        """Return the wallet-edit URL path for this wallet, with every '#' replaced by ':'."""
        edit_path = f"/walletedit/{self.walletname}"
        return edit_path.replace("#", ":")

    def get_json(self):
        """Return the parsed contents.json for this wallet as a dict, or None if it cannot be read.

        The result is cached on this python object as 'JSONdata', so the file is read once per
        python instance, not once per query. Failures are not cached.

        Returns None when:
          - the wallet folder name is in archaic_wallets (no JSON is expected),
          - the contents.json file does not exist,
          - the file is not valid JSON.

        Side effects:
          - if the JSON date can be understood by make_valid_date(), sets self.walletdate,
            saves the wallet and rewrites the "date" entry of the returned dict in ISO format;
          - records a DataIssue (parser="wallets") when the file is missing, fails to load,
            or has a date which cannot be understood.

        This repeats a lot of stuff done in the initial parsing job:
            traversing the file system.
            Needs to be refactored as special handling of subdirectories is duplicated

        DataIssue is imported locally to prevent import cycle problem"""

        if hasattr(self, "JSONdata"):
            return self.JSONdata

        from troggle.core.models.troggle import DataIssue

        scans_path = Path(settings.SCANS_ROOT)
        wurl = self.get_url()

        # :drawings: walletjson/2022/2022#01/contents.json
        # fpath = /mnt/d/EXPO/expofiles/surveyscans/1999/1999#02
        fp = Path(self.fpath)
        if fp.name in archaic_wallets:
            return None

        if fp.parent.parent.parent.parent == scans_path:
            # fpath is a subfolder two levels below the wallet folder
            wname = fp.parent.parent.name
            wyear = fp.parent.parent.parent.name
        else:
            wname = fp.name
            wyear = fp.parent.name
        try:
            int(wyear)  # only checking that the year folder name is a number
        except ValueError:
            message = f"! 3 or more deep subfolder detected?:\n {fp.parent.name=}\n {fp.name=} \n {self.fpath=} {wurl=}"
            print(message)
            # this is a hack, work down from /surveyscans instead. To be fixed
            wname = fp.parent.parent.parent.name
            wyear = fp.parent.parent.parent.parent.name

        # Old-style wallets (len(wyear) != 4 or len(wname) != 6) have no contents.json, but
        # returning None early for them ruined all the tick-list displays, so we carry on.

        jsonfile = Path(settings.DRAWINGS_DATA, "walletjson") / wyear / wname / "contents.json"
        if not jsonfile.is_file():
            message = f"! {jsonfile} is not a file:\n {wyear=} (should be eg. '2023')\n {wname=} (should be eg. '2023#13')\n {self.fpath=}"
            print(message)
            if wname not in archaic_wallets:
                DataIssue.objects.update_or_create(parser="wallets", message=message, url=wurl)
            return None

        with open(jsonfile) as json_f:
            try:
                waldata = json.load(json_f)
            except ValueError:
                # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors
                message = f"! {str(self.walletname)} Failed to load {jsonfile} JSON file"
                print(message)
                DataIssue.objects.update_or_create(parser="wallets", message=message, url=wurl)
                return None

        rawdate = waldata.get("date")
        if rawdate:
            thisdate = make_valid_date(self.walletname, rawdate)
            if thisdate:
                self.walletdate = thisdate
                self.save()
                waldata["date"] = thisdate.isoformat()
            elif rawdate != "None":
                # the literal string "None" just means that no date was entered
                message = f"! {str(self.walletname)} Date format not ISO {waldata['date']}. Failed to load from {jsonfile} JSON file"
                DataIssue.objects.update_or_create(parser="wallets", message=message, url=wurl)

        self.JSONdata = waldata
        return waldata

    def check_survexlist(self):
        """Record a DataIssue for every survex file named in the wallet JSON which is not on disc.

        The "survex file" entry of the (cached) JSON data is normalised to a list as a side
        effect. Names without a .svx suffix have one added before the file is looked for
        under settings.SURVEX_DATA. Does nothing if there is no JSON or no "survex file" entry.

        Always returns None.
        """
        if not (waldata := self.get_json()):  # WALRUS
            return None
        if not waldata.get("survex file"):
            return None

        # imported locally, as elsewhere in this class, to avoid an import cycle
        from troggle.core.models.troggle import DataIssue

        wurl = self.get_url()
        survex_root = Path(settings.SURVEX_DATA)
        waldata["survex file"] = Wallet.input_to_list(waldata["survex file"])
        for sx in waldata["survex file"]:
            # this logic appears in several places, inc get_ticks(). and wallets_edit.py Refactor.
            if sx == "":
                continue
            if Path(sx).suffix.lower() != ".svx":
                sx = sx + ".svx"
            if not (survex_root / sx).is_file():
                message = f"{self} Survex file {sx} was not found in LOSER repo"
                DataIssue.objects.update_or_create(parser="wallets", message=message, url=wurl)
                # TODO: check using <kataster> field on the cave whether it has been renamed...
        return None

    def allcaves(self):
        """Link this wallet to every cave named in the "cave" entry of its JSON.

        Called when parsing importing all data. Called on all new wallets, but before
        the survex files are parsed.

        Each cave identifier is looked up with get_cave_leniently(); identifiers which give
        no cave are skipped. A failure for one cave is reported with print() and does not
        stop the remaining caves being added. Always returns None.
        """
        if not (jsondata := self.get_json()):  # WALRUS
            return None
        for caveid in Wallet.input_to_list(jsondata.get("cave")):
            try:
                caveobject = get_cave_leniently(caveid)
                if caveobject:
                    self.caves.add(caveobject)
            except Exception:
                # deliberately best-effort, but do not swallow KeyboardInterrupt or SystemExit
                print(f"FAIL adding cave to wallet.caves  '{caveid}'")
        return None

    def year(self):
        """This gets the year syntactically without opening and reading the JSON.

        The wallet name must look like 'YYYY#...' with YYYY inside YEAR_RANGE (inclusive).

        Returns the year as a four-character string, or None if the name does not have that
        form, the first four characters are not a number, or the year is out of range.

        Side effect: when a year is found, self.walletyear is set to 1st January of that year
        and the wallet is saved.
        """
        wname = self.walletname
        if len(wname) < 5 or wname[4] != "#":
            return None
        try:
            year = int(wname[0:4])
        except ValueError:
            # e.g. 'abcd#01': not a year, so treat it like any other old-style name
            return None
        ymin, ymax = YEAR_RANGE
        if not ymin <= year <= ymax:
            return None
        self.walletyear = datetime.date(year, 1, 1)
        self.save()
        return str(year)

    # Yes this is horribly, horribly inefficient, esp. for a page that have date, people and cave in it
    def date(self):
        """Return the date of this wallet, or None if there is none.

        If self.walletdate is already set it is returned as it is. Otherwise the JSON data is
        read just to get the JSON date: '.' separators are changed to '-' and the value (or,
        failing that, its first 10 characters) is parsed as an ISO date.

        Side effect: a newly parsed date is stored in self.walletdate as a datetime.date and
        the wallet is saved. Nothing is saved when no date could be parsed.
        """
        if self.walletdate:
            return self.walletdate
        if not (jsondata := self.get_json()):  # WALRUS
            return None
        if self.walletdate:
            # get_json() sets walletdate itself when it has just loaded a valid date
            return self.walletdate

        datestr = jsondata.get("date")
        if not datestr:
            return None

        datestr = str(datestr).replace(".", "-")
        # try the whole string first, then only the leading 'yyyy-mm-dd' part
        for candidate in (datestr, datestr[:10]):
            try:
                samedate = datetime.date.fromisoformat(candidate)
            except ValueError:
                continue
            self.walletdate = samedate
            self.save()
            return samedate
        return None

    def people(self):
        """Return the "people" entry of the wallet JSON, or None if there is no JSON data."""
        jsondata = self.get_json()
        return jsondata["people"] if jsondata else None

    def cave(self):
        """Return the "cave" entry of the wallet JSON, or None if there is no JSON data."""
        jsondata = self.get_json()
        return jsondata["cave"] if jsondata else None

    def name(self):
        """Return the "name" entry of the wallet JSON, or None if there is no JSON data."""
        jsondata = self.get_json()
        return jsondata["name"] if jsondata else None


    def survexfiles(self):
        """Return the "survex file" entry of the wallet JSON as a list, or None if there is no JSON data."""
        jsondata = self.get_json()
        if not jsondata:
            return None
        # the JSON value may be a list or a string; input_to_list() copes with both
        return Wallet.input_to_list(jsondata["survex file"])

    def get_fnames(self):
        """List what is in the wallet folder: filenames without the suffix, i.e. without the ".jpg".

        Anything which is not a regular file is listed as its stem wrapped in dashes, e.g. '-name-'.
        If fpath is empty or is not a directory, the list holds a single explanatory message instead.
        """
        # Per the original note, fpath is already a rooted path, so SCANS_ROOT has no effect here.
        dirpath = Path(settings.SCANS_ROOT, self.fpath)
        if not self.fpath:
            return [f"Incorrect path to wallet contents: '{self.fpath}'"]
        if not dirpath.is_dir():
            return ["No uploaded scans or incorrect path to wallet contents"]

        files = []
        try:
            for entry in dirpath.iterdir():
                stem = Path(entry.name).stem
                files.append(stem if entry.is_file() else f"-{stem}-")
        except FileNotFoundError:
            # keep whatever was listed before the folder disappeared, and flag it
            files.append("FileNotFoundError")
        return files

    def fixsurvextick(self, tick):
        """Return "seagreen" if any survex block of this wallet has a survexfile, otherwise return tick unchanged."""
        # Only checks that something exists in the db, no check for validity or a real file. Refactor.
        # Same set as SurvexBlock.objects.filter(scanswallet = self)
        if any(block.survexfile for block in self.survexblock_set.all()):
            return "seagreen"  # slightly different shade of green
        return tick

    def get_ticks(self):
        """Reads all the JSON data and sets the colour of the completion tick for each condition.

        Returns a dict of colour names keyed by tick letter, in this order:
            S survex file, C cave description, Q QMs, N notes, P plan, E elevation,
            T Tunnel / Therion drawing, W website.
        Every tick is "darkgrey" when the wallet has no readable JSON data.

        Side effects on the cached JSON data: "survex file" is normalised to a list and
        "notes not required" is set to False if it was missing. self.year() is called once,
        and that method saves the wallet.
        """
        waldata = self.get_json()
        if not waldata:
            return dict.fromkeys("SCQNPETW", "darkgrey")

        ticks = {}

        # Initially, are there any required survex files present ?
        # Note that we can't set the survexblock here on the wallet as that info is only available while parsing the survex file
        survexok = "red"
        ticks["S"] = "red"
        if waldata.get("survex not required"):
            survexok = "green"
            ticks["S"] = "green"
        elif waldata.get("survex file"):
            # may be a list or a (comma-separated) string, same handling as check_survexlist()
            waldata["survex file"] = Wallet.input_to_list(waldata["survex file"])
            survex_root = Path(settings.SURVEX_DATA)
            ngood = 0
            nbad = 0
            for sx in waldata["survex file"]:
                # this logic appears in several places, inc uploads.py). Refactor.
                if sx == "":
                    continue
                if Path(sx).suffix.lower() != ".svx":
                    sx = sx + ".svx"
                if (survex_root / sx).is_file():
                    ngood += 1
                else:
                    nbad += 1
            if ngood and not nbad:  # all valid
                ticks["S"] = "green"
            elif ngood and nbad:  # some valid, some invalid
                ticks["S"] = "orange"
            else:  # all bad, or nothing but blank strings
                ticks["S"] = "red"

        # Cave Description
        ticks["C"] = "green" if waldata.get("description written") else survexok

        # QMs
        year = self.year()  # called once only: every call saves the wallet
        if not year:
            ticks["Q"] = "darkgrey"
        elif int(year) < 2015:
            ticks["Q"] = "lightgrey"
        elif waldata.get("qms written"):
            ticks["Q"] = "green"
        else:
            ticks["Q"] = survexok

        waldata.setdefault("notes not required", False)

        # Notes, Plan, Elevation
        files = self.get_fnames()

        def scanned(prefix, *suffixes):
            """True if any scanned filename starts with prefix or ends with one of suffixes."""
            return any(f.startswith(prefix) or f.endswith(suffixes) for f in files)

        # Notes required
        notes_required = not (scanned("note", "notes") or waldata["notes not required"])
        ticks["N"] = "red" if notes_required else "green"

        # Plan drawing required
        plan_drawing_required = not (
            scanned("plan", "plan") or waldata.get("plan drawn") or waldata.get("plan not required")
        )
        ticks["P"] = "red" if plan_drawing_required else "green"

        # Elev drawing required
        elev_drawing_required = not (
            scanned("elev", "elev", "elevation") or waldata.get("elev drawn") or waldata.get("elev not required")
        )
        ticks["E"] = "red" if elev_drawing_required else "green"

        # if electronic, don't require P or E
        if waldata.get("electronic survey"):
            ticks["P"] = "green"
            ticks["E"] = "green"
            # N and T are left alone: electronic does not mean it has been 'tunneled' properly

        # Tunnel / Therion
        ticks["T"] = "red" if (elev_drawing_required or plan_drawing_required) else "green"

        # Website
        ticks["W"] = "green" if waldata.get("website updated") else "red"

        return ticks

    def __str__(self):
        """Return the wallet name in square brackets, marked as a Wallet."""
        return f"[{self.walletname} (Wallet)]"