troggle-unchained/parsers/QMs.py

import csv
import os
import re
from datetime import date

from django.conf import settings

from troggle.core.models.caves import QM, Cave, LogbookEntry
from troggle.core.models.troggle import DataIssue
from troggle.core.utils import save_carefully

"""Reads the CSV files containing QMs for a select few caves.
See parsers/survex.py for the parser which extracts QMs from the survex files.
"""


def deleteQMs():
    """Delete every QM row, then every DataIssue recorded with parser="QMs"."""
    every_qm = QM.objects.all()
    every_qm.delete()

    qm_parser_issues = DataIssue.objects.filter(parser="QMs")
    qm_parser_issues.delete()


def _report_qm_issue(message):
    """Print message and record it as a DataIssue against the "QMs" parser."""
    print(message)
    DataIssue.objects.create(parser="QMs", message=message)


def parseCaveQMs(cave, inputFile, ticked=False):
    """Import the QMs listed in one per-cave file and save each as a QM instance.

    cave      -- one of the keys "204-steinBH", "234-Hauch" or "161-KH".
    inputFile -- path of the QM file, relative to settings.EXPOWEB.
    ticked    -- only used for "161-KH", where it is passed on to parse_KH_QMs().
                 For the CSV caves a QM is ticked when its completion
                 description column is non-empty.

    Returns the number of QM rows processed, or None if the cave is not in the
    database (or the cave key is not recognised); in that case a DataIssue is
    recorded and nothing is imported.

    "161-KH" files are HTML, not CSV, and are handed to parse_KH_QMs().

    The CSV columns are read by position, e.g. in 204/qm.csv
    C2000-204-39	B	Tree	Pitch in Cave Tree		treeumphant.28	Gosser Streamway
    i.e. code, grade, area, location description, completion description,
    nearest station description, comment. The CSV only holds the year of the QM
    (characters 1-4 of the code), not an exact date.

    Rows that raise KeyError or IndexError are reported as DataIssues and skipped.
    """
    # Resolve the path before anything else: every message below includes it.
    qmPath = os.path.join(settings.EXPOWEB, inputFile)  # why not use the pathlib stuff ?

    # cave key -> (official_name to look up in the database, label used in messages)
    known_caves = {
        "204-steinBH": ("Steinbr&uuml;ckenh&ouml;hle", "Steinbruckenhoehle"),
        "234-Hauch": ("Hauchh&ouml;hle", "Hauchhoehle"),
        "161-KH": ("Kaninchenh&ouml;hle", "KH"),
    }
    if cave not in known_caves:
        # Without a Cave object there is nothing to attach the QMs to.
        _report_qm_issue(f" ! - {qmPath} cave '{cave}' is not handled by the QMs parser")
        return None

    official_name, label = known_caves[cave]
    try:
        caveid = Cave.objects.get(official_name=official_name)
    except Cave.DoesNotExist:
        _report_qm_issue(f" ! - {qmPath} {label} is not in the database. Please run cave parser")
        return None

    if cave == "161-KH":
        return parse_KH_QMs(caveid, inputFile=inputFile, ticked=ticked)

    nqms = 0
    with open(qmPath, "r") as qmCSVContents:
        # Sniff the dialect from the whole file, then rewind to parse it.
        dialect = csv.Sniffer().sniff(qmCSVContents.read())
        qmCSVContents.seek(0, 0)
        qmReader = csv.reader(qmCSVContents, dialect=dialect)
        next(qmReader)  # Skip header row
        for line in qmReader:
            try:
                year = int(line[0][1:5])
                QMnum = re.match(r".*?-\d*?-X?(?P<numb>\d*)", line[0]).group("numb")

                newQM = QM()
                newQM.number = QMnum
                newQM.cave = caveid
                newQM.blockname = ""
                newQM.grade = "D" if line[1] == "Dig" else line[1]
                newQM.area = line[2]
                newQM.location_description = line[3]
                # In the table, completion is indicated by the presence of a
                # completion description.
                newQM.completion_description = line[4]
                newQM.nearest_station_description = line[5]
                newQM.ticked = bool(newQM.completion_description)
                newQM.comment = line[6]
                newQM.expoyear = year

                # NOTE(review): this lookup does not filter on cave and relies on
                # found_by, which this parser never sets - confirm it is still wanted.
                try:
                    preexistingQM = QM.objects.get(number=QMnum, found_by__date__year=year)
                except QM.DoesNotExist:
                    # No pre-existing QM, so save the new one.
                    newQM.save()
                else:
                    # '== False' kept deliberately so that a None value is not
                    # treated as "unmodified". VERY OLD THING.
                    if preexistingQM.new_since_parsing == False:  # noqa: E712
                        preexistingQM.delete()
                        newQM.save()
                    else:
                        print(f" - preserving {preexistingQM!s}, which was edited in admin \r")
                nqms += 1
            except KeyError:  # check on this one
                _report_qm_issue(f" ! - {qmPath} KeyError {str(line)} ")
            except IndexError:
                _report_qm_issue(f" ! - {qmPath} IndexError {str(line)} ")
    return nqms


def parse_KH_QMs(kh, inputFile, ticked):
    """Import QMs from the 1623-161 (Kaninchenhohle) html pages, different format.

    kh        -- the Cave object the QMs are attached to.
    inputFile -- path of the html file, relative to settings.EXPOWEB.
    ticked    -- value stored in the 'ticked' field of every QM found in this file.

    Each line of the file is searched for a QM anchor; lines that do not match
    are ignored. Matching lines are stored via save_carefully().
    Returns the number of matching lines.
    """
    qm_line = re.compile(
        r"name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]"
    )

    # Read everything up front so the file is closed before any database work.
    with open(os.path.join(settings.EXPOWEB, inputFile), "r") as khFile:
        khQMs = khFile.readlines()

    nqms = 0
    for line in khQMs:
        found = qm_line.search(line)
        if not found:
            continue
        res = found.groupdict()
        year = int(res["year"])
        # A placeholder logbook entry used to be created here as 'found_by'.
        # No longer needed.
        lookupArgs = {
            "blockname": "",
            "expoyear": year,
            "number": res["number"],
            "cave": kh,
            "grade": res["grade"],
        }
        nonLookupArgs = {
            "ticked": ticked,
            "nearest_station_name": res["nearest_station"],
            "location_description": res["description"],
        }
        save_carefully(QM, lookupArgs, nonLookupArgs)
        nqms += 1
    return nqms


def Load_QMs():
    """Wipe the existing QMs and re-import them from the per-cave files.

    Calls deleteQMs() first, then parseCaveQMs() once per input file in the
    order listed below, and finally prints a one-line summary of the counts.
    """
    deleteQMs()

    # (cave key, path relative to expoweb, extra keyword arguments), in import order.
    # 'balkonhoehle' (1623/264/qm.csv) is not imported.
    sources = (
        ("204-steinBH", r"1623/204/qm.csv", {}),
        ("234-Hauch", r"1623/234/qm.csv", {}),
        ("161-KH", "1623/161/qmtodo.htm", {"ticked": False}),
        ("161-KH", "1623/161/qmdone.htm", {"ticked": True}),
    )
    count_204, count_234, todo_161, done_161 = [
        parseCaveQMs(cave=cave_key, inputFile=rel_path, **extra) for cave_key, rel_path, extra in sources
    ]

    print(f" - Imported: {count_204} QMs for 204, {count_234} QMs for 234, {done_161} QMs for 161 done, {todo_161} QMs for 161 not done.")

    print()