"""Reads the CSV files containing QMs for a select few caves.

See parsers/survex.py for the parser which extracts QMs from the survex files.
"""
import csv
import os
import re
from pathlib import Path

from django.conf import settings

from troggle.core.models.caves import Cave
from troggle.core.models.logbooks import QM
from troggle.core.models.troggle import DataIssue

# Maps the short cave key passed to parseCaveQMs() onto a pair of
# (official_name used for the database lookup, label used in error messages).
_CAVES = {
    "204-steinBH": ("Steinbrückenhöhle", "Steinbruckenhoehle"),
    "234-Hauch": ("Hauchhöhle", "Hauchhoehle"),
    "161-KH": ("Kaninchenhöhle", "KH"),
}

# The only key whose input is an HTML page rather than a CSV file.
_KH_CAVE_KEY = "161-KH"

# Extracts the QM number from a CSV identifier such as "C2000-204-39".
_CSV_QM_NUMBER = re.compile(r".*?-\d*?-X?(?P<numb>\d*)")

# Matches one QM entry in the Kaninchenhoehle HTML pages.
# NOTE(review): the group names year/number/grade/description/nearest_station
# are the keys parse_KH_QMs() reads; the name "cave" and the literal "</a>" and
# "<dd>" tags are a reconstruction - confirm against 1623/161/qmtodo.htm.
_KH_QM_LINE = re.compile(
    r"name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> "
    r"(?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]"
)


def _record_issue(message):
    """Print message and store it as a DataIssue for the "QMs" parser."""
    print(message)
    DataIssue.objects.create(parser="QMs", message=message)


def _parse_qm_id(qm_id):
    """Return (year, number) taken from a CSV QM id, or None if it is malformed.

    The year is characters 1-4 of the id (e.g. "2000" in "C2000-204-39") and
    the number is the digit run after the second hyphen, as a string.
    """
    found = _CSV_QM_NUMBER.match(qm_id)
    if found is None:
        return None
    try:
        year = int(qm_id[1:5])
    except ValueError:
        return None
    return year, found.group("numb")


def _store_csv_qm(caveid, year, QMnum, line):
    """Build a QM from one CSV row and save it.

    Raises IndexError if the row has fewer than seven columns; this happens
    before the database is touched.
    """
    newQM = QM()
    # newQM.found_by=placeholder
    newQM.number = QMnum
    newQM.cave = caveid
    newQM.blockname = ""
    newQM.grade = "D" if line[1] == "Dig" else line[1]
    newQM.area = line[2]
    newQM.location_description = line[3]
    # In the table, completion is indicated by the presence of a completion description.
    newQM.completion_description = line[4]
    newQM.nearest_station_description = line[5]
    newQM.ticked = bool(newQM.completion_description)
    newQM.comment = line[6]
    newQM.expoyear = year

    try:
        # year and number are unique for a cave in CSV imports
        # NOTE(review): this filters on found_by, which is no longer set on
        # new QMs here - confirm the lookup is still valid for the QM model.
        preexistingQM = QM.objects.get(number=QMnum, found_by__date__year=year)
    except QM.DoesNotExist:
        # if there is no pre-existing QM, save the new one
        newQM.save()
        return

    if preexistingQM.new_since_parsing is False:
        # the pre-existing QM has not been modified, overwrite it - VERY OLD THING
        preexistingQM.delete()
        newQM.save()
    else:
        # otherwise, print that it was ignored
        print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r"))


def deleteQMs():
    """Delete every QM and every DataIssue recorded by the "QMs" parser."""
    QM.objects.all().delete()
    DataIssue.objects.filter(parser="QMs").delete()


def parseCaveQMs(cave, inputFile, ticked=False):
    """Import the QMs for one cave and return how many rows were processed.

    Runs through the file at inputFile (a path relative to settings.EXPOWEB)
    and saves each QM as a QM instance. For "204-steinBH" and "234-Hauch" the
    file is a CSV; for "161-KH" it is an HTML page handed to parse_KH_QMs().

    cave      -- one of "204-steinBH", "234-Hauch" or "161-KH".
    inputFile -- path of the input file, relative to settings.EXPOWEB.
    ticked    -- only used for "161-KH"; CSV rows are marked ticked when they
                 have a completion description.

    Returns the number of QMs handled, or None (after recording a DataIssue)
    when the cave key is unknown or the cave is not in the database.

    Linking to a passage in a SVX file might be more interesting as the QM
    does sometimes have the passage name, e.g. in 204/qm.csv
    C2000-204-39 B Tree Pitch in Cave Tree treeumphant.28 Gosser Streamway
    The CSV file does not have the exact date for the QM, only the year, so
    links to survex files might be ambiguous. But potentially useful?

    Much of this code assumes that QMs are edited using troggle. This is not
    done so this code can be deleted. All QMs are created afresh and this is
    all only run once on import on a fresh database.
    """
    # Built first: the error messages below need it.
    qmPath = Path(settings.EXPOWEB, inputFile)

    if cave not in _CAVES:
        _record_issue(f" ! - {qmPath} cave key '{cave}' is not known to the QM parser")
        return None

    official_name, label = _CAVES[cave]
    try:
        caveid = Cave.objects.get(official_name=official_name)
    except Cave.DoesNotExist:
        _record_issue(f" ! - {qmPath} {label} is not in the database. Please run cave parser")
        return None

    if cave == _KH_CAVE_KEY:
        return parse_KH_QMs(caveid, inputFile=inputFile, ticked=ticked)

    nqms = 0
    with open(qmPath, "r") as qmCSVContents:
        dialect = csv.Sniffer().sniff(qmCSVContents.read())
        qmCSVContents.seek(0, 0)
        qmReader = csv.reader(qmCSVContents, dialect=dialect)
        next(qmReader)  # Skip header row
        for line in qmReader:
            try:
                parsed = _parse_qm_id(line[0])
                if parsed is None:
                    # A malformed id must not abort the whole import.
                    _record_issue(f" ! - {qmPath} unparseable QM id {str(line)} ")
                    continue
                year, QMnum = parsed
                _store_csv_qm(caveid, year, QMnum, line)
                nqms += 1
            except KeyError:  # check on this one
                _record_issue(f" ! - {qmPath} KeyError {str(line)} ")
                continue
            except IndexError:
                # Raised for blank rows and rows with too few columns.
                _record_issue(f" ! - {qmPath} IndexError {str(line)} ")
                continue
    return nqms


def parse_KH_QMs(kh, inputFile, ticked):
    """Import QMs from the 1623-161 (Kaninchenhohle) html pages, different format.

    kh        -- the Cave instance the QMs belong to.
    inputFile -- path of the HTML page, relative to settings.EXPOWEB.
    ticked    -- value stored in the ticked field of every QM created.

    Returns the number of QMs created. Lines that do not match the expected
    pattern are skipped silently.
    """
    with open(os.path.join(settings.EXPOWEB, inputFile), "r") as khQMfile:
        khQMs = khQMfile.readlines()

    nqms = 0
    for line in khQMs:
        found = _KH_QM_LINE.search(line)
        if not found:
            continue
        res = found.groupdict()
        # Create new. We know it doesn't exist as we deleted everything when we started.
        QM.objects.create(
            ticked=ticked,
            nearest_station_name=res["nearest_station"],
            location_description=res["description"],
            #'found_by':placeholder,
            blockname="",
            expoyear=int(res["year"]),
            number=res["number"],
            cave=kh,
            grade=res["grade"],
        )
        nqms += 1
    return nqms


def Load_QMs():
    """Delete all existing QMs, re-import them for caves 204, 234 and 161, and print the counts."""
    deleteQMs()
    n204 = parseCaveQMs(cave="204-steinBH", inputFile=r"1623/204/qm.csv")
    n234 = parseCaveQMs(cave="234-Hauch", inputFile=r"1623/234/qm.csv")
    n161 = parseCaveQMs(cave="161-KH", inputFile="1623/161/qmtodo.htm", ticked=False)
    t161 = parseCaveQMs(cave="161-KH", inputFile="1623/161/qmdone.htm", ticked=True)
    # parseCaveQMs(cave='balkonhoehle',inputFile=r"1623/264/qm.csv")
    print(f" - Imported: {n204} QMs for 204, {n234} QMs for 234, {t161} QMs for 161 done, {n161} QMs for 161 not done.")

    print()