From 2704fc42d4912b361481a19f525e9d1c508dd4b7 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Sat, 28 Jan 2023 14:04:32 +0000 Subject: [PATCH] faster db creation, safer file reading with 'with' --- parsers/QMs.py | 187 ++++++++++++++++++++++++------------------------- 1 file changed, 90 insertions(+), 97 deletions(-) diff --git a/parsers/QMs.py b/parsers/QMs.py index da3d6c7..9fa5bc7 100644 --- a/parsers/QMs.py +++ b/parsers/QMs.py @@ -1,6 +1,7 @@ import csv import os import re +from pathlib import Path from django.conf import settings @@ -65,112 +66,104 @@ def parseCaveQMs(cave, inputFile, ticked=False): return nqms # qmPath = settings.EXPOWEB+inputFile - qmPath = os.path.join(settings.EXPOWEB, inputFile) # why not use the pathlib stuff ? + qmPath = Path(settings.EXPOWEB, inputFile) - qmCSVContents = open(qmPath, "r") - dialect = csv.Sniffer().sniff(qmCSVContents.read()) - qmCSVContents.seek(0, 0) - qmReader = csv.reader(qmCSVContents, dialect=dialect) - next(qmReader) # Skip header row - n = 0 - nqms = 0 - for line in qmReader: - try: - n += 1 - year = int(line[0][1:5]) - f"PH_{int(year)}_{int(n):02d}" - QMnum = re.match(r".*?-\d*?-X?(?P\d*)", line[0]).group("numb") - newQM = QM() - # newQM.found_by=placeholder - newQM.number = QMnum - newQM.cave = caveid - newQM.blockname = "" - if line[1] == "Dig": - newQM.grade = "D" - else: - newQM.grade = line[1] - newQM.area = line[2] - newQM.location_description = line[3] - - # In the table, completion is indicated by the presence of a completion discription. - newQM.completion_description = line[4] - newQM.nearest_station_description = line[5] - if newQM.completion_description: - newQM.ticked = True - else: - newQM.ticked = False - - newQM.comment = line[6] + with open(qmPath, "r") as qmCSVContents: + dialect = csv.Sniffer().sniff(qmCSVContents.read()) + qmCSVContents.seek(0, 0) + qmReader = csv.reader(qmCSVContents, dialect=dialect) + next(qmReader) # Skip header row + n = 0 + nqms = 0 + for line in qmReader: try: - # year and number are unique for a cave in CSV imports - preexistingQM = QM.objects.get( - number=QMnum, found_by__date__year=year - ) # if we don't have this one in the DB, save it - if ( - preexistingQM.new_since_parsing is False - ): # if the pre-existing QM has not been modified, overwrite it - VERY OLD THING - preexistingQM.delete() + n += 1 + year = int(line[0][1:5]) + f"PH_{int(year)}_{int(n):02d}" + QMnum = re.match(r".*?-\d*?-X?(?P\d*)", line[0]).group("numb") + newQM = QM() + # newQM.found_by=placeholder + newQM.number = QMnum + newQM.cave = caveid + newQM.blockname = "" + if line[1] == "Dig": + newQM.grade = "D" + else: + newQM.grade = line[1] + newQM.area = line[2] + newQM.location_description = line[3] + + # In the table, completion is indicated by the presence of a completion discription. + newQM.completion_description = line[4] + newQM.nearest_station_description = line[5] + if newQM.completion_description: + newQM.ticked = True + else: + newQM.ticked = False + + newQM.comment = line[6] + try: + # year and number are unique for a cave in CSV imports + preexistingQM = QM.objects.get( + number=QMnum, found_by__date__year=year + ) # if we don't have this one in the DB, save it + if ( + preexistingQM.new_since_parsing is False + ): # if the pre-existing QM has not been modified, overwrite it - VERY OLD THING + preexistingQM.delete() + newQM.expoyear = year + newQM.save() + else: # otherwise, print that it was ignored + print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r")) + + except QM.DoesNotExist: # if there is no pre-existing QM, save the new one newQM.expoyear = year newQM.save() - else: # otherwise, print that it was ignored - print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r")) - - except QM.DoesNotExist: # if there is no pre-existing QM, save the new one - newQM.expoyear = year - newQM.save() - nqms += 1 - except KeyError: # check on this one - message = f" ! - {qmPath} KeyError {str(line)} " - print(message) - DataIssue.objects.create(parser="QMs", message=message) - continue - except IndexError: - message = f" ! - {qmPath} IndexError {str(line)} " - print(message) - DataIssue.objects.create(parser="QMs", message=message) - continue + nqms += 1 + except KeyError: # check on this one + message = f" ! - {qmPath} KeyError {str(line)} " + print(message) + DataIssue.objects.create(parser="QMs", message=message) + continue + except IndexError: + message = f" ! - {qmPath} IndexError {str(line)} " + print(message) + DataIssue.objects.create(parser="QMs", message=message) + continue return nqms def parse_KH_QMs(kh, inputFile, ticked): """import QMs from the 1623-161 (Kaninchenhohle) html pages, different format""" - khQMs = open(os.path.join(settings.EXPOWEB, inputFile), "r") - khQMs = khQMs.readlines() - nqms = 0 - for line in khQMs: - res = re.search( - r"name=\"[CB](?P\d*)-(?P\d*)-(?P\d*).* (?P[ABDCV])
(?P.*)\[(?P.*)\]", - line, - ) - if res: - res = res.groupdict() - year = int(res["year"]) - # logbook placeholder code was previously here. No longer needed. - # check if placeholder exists for given year, create it if not - # message = " ! - "+ str(year) + " logbook: placeholder entry for '161 KH' created. DUMMY EXPEDITION ID. Should be re-attached to the actual trip." - # placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, place="161", title="placeholder for QMs in 161", text=message, entry_type="DUMMY", expedition_id=1, defaults={"date": date((year), 1, 1),"cave_slug":str(kh)}) - # # if hadToCreate: - # print(message) - # DataIssue.objects.create(parser='QMs', message=message) - lookupArgs = { - #'found_by':placeholder, - "blockname": "", - "expoyear": year, - "number": res["number"], - "cave": kh, - "grade": res["grade"], - } - nonLookupArgs = { - "ticked": ticked, - "nearest_station_name": res["nearest_station"], - "location_description": res["description"], - } - instance, created = save_carefully(QM, lookupArgs, nonLookupArgs) - # if created: - # message = f" ! - {instance.code()} QM entry for '161 KH' created. ticked: {ticked}" - # print(message) - # DataIssue.objects.create(parser='QMs', message=message) - nqms += 1 + with open(os.path.join(settings.EXPOWEB, inputFile), "r") as khQMfile: + khQMs = khQMfile.readlines() + nqms = 0 + for line in khQMs: + res = re.search( + r"name=\"[CB](?P\d*)-(?P\d*)-(?P\d*).* (?P[ABDCV])
(?P.*)\[(?P.*)\]", + line, + ) + if res: + res = res.groupdict() + year = int(res["year"]) + + lookupAttribs = { + #'found_by':placeholder, + "blockname": "", + "expoyear": year, + "number": res["number"], + "cave": kh, + "grade": res["grade"], + } + nonLookupAttribs = { + "ticked": ticked, + "nearest_station_name": res["nearest_station"], + "location_description": res["description"], + } + # Create new. We know it doesn't exist as we deleted evrything when we started. + instance = QM.objects.create(**nonLookupAttribs, **lookupAttribs) + + nqms += 1 return nqms