faster db creation, safer file reading with 'with'

This commit is contained in:
Philip Sargent 2023-01-28 14:04:32 +00:00
parent d9a4069662
commit 2704fc42d4

View File

@@ -1,6 +1,7 @@
import csv
import os
import re
from pathlib import Path
from django.conf import settings
@@ -65,112 +66,104 @@ def parseCaveQMs(cave, inputFile, ticked=False):
return nqms
# qmPath = settings.EXPOWEB+inputFile
qmPath = os.path.join(settings.EXPOWEB, inputFile) # why not use the pathlib stuff ?
qmPath = Path(settings.EXPOWEB, inputFile)
qmCSVContents = open(qmPath, "r")
dialect = csv.Sniffer().sniff(qmCSVContents.read())
qmCSVContents.seek(0, 0)
qmReader = csv.reader(qmCSVContents, dialect=dialect)
next(qmReader) # Skip header row
n = 0
nqms = 0
for line in qmReader:
try:
n += 1
year = int(line[0][1:5])
f"PH_{int(year)}_{int(n):02d}"
QMnum = re.match(r".*?-\d*?-X?(?P<numb>\d*)", line[0]).group("numb")
newQM = QM()
# newQM.found_by=placeholder
newQM.number = QMnum
newQM.cave = caveid
newQM.blockname = ""
if line[1] == "Dig":
newQM.grade = "D"
else:
newQM.grade = line[1]
newQM.area = line[2]
newQM.location_description = line[3]
# In the table, completion is indicated by the presence of a completion discription.
newQM.completion_description = line[4]
newQM.nearest_station_description = line[5]
if newQM.completion_description:
newQM.ticked = True
else:
newQM.ticked = False
newQM.comment = line[6]
with open(qmPath, "r") as qmCSVContents:
dialect = csv.Sniffer().sniff(qmCSVContents.read())
qmCSVContents.seek(0, 0)
qmReader = csv.reader(qmCSVContents, dialect=dialect)
next(qmReader) # Skip header row
n = 0
nqms = 0
for line in qmReader:
try:
# year and number are unique for a cave in CSV imports
preexistingQM = QM.objects.get(
number=QMnum, found_by__date__year=year
) # if we don't have this one in the DB, save it
if (
preexistingQM.new_since_parsing is False
): # if the pre-existing QM has not been modified, overwrite it - VERY OLD THING
preexistingQM.delete()
n += 1
year = int(line[0][1:5])
f"PH_{int(year)}_{int(n):02d}"
QMnum = re.match(r".*?-\d*?-X?(?P<numb>\d*)", line[0]).group("numb")
newQM = QM()
# newQM.found_by=placeholder
newQM.number = QMnum
newQM.cave = caveid
newQM.blockname = ""
if line[1] == "Dig":
newQM.grade = "D"
else:
newQM.grade = line[1]
newQM.area = line[2]
newQM.location_description = line[3]
# In the table, completion is indicated by the presence of a completion discription.
newQM.completion_description = line[4]
newQM.nearest_station_description = line[5]
if newQM.completion_description:
newQM.ticked = True
else:
newQM.ticked = False
newQM.comment = line[6]
try:
# year and number are unique for a cave in CSV imports
preexistingQM = QM.objects.get(
number=QMnum, found_by__date__year=year
) # if we don't have this one in the DB, save it
if (
preexistingQM.new_since_parsing is False
): # if the pre-existing QM has not been modified, overwrite it - VERY OLD THING
preexistingQM.delete()
newQM.expoyear = year
newQM.save()
else: # otherwise, print that it was ignored
print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r"))
except QM.DoesNotExist: # if there is no pre-existing QM, save the new one
newQM.expoyear = year
newQM.save()
else: # otherwise, print that it was ignored
print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r"))
except QM.DoesNotExist: # if there is no pre-existing QM, save the new one
newQM.expoyear = year
newQM.save()
nqms += 1
except KeyError: # check on this one
message = f" ! - {qmPath} KeyError {str(line)} "
print(message)
DataIssue.objects.create(parser="QMs", message=message)
continue
except IndexError:
message = f" ! - {qmPath} IndexError {str(line)} "
print(message)
DataIssue.objects.create(parser="QMs", message=message)
continue
nqms += 1
except KeyError: # check on this one
message = f" ! - {qmPath} KeyError {str(line)} "
print(message)
DataIssue.objects.create(parser="QMs", message=message)
continue
except IndexError:
message = f" ! - {qmPath} IndexError {str(line)} "
print(message)
DataIssue.objects.create(parser="QMs", message=message)
continue
return nqms
def parse_KH_QMs(kh, inputFile, ticked):
    """Import QMs from the 1623-161 (Kaninchenhohle) html pages, different format.

    Each line of the input file is searched for a QM anchor of the form
    ``name="C<year>-<cave>-<number>...</a> <grade><dd><description>[<station>]``;
    every matching line produces one new ``QM`` record.

    Args:
        kh: Value stored in the ``cave`` field of every QM created here.
        inputFile: Path of the html file, relative to ``settings.EXPOWEB``.
        ticked: Value stored in the ``ticked`` field of every QM created here.

    Returns:
        The number of QM records created.
    """
    # Compiled once, outside the loop; the pattern itself is unchanged.
    qm_pattern = re.compile(
        r"name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]"
    )

    # The context manager guarantees the file handle is closed even if
    # reading fails; the file is opened exactly once.
    with open(os.path.join(settings.EXPOWEB, inputFile), "r") as khQMfile:
        khQMs = khQMfile.readlines()

    nqms = 0
    for line in khQMs:
        res = qm_pattern.search(line)
        if not res:
            continue  # not a QM line
        fields = res.groupdict()
        # NOTE(review): the <year> group is \d*, so it can match an empty
        # string, in which case int() raises ValueError - confirm the input
        # files never contain such anchors.
        year = int(fields["year"])

        # Create new. We know it doesn't exist as we deleted everything when
        # we started, so no lookup of a pre-existing record is done first.
        QM.objects.create(
            blockname="",
            expoyear=year,
            number=fields["number"],
            cave=kh,
            grade=fields["grade"],
            ticked=ticked,
            nearest_station_name=fields["nearest_station"],
            location_description=fields["description"],
        )
        nqms += 1
    return nqms