forked from expo/troggle
faster db creation, safer file reading with 'with'
This commit is contained in:
parent
d9a4069662
commit
2704fc42d4
187
parsers/QMs.py
187
parsers/QMs.py
@ -1,6 +1,7 @@
|
|||||||
import csv
|
import csv
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
@ -65,112 +66,104 @@ def parseCaveQMs(cave, inputFile, ticked=False):
|
|||||||
return nqms
|
return nqms
|
||||||
|
|
||||||
# qmPath = settings.EXPOWEB+inputFile
|
# qmPath = settings.EXPOWEB+inputFile
|
||||||
qmPath = os.path.join(settings.EXPOWEB, inputFile) # why not use the pathlib stuff ?
|
qmPath = Path(settings.EXPOWEB, inputFile)
|
||||||
|
|
||||||
qmCSVContents = open(qmPath, "r")
|
with open(qmPath, "r") as qmCSVContents:
|
||||||
dialect = csv.Sniffer().sniff(qmCSVContents.read())
|
dialect = csv.Sniffer().sniff(qmCSVContents.read())
|
||||||
qmCSVContents.seek(0, 0)
|
qmCSVContents.seek(0, 0)
|
||||||
qmReader = csv.reader(qmCSVContents, dialect=dialect)
|
qmReader = csv.reader(qmCSVContents, dialect=dialect)
|
||||||
next(qmReader) # Skip header row
|
next(qmReader) # Skip header row
|
||||||
n = 0
|
n = 0
|
||||||
nqms = 0
|
nqms = 0
|
||||||
for line in qmReader:
|
for line in qmReader:
|
||||||
try:
|
|
||||||
n += 1
|
|
||||||
year = int(line[0][1:5])
|
|
||||||
f"PH_{int(year)}_{int(n):02d}"
|
|
||||||
QMnum = re.match(r".*?-\d*?-X?(?P<numb>\d*)", line[0]).group("numb")
|
|
||||||
newQM = QM()
|
|
||||||
# newQM.found_by=placeholder
|
|
||||||
newQM.number = QMnum
|
|
||||||
newQM.cave = caveid
|
|
||||||
newQM.blockname = ""
|
|
||||||
if line[1] == "Dig":
|
|
||||||
newQM.grade = "D"
|
|
||||||
else:
|
|
||||||
newQM.grade = line[1]
|
|
||||||
newQM.area = line[2]
|
|
||||||
newQM.location_description = line[3]
|
|
||||||
|
|
||||||
# In the table, completion is indicated by the presence of a completion description.
|
|
||||||
newQM.completion_description = line[4]
|
|
||||||
newQM.nearest_station_description = line[5]
|
|
||||||
if newQM.completion_description:
|
|
||||||
newQM.ticked = True
|
|
||||||
else:
|
|
||||||
newQM.ticked = False
|
|
||||||
|
|
||||||
newQM.comment = line[6]
|
|
||||||
try:
|
try:
|
||||||
# year and number are unique for a cave in CSV imports
|
n += 1
|
||||||
preexistingQM = QM.objects.get(
|
year = int(line[0][1:5])
|
||||||
number=QMnum, found_by__date__year=year
|
f"PH_{int(year)}_{int(n):02d}"
|
||||||
) # if we don't have this one in the DB, save it
|
QMnum = re.match(r".*?-\d*?-X?(?P<numb>\d*)", line[0]).group("numb")
|
||||||
if (
|
newQM = QM()
|
||||||
preexistingQM.new_since_parsing is False
|
# newQM.found_by=placeholder
|
||||||
): # if the pre-existing QM has not been modified, overwrite it - VERY OLD THING
|
newQM.number = QMnum
|
||||||
preexistingQM.delete()
|
newQM.cave = caveid
|
||||||
|
newQM.blockname = ""
|
||||||
|
if line[1] == "Dig":
|
||||||
|
newQM.grade = "D"
|
||||||
|
else:
|
||||||
|
newQM.grade = line[1]
|
||||||
|
newQM.area = line[2]
|
||||||
|
newQM.location_description = line[3]
|
||||||
|
|
||||||
|
# In the table, completion is indicated by the presence of a completion description.
|
||||||
|
newQM.completion_description = line[4]
|
||||||
|
newQM.nearest_station_description = line[5]
|
||||||
|
if newQM.completion_description:
|
||||||
|
newQM.ticked = True
|
||||||
|
else:
|
||||||
|
newQM.ticked = False
|
||||||
|
|
||||||
|
newQM.comment = line[6]
|
||||||
|
try:
|
||||||
|
# year and number are unique for a cave in CSV imports
|
||||||
|
preexistingQM = QM.objects.get(
|
||||||
|
number=QMnum, found_by__date__year=year
|
||||||
|
) # if we don't have this one in the DB, save it
|
||||||
|
if (
|
||||||
|
preexistingQM.new_since_parsing is False
|
||||||
|
): # if the pre-existing QM has not been modified, overwrite it - VERY OLD THING
|
||||||
|
preexistingQM.delete()
|
||||||
|
newQM.expoyear = year
|
||||||
|
newQM.save()
|
||||||
|
else: # otherwise, print that it was ignored
|
||||||
|
print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r"))
|
||||||
|
|
||||||
|
except QM.DoesNotExist: # if there is no pre-existing QM, save the new one
|
||||||
newQM.expoyear = year
|
newQM.expoyear = year
|
||||||
newQM.save()
|
newQM.save()
|
||||||
else: # otherwise, print that it was ignored
|
nqms += 1
|
||||||
print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r"))
|
except KeyError: # check on this one
|
||||||
|
message = f" ! - {qmPath} KeyError {str(line)} "
|
||||||
except QM.DoesNotExist: # if there is no pre-existing QM, save the new one
|
print(message)
|
||||||
newQM.expoyear = year
|
DataIssue.objects.create(parser="QMs", message=message)
|
||||||
newQM.save()
|
continue
|
||||||
nqms += 1
|
except IndexError:
|
||||||
except KeyError: # check on this one
|
message = f" ! - {qmPath} IndexError {str(line)} "
|
||||||
message = f" ! - {qmPath} KeyError {str(line)} "
|
print(message)
|
||||||
print(message)
|
DataIssue.objects.create(parser="QMs", message=message)
|
||||||
DataIssue.objects.create(parser="QMs", message=message)
|
continue
|
||||||
continue
|
|
||||||
except IndexError:
|
|
||||||
message = f" ! - {qmPath} IndexError {str(line)} "
|
|
||||||
print(message)
|
|
||||||
DataIssue.objects.create(parser="QMs", message=message)
|
|
||||||
continue
|
|
||||||
return nqms
|
return nqms
|
||||||
|
|
||||||
|
|
||||||
def parse_KH_QMs(kh, inputFile, ticked):
|
def parse_KH_QMs(kh, inputFile, ticked):
|
||||||
"""import QMs from the 1623-161 (Kaninchenhohle) html pages, different format"""
|
"""import QMs from the 1623-161 (Kaninchenhohle) html pages, different format"""
|
||||||
khQMs = open(os.path.join(settings.EXPOWEB, inputFile), "r")
|
with open(os.path.join(settings.EXPOWEB, inputFile), "r") as khQMfile:
|
||||||
khQMs = khQMs.readlines()
|
khQMs = khQMfile.readlines()
|
||||||
nqms = 0
|
nqms = 0
|
||||||
for line in khQMs:
|
for line in khQMs:
|
||||||
res = re.search(
|
res = re.search(
|
||||||
r"name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]",
|
r"name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]",
|
||||||
line,
|
line,
|
||||||
)
|
)
|
||||||
if res:
|
if res:
|
||||||
res = res.groupdict()
|
res = res.groupdict()
|
||||||
year = int(res["year"])
|
year = int(res["year"])
|
||||||
# logbook placeholder code was previously here. No longer needed.
|
|
||||||
# check if placeholder exists for given year, create it if not
|
lookupAttribs = {
|
||||||
# message = " ! - "+ str(year) + " logbook: placeholder entry for '161 KH' created. DUMMY EXPEDITION ID. Should be re-attached to the actual trip."
|
#'found_by':placeholder,
|
||||||
# placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, place="161", title="placeholder for QMs in 161", text=message, entry_type="DUMMY", expedition_id=1, defaults={"date": date((year), 1, 1),"cave_slug":str(kh)})
|
"blockname": "",
|
||||||
# # if hadToCreate:
|
"expoyear": year,
|
||||||
# print(message)
|
"number": res["number"],
|
||||||
# DataIssue.objects.create(parser='QMs', message=message)
|
"cave": kh,
|
||||||
lookupArgs = {
|
"grade": res["grade"],
|
||||||
#'found_by':placeholder,
|
}
|
||||||
"blockname": "",
|
nonLookupAttribs = {
|
||||||
"expoyear": year,
|
"ticked": ticked,
|
||||||
"number": res["number"],
|
"nearest_station_name": res["nearest_station"],
|
||||||
"cave": kh,
|
"location_description": res["description"],
|
||||||
"grade": res["grade"],
|
}
|
||||||
}
|
# Create new. We know it doesn't exist as we deleted everything when we started.
|
||||||
nonLookupArgs = {
|
instance = QM.objects.create(**nonLookupAttribs, **lookupAttribs)
|
||||||
"ticked": ticked,
|
|
||||||
"nearest_station_name": res["nearest_station"],
|
nqms += 1
|
||||||
"location_description": res["description"],
|
|
||||||
}
|
|
||||||
instance, created = save_carefully(QM, lookupArgs, nonLookupArgs)
|
|
||||||
# if created:
|
|
||||||
# message = f" ! - {instance.code()} QM entry for '161 KH' created. ticked: {ticked}"
|
|
||||||
# print(message)
|
|
||||||
# DataIssue.objects.create(parser='QMs', message=message)
|
|
||||||
nqms += 1
|
|
||||||
return nqms
|
return nqms
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user