forked from expo/troggle
faster db creation, safer file reading with 'with'
This commit is contained in:
parent
d9a4069662
commit
2704fc42d4
187
parsers/QMs.py
187
parsers/QMs.py
@ -1,6 +1,7 @@
|
||||
import csv
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
@ -65,112 +66,104 @@ def parseCaveQMs(cave, inputFile, ticked=False):
|
||||
return nqms
|
||||
|
||||
# qmPath = settings.EXPOWEB+inputFile
|
||||
qmPath = os.path.join(settings.EXPOWEB, inputFile) # why not use the pathlib stuff ?
|
||||
qmPath = Path(settings.EXPOWEB, inputFile)
|
||||
|
||||
qmCSVContents = open(qmPath, "r")
|
||||
dialect = csv.Sniffer().sniff(qmCSVContents.read())
|
||||
qmCSVContents.seek(0, 0)
|
||||
qmReader = csv.reader(qmCSVContents, dialect=dialect)
|
||||
next(qmReader) # Skip header row
|
||||
n = 0
|
||||
nqms = 0
|
||||
for line in qmReader:
|
||||
try:
|
||||
n += 1
|
||||
year = int(line[0][1:5])
|
||||
f"PH_{int(year)}_{int(n):02d}"
|
||||
QMnum = re.match(r".*?-\d*?-X?(?P<numb>\d*)", line[0]).group("numb")
|
||||
newQM = QM()
|
||||
# newQM.found_by=placeholder
|
||||
newQM.number = QMnum
|
||||
newQM.cave = caveid
|
||||
newQM.blockname = ""
|
||||
if line[1] == "Dig":
|
||||
newQM.grade = "D"
|
||||
else:
|
||||
newQM.grade = line[1]
|
||||
newQM.area = line[2]
|
||||
newQM.location_description = line[3]
|
||||
|
||||
# In the table, completion is indicated by the presence of a completion description.
|
||||
newQM.completion_description = line[4]
|
||||
newQM.nearest_station_description = line[5]
|
||||
if newQM.completion_description:
|
||||
newQM.ticked = True
|
||||
else:
|
||||
newQM.ticked = False
|
||||
|
||||
newQM.comment = line[6]
|
||||
with open(qmPath, "r") as qmCSVContents:
|
||||
dialect = csv.Sniffer().sniff(qmCSVContents.read())
|
||||
qmCSVContents.seek(0, 0)
|
||||
qmReader = csv.reader(qmCSVContents, dialect=dialect)
|
||||
next(qmReader) # Skip header row
|
||||
n = 0
|
||||
nqms = 0
|
||||
for line in qmReader:
|
||||
try:
|
||||
# year and number are unique for a cave in CSV imports
|
||||
preexistingQM = QM.objects.get(
|
||||
number=QMnum, found_by__date__year=year
|
||||
) # if we don't have this one in the DB, save it
|
||||
if (
|
||||
preexistingQM.new_since_parsing is False
|
||||
): # if the pre-existing QM has not been modified, overwrite it - VERY OLD THING
|
||||
preexistingQM.delete()
|
||||
n += 1
|
||||
year = int(line[0][1:5])
|
||||
f"PH_{int(year)}_{int(n):02d}"
|
||||
QMnum = re.match(r".*?-\d*?-X?(?P<numb>\d*)", line[0]).group("numb")
|
||||
newQM = QM()
|
||||
# newQM.found_by=placeholder
|
||||
newQM.number = QMnum
|
||||
newQM.cave = caveid
|
||||
newQM.blockname = ""
|
||||
if line[1] == "Dig":
|
||||
newQM.grade = "D"
|
||||
else:
|
||||
newQM.grade = line[1]
|
||||
newQM.area = line[2]
|
||||
newQM.location_description = line[3]
|
||||
|
||||
# In the table, completion is indicated by the presence of a completion description.
|
||||
newQM.completion_description = line[4]
|
||||
newQM.nearest_station_description = line[5]
|
||||
if newQM.completion_description:
|
||||
newQM.ticked = True
|
||||
else:
|
||||
newQM.ticked = False
|
||||
|
||||
newQM.comment = line[6]
|
||||
try:
|
||||
# year and number are unique for a cave in CSV imports
|
||||
preexistingQM = QM.objects.get(
|
||||
number=QMnum, found_by__date__year=year
|
||||
) # if we don't have this one in the DB, save it
|
||||
if (
|
||||
preexistingQM.new_since_parsing is False
|
||||
): # if the pre-existing QM has not been modified, overwrite it - VERY OLD THING
|
||||
preexistingQM.delete()
|
||||
newQM.expoyear = year
|
||||
newQM.save()
|
||||
else: # otherwise, print that it was ignored
|
||||
print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r"))
|
||||
|
||||
except QM.DoesNotExist: # if there is no pre-existing QM, save the new one
|
||||
newQM.expoyear = year
|
||||
newQM.save()
|
||||
else: # otherwise, print that it was ignored
|
||||
print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r"))
|
||||
|
||||
except QM.DoesNotExist: # if there is no pre-existing QM, save the new one
|
||||
newQM.expoyear = year
|
||||
newQM.save()
|
||||
nqms += 1
|
||||
except KeyError: # check on this one
|
||||
message = f" ! - {qmPath} KeyError {str(line)} "
|
||||
print(message)
|
||||
DataIssue.objects.create(parser="QMs", message=message)
|
||||
continue
|
||||
except IndexError:
|
||||
message = f" ! - {qmPath} IndexError {str(line)} "
|
||||
print(message)
|
||||
DataIssue.objects.create(parser="QMs", message=message)
|
||||
continue
|
||||
nqms += 1
|
||||
except KeyError: # check on this one
|
||||
message = f" ! - {qmPath} KeyError {str(line)} "
|
||||
print(message)
|
||||
DataIssue.objects.create(parser="QMs", message=message)
|
||||
continue
|
||||
except IndexError:
|
||||
message = f" ! - {qmPath} IndexError {str(line)} "
|
||||
print(message)
|
||||
DataIssue.objects.create(parser="QMs", message=message)
|
||||
continue
|
||||
return nqms
|
||||
|
||||
|
||||
def parse_KH_QMs(kh, inputFile, ticked):
    """Import QMs from the 1623-161 (Kaninchenhohle) html pages, different format.

    Each line of the file is searched for a QM anchor of the form
    ``name="C<year>-<cave>-<number>...</a> <grade><dd><description>[<station>]``.
    Every matching line produces one new ``QM`` row; lines that do not match
    are silently skipped.

    Args:
        kh: value stored in the ``cave`` field of every QM created here
            (presumably the Kaninchenhohle Cave object - confirm against caller).
        inputFile: path of the html file, relative to ``settings.EXPOWEB``.
        ticked: value stored in the ``ticked`` field of every QM created here.

    Returns:
        The number of QMs created, i.e. the number of matching lines.
    """
    # Read everything up front inside a context manager so the file handle is
    # always closed, even if a later database call raises.
    with open(os.path.join(settings.EXPOWEB, inputFile), "r") as khQMfile:
        khQMs = khQMfile.readlines()

    # Compiled once rather than re-resolved for every line of the file.
    qm_pattern = re.compile(
        r"name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]"
    )

    nqms = 0
    for line in khQMs:
        match = qm_pattern.search(line)
        if not match:
            continue

        res = match.groupdict()
        year = int(res["year"])

        # Fields that identify the QM.
        lookupAttribs = {
            "blockname": "",
            "expoyear": year,
            "number": res["number"],
            "cave": kh,
            "grade": res["grade"],
        }
        # Descriptive fields that do not take part in identifying the QM.
        nonLookupAttribs = {
            "ticked": ticked,
            "nearest_station_name": res["nearest_station"],
            "location_description": res["description"],
        }

        # Create new. We know it doesn't exist as we deleted everything when we
        # started, so a plain create avoids a lookup query per QM.
        QM.objects.create(**nonLookupAttribs, **lookupAttribs)
        nqms += 1

    return nqms
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user