2009-05-13 05:24:37 +01:00
|
|
|
import csv
|
2020-05-28 02:20:50 +01:00
|
|
|
import os
|
|
|
|
import re
|
|
|
|
|
|
|
|
from django.conf import settings
|
|
|
|
|
2023-01-19 21:34:09 +00:00
|
|
|
from troggle.core.models.caves import QM, Cave
|
2023-01-19 18:33:04 +00:00
|
|
|
from troggle.core.models.troggle import DataIssue
|
2021-04-13 00:11:08 +01:00
|
|
|
from troggle.core.utils import save_carefully
|
2020-05-28 02:20:50 +01:00
|
|
|
|
2023-01-19 21:18:42 +00:00
|
|
|
"""Reads the CSV files containing QMs for a select few caves
|
2022-07-20 12:44:56 +01:00
|
|
|
See parsers/survex.py for the parser which extracts QMs from the survex files
|
2023-01-19 21:18:42 +00:00
|
|
|
"""
|
|
|
|
|
2009-05-13 05:24:37 +01:00
|
|
|
|
2009-05-13 06:15:48 +01:00
|
|
|
def deleteQMs():
    """Remove every QM record and any DataIssues left by a previous QM import.

    Called before a fresh import so the database starts from a clean slate.
    """
    # The two deletions are independent: one clears the QM table itself,
    # the other clears the import-error log entries tagged with this parser.
    DataIssue.objects.filter(parser="QMs").delete()
    QM.objects.all().delete()
|
2020-07-04 01:10:53 +01:00
|
|
|
|
2009-05-13 05:24:37 +01:00
|
|
|
|
2022-07-20 12:44:56 +01:00
|
|
|
def parseCaveQMs(cave, inputFile, ticked=False):
    """Runs through the CSV file at inputFile (which is a relative path from expoweb) and
    saves each QM as a QM instance.

    cave      -- one of the three hard-coded cave identifiers handled below
    inputFile -- path relative to settings.EXPOWEB (CSV for 204/234, HTML for 161)
    ticked    -- passed through to the 161-KH HTML parser only

    Returns the number of QMs imported, or None when the cave lookup fails.

    This is creating and linking a Placeholder logbookentry dated 1st Jan. in the relevant
    year. This is pointless but it is needed because found_by is a ForeignKey in the db
    and we can't be arsed to fudge this properly with a null.(July 2020)

    Linking to a passage in a SVX file might be more interesting as the QM does sometimes
    have the passage name, e.g. in 204/qm.csv
    C2000-204-39 B Tree Pitch in Cave Tree treeumphant.28 Gosser Streamway
    The CSV file does not have the exact date for the QM, only the year, so links to
    survex files might be ambiguous. But potentially useful?

    Much of this code assumes that QMs are edited using troggle. This is not done so this code can be deleted.
    All QMs are created afresh and this is all only run once on import on a fresh database.
    """
    # Fix: qmPath was previously assigned *after* the cave-lookup error messages
    # that interpolate it, which raised NameError on the failure path.
    qmPath = os.path.join(settings.EXPOWEB, inputFile)  # why not use the pathlib stuff ?

    if cave == "204-steinBH":
        try:
            steinBr = Cave.objects.get(official_name="Steinbrückenhöhle")
            caveid = steinBr
        except Cave.DoesNotExist:
            message = f" ! - {qmPath} Steinbruckenhoehle is not in the database. Please run cave parser"
            print(message)
            DataIssue.objects.create(parser="QMs", message=message)
            return
    elif cave == "234-Hauch":
        try:
            hauchHl = Cave.objects.get(official_name="Hauchhöhle")
            caveid = hauchHl
        except Cave.DoesNotExist:
            message = f" ! - {qmPath} Hauchhoehle is not in the database. Please run cave parser"
            print(message)
            DataIssue.objects.create(parser="QMs", message=message)
            return
    elif cave == "161-KH":
        try:
            kh = Cave.objects.get(official_name="Kaninchenhöhle")
        except Cave.DoesNotExist:
            message = f" ! - {qmPath} KH is not in the database. Please run cave parser"
            print(message)
            DataIssue.objects.create(parser="QMs", message=message)
            # Fix: previously fell through and called parse_KH_QMs with 'kh'
            # unbound, raising NameError instead of reporting the real problem.
            return
        nqms = parse_KH_QMs(kh, inputFile=inputFile, ticked=ticked)
        return nqms
    else:
        # Fix: an unrecognised cave identifier previously reached the CSV loop
        # with 'caveid' unbound and crashed on the first row.
        message = f" ! - {qmPath} unrecognised cave '{cave}' passed to parseCaveQMs"
        print(message)
        DataIssue.objects.create(parser="QMs", message=message)
        return

    # Fix: use a context manager so the CSV file handle is always closed
    # (it was previously opened and leaked).
    with open(qmPath, "r") as qmCSVContents:
        dialect = csv.Sniffer().sniff(qmCSVContents.read())
        qmCSVContents.seek(0, 0)
        qmReader = csv.reader(qmCSVContents, dialect=dialect)
        next(qmReader)  # Skip header row
        n = 0
        nqms = 0
        for line in qmReader:
            try:
                n += 1
                # QM code looks like e.g. "C2000-204-39"; chars 1-4 are the year.
                year = int(line[0][1:5])
                QMnum = re.match(r".*?-\d*?-X?(?P<numb>\d*)", line[0]).group("numb")
                newQM = QM()
                # newQM.found_by=placeholder
                newQM.number = QMnum
                newQM.cave = caveid
                newQM.blockname = ""
                if line[1] == "Dig":
                    newQM.grade = "D"
                else:
                    newQM.grade = line[1]
                newQM.area = line[2]
                newQM.location_description = line[3]

                # In the table, completion is indicated by the presence of a completion description.
                newQM.completion_description = line[4]
                newQM.nearest_station_description = line[5]
                if newQM.completion_description:
                    newQM.ticked = True
                else:
                    newQM.ticked = False

                newQM.comment = line[6]
                try:
                    # year and number are unique for a cave in CSV imports
                    preexistingQM = QM.objects.get(
                        number=QMnum, found_by__date__year=year
                    )  # if we don't have this one in the DB, save it
                    if (
                        preexistingQM.new_since_parsing is False
                    ):  # if the pre-existing QM has not been modified, overwrite it - VERY OLD THING
                        preexistingQM.delete()
                        newQM.expoyear = year
                        newQM.save()
                    else:  # otherwise, print that it was ignored
                        print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r"))

                except QM.DoesNotExist:  # if there is no pre-existing QM, save the new one
                    newQM.expoyear = year
                    newQM.save()
                    nqms += 1

            except KeyError:  # check on this one
                message = f" ! - {qmPath} KeyError {str(line)} "
                print(message)
                DataIssue.objects.create(parser="QMs", message=message)
                continue
            except IndexError:
                message = f" ! - {qmPath} IndexError {str(line)} "
                print(message)
                DataIssue.objects.create(parser="QMs", message=message)
                continue
    return nqms
|
2009-05-13 05:24:37 +01:00
|
|
|
|
2023-01-19 21:18:42 +00:00
|
|
|
|
2022-07-20 12:44:56 +01:00
|
|
|
def parse_KH_QMs(kh, inputFile, ticked):
    """import QMs from the 1623-161 (Kaninchenhohle) html pages, different format

    kh        -- the Cave object for Kaninchenhöhle
    inputFile -- path relative to settings.EXPOWEB to one of the QM html pages
    ticked    -- True when parsing the "done" page, False for the "todo" page

    Returns the number of QM lines matched and saved.
    """
    # Fix: the file handle was previously opened and never closed; use a
    # context manager so it is released as soon as the lines are read.
    with open(os.path.join(settings.EXPOWEB, inputFile), "r") as khQMfile:
        khQMs = khQMfile.readlines()

    nqms = 0
    for line in khQMs:
        # Each QM is one anchor line in the html, e.g.
        #   name="C1996-161-42" ... </a> B<dd>description [nearest station]
        res = re.search(
            r"name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]",
            line,
        )
        if res:
            res = res.groupdict()
            year = int(res["year"])
            # logbook placeholder code was previously here. No longer needed.
            lookupArgs = {
                #'found_by':placeholder,
                "blockname": "",
                "expoyear": year,
                "number": res["number"],
                "cave": kh,
                "grade": res["grade"],
            }
            nonLookupArgs = {
                "ticked": ticked,
                "nearest_station_name": res["nearest_station"],
                "location_description": res["description"],
            }
            instance, created = save_carefully(QM, lookupArgs, nonLookupArgs)
            nqms += 1

    return nqms
|
2023-01-19 21:18:42 +00:00
|
|
|
|
|
|
|
|
2020-06-06 22:51:55 +01:00
|
|
|
def Load_QMs():
    """Wipe all existing QMs then re-import them from the per-cave files.

    204 and 234 come from CSV files; 161 (KH) comes from two html pages,
    one for outstanding QMs and one for completed ("ticked") QMs.
    """
    deleteQMs()
    # Table of (result key, cave identifier, file path, ticked flag).
    # Order matters: the qmdone page must be parsed after qmtodo so a QM
    # appearing on both ends up ticked.
    jobs = (
        ("n204", "204-steinBH", r"1623/204/qm.csv", False),
        ("n234", "234-Hauch", r"1623/234/qm.csv", False),
        ("n161", "161-KH", "1623/161/qmtodo.htm", False),
        ("t161", "161-KH", "1623/161/qmdone.htm", True),
    )
    counts = {}
    for key, cave, path, ticked in jobs:
        counts[key] = parseCaveQMs(cave=cave, inputFile=path, ticked=ticked)
    # parseCaveQMs(cave='balkonhoehle',inputFile=r"1623/264/qm.csv")
    print(
        f" - Imported: {counts['n204']} QMs for 204, {counts['n234']} QMs for 234,"
        f" {counts['t161']} QMs for 161 done, {counts['n161']} QMs for 161 not done."
    )
    print()
|