troggle-unchained/parsers/QMs.py

145 lines
6.5 KiB
Python
Raw Normal View History

import csv
2020-05-28 02:20:50 +01:00
import os
import re
from datetime import date
2020-05-28 02:20:50 +01:00
from django.conf import settings
2021-04-13 00:43:57 +01:00
from troggle.core.models.troggle import DataIssue
2021-04-13 00:47:17 +01:00
from troggle.core.models.caves import QM, Cave, LogbookEntry
2021-04-13 00:11:08 +01:00
from troggle.core.utils import save_carefully
2020-05-28 02:20:50 +01:00
2021-04-13 01:37:42 +01:00
'''Reads the CSV files containing QMs for a select few caves'''
def deleteQMs():
    """Delete every QM object and every DataIssue whose parser is 'QMs'."""
    allQMs = QM.objects.all()
    allQMs.delete()

    qmIssues = DataIssue.objects.filter(parser='QMs')
    qmIssues.delete()
def parseCaveQMs(cave,inputFile):
    """Runs through the CSV file at inputFile (which is a relative path from expoweb) and
    saves each QM as a QM instance.

    cave is one of the keys '204-steinBH', '234-Hauch' or '161-KH'. For '161-KH' the
    work is handed to parse_KH_QMs() and no CSV is read here. Any other key is reported
    and ignored. If the cave is not in the database a message is printed and nothing
    is imported.

    Columns used from each CSV row: [0] QM id (e.g. C2000-204-39), [1] grade,
    [2] area, [3] location description, [4] completion description,
    [5] nearest station description, [6] comment. The first row is skipped as a header.

    This is creating and linking a Placeholder logbookentry dated 1st Jan. in the relevant
    year. This is pointless but it is needed because found_by is a ForeignKey in the db
    and we can't be arsed to fudge this properly with a null.(July 2020)

    Linking to a passage in a SVX file might be more interesting as the QM does sometimes
    have the passage name, e.g. in 204/qm.csv
    C2000-204-39 B Tree Pitch in Cave Tree treeumphant.28 Gosser Streamway
    The CSV file does not have the exact date for the QM, only the year, so links to
    survex files might be ambiguous. But potentially useful?
    """
    if cave == '204-steinBH':
        try:
            steinBr = Cave.objects.get(official_name="Steinbrückenhöhle")
        except Cave.DoesNotExist:
            print("Steinbruckenhoehle is not in the database. Please run parsers.")
            return
    elif cave == '234-Hauch':
        try:
            hauchHl = Cave.objects.get(official_name="Hauchhöhle")
        except Cave.DoesNotExist:
            print("Hauchhoehle is not in the database. Please run parsers.")
            return
    elif cave == '161-KH':
        try:
            kh = Cave.objects.get(official_name="Kaninchenhöhle")
        except Cave.DoesNotExist:
            print("KH is not in the database. Please run parsers.")
            # Must stop here: kh is unbound, so the call below would fail.
            return
        parse_KH_QMs(kh, inputFile=inputFile)
        return
    else:
        # No placeholder logbook entry can be built for an unknown cave key.
        print("Unknown cave '" + str(cave) + "' for QM parsing. Nothing imported.")
        return

    #qmPath = settings.EXPOWEB+inputFile
    qmPath = os.path.join(settings.EXPOWEB, inputFile)
    qmNumberPattern = re.compile(r".*?-\d*?-X?(?P<numb>\d*)")

    # Plain text mode already translates newlines; the old 'U' flag was removed
    # in Python 3.11. The with-block guarantees the file is closed.
    with open(qmPath, 'r') as qmCSVContents:
        dialect = csv.Sniffer().sniff(qmCSVContents.read())
        qmCSVContents.seek(0, 0)
        qmReader = csv.reader(qmCSVContents, dialect=dialect)
        next(qmReader)  # Skip header row

        for n, line in enumerate(qmReader, start=1):
            try:
                try:
                    year = int(line[0][1:5])
                except ValueError:
                    # Row id does not carry a 4-digit year: skip it, like short rows.
                    print("Cannot read year from " + str(line))
                    continue
                logslug = f'PH_{int(year)}_{int(n):02d}'

                #check if placeholder exists for given year, create it if not
                message = " ! - "+ str(year) + " logbook: placeholder entry for '" + cave + "' created. Should be re-attached to the actual trip."
                if cave == '204-steinBH':
                    placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, place="204", title="placeholder for QMs in 204", text=message, defaults={"date": date(year, 1, 1),"cave_slug":str(steinBr), "slug": logslug})
                else:  # '234-Hauch' - the only other key that reaches this loop
                    # NOTE(review): unlike 204, no "slug" default is supplied here - confirm intended.
                    placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, place="234", title="placeholder for QMs in 234", text=message, defaults={"date": date(year, 1, 1),"cave_slug":str(hauchHl)})
                # if hadToCreate:
                #     print(message)
                #     DataIssue.objects.create(parser='QMs', message=message)

                numberMatch = qmNumberPattern.match(line[0])
                if numberMatch is None:
                    # Id without the expected "-cave-number" shape: skip the row.
                    print("Cannot read QM number from " + str(line))
                    continue
                QMnum = numberMatch.group("numb")

                newQM = QM()
                newQM.found_by = placeholder
                newQM.number = QMnum
                if line[1] == "Dig":
                    newQM.grade = "D"
                else:
                    newQM.grade = line[1]
                newQM.area = line[2]
                newQM.location_description = line[3]

                # Troggle checks if QMs are completed by checking if they have a ticked_off_by trip.
                # In the table, completion is indicated by the presence of a completion description.
                newQM.completion_description = line[4]
                newQM.nearest_station_description = line[5]
                if newQM.completion_description:
                    newQM.ticked_off_by = placeholder

                newQM.comment = line[6]
                # NOTE(review): this lookup is by number and year only, not by cave -
                # confirm it cannot match a QM belonging to a different cave.
                try:
                    preexistingQM = QM.objects.get(number=QMnum, found_by__date__year=year)
                    if preexistingQM.new_since_parsing == False:  # noqa: E712 - keep exact comparison
                        # the pre-existing QM has not been modified, overwrite it
                        preexistingQM.delete()
                        newQM.save()
                    else:  # otherwise, print that it was ignored
                        print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r"))
                except QM.DoesNotExist:  # if there is no pre-existing QM, save the new one
                    newQM.save()

            except KeyError:  #check on this one
                continue
            except IndexError:
                # Row has fewer columns than expected.
                print("Index error in " + str(line))
                continue
def parse_KH_QMs(kh, inputFile):
    """Import QMs from the 1623-161 (Kaninchenhöhle) html pages.

    kh is the cave object for 161; its str() is stored as the cave_slug of any
    placeholder logbook entry created here. inputFile is a path relative to
    settings.EXPOWEB.

    Each line of the file is searched for a QM anchor giving year, cave, number,
    grade, description and a nearest station in square brackets. Lines that do
    not match are ignored. Matching lines are stored with save_carefully(),
    attached to a placeholder LogbookEntry for that year dated 1st January.
    """
    qmLinePattern = re.compile(r'name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]')

    # Read everything up front so the file is closed before any database work.
    with open(os.path.join(settings.EXPOWEB, inputFile),'r') as khQMsFile:
        khQMs = khQMsFile.readlines()

    for line in khQMs:
        res = qmLinePattern.search(line)
        if not res:
            continue
        res = res.groupdict()
        year = int(res['year'])

        #check if placeholder exists for given year, create it if not
        message = " ! - "+ str(year) + " logbook: placeholder entry for '161 KH' created. Should be re-attached to the actual trip."
        placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, place="161", title="placeholder for QMs in 161", text=message, defaults={"date": date((year), 1, 1),"cave_slug":str(kh)})
        # if hadToCreate:
        #     print(message)
        #     DataIssue.objects.create(parser='QMs', message=message)

        lookupArgs = {
            'found_by': placeholder,
            'number': res['number'],
        }
        nonLookupArgs = {
            'grade': res['grade'],
            'nearest_station_name': res['nearest_station'],
            'location_description': res['description'],
        }
        save_carefully(QM, lookupArgs, nonLookupArgs)
2020-06-06 22:51:55 +01:00
def Load_QMs():
    """Clear the stored QMs, then re-import them cave by cave.

    The caves are processed in the fixed order listed below; each entry pairs
    the cave key understood by parseCaveQMs() with its input file path.
    """
    deleteQMs()

    qmSources = (
        ('204-steinBH', "1623/204/qm.csv"),
        ('234-Hauch', "1623/234/qm.csv"),
        ('161-KH', "1623/161/qmtodo.htm"),
        # Not currently imported:
        # ('balkonhoehle', "1623/264/qm.csv"),
    )
    for caveKey, relativePath in qmSources:
        parseCaveQMs(cave=caveKey, inputFile=relativePath)