import csv
import os
import re

from datetime import date

from django.conf import settings

from troggle.core.models.troggle import DataIssue
from troggle.core.models.caves import QM, Cave, LogbookEntry
from troggle.core.utils import save_carefully

'''Reads the CSV files containing QMs for a select few caves'''
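
# The data files parsed are 1623/204/qm.csv, 1623/234/qm.csv (CSV) and 1623/161/qmtodo.htm (HTML),
# all relative to settings.EXPOWEB - see Load_QMs() at the bottom of this file.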

def deleteQMs():
    QM.objects.all().delete()
    DataIssue.objects.filter(parser='QMs').delete()

def parseCaveQMs(cave, inputFile):
    """Runs through the CSV file at inputFile (which is a relative path from expoweb) and
    saves each QM as a QM instance.

    This creates and links a placeholder LogbookEntry dated 1st Jan. of the relevant
    year. This is pointless but it is needed because found_by is a ForeignKey in the db
    and we can't be arsed to fudge this properly with a null. (July 2020)

    Linking to a passage in a SVX file might be more interesting as the QM does sometimes
    have the passage name, e.g. in 204/qm.csv
        C2000-204-39 B Tree Pitch in Cave Tree treeumphant.28 Gosser Streamway
    The CSV file does not have the exact date for the QM, only the year, so links to
    survex files might be ambiguous. But potentially useful?

    Much of this code assumes that QMs are edited using troggle. This is not done, so this code can be deleted.
    All QMs are created afresh and this is all only run once on import on a fresh database.
    """

    if cave == '204-steinBH':
        try:
            steinBr = Cave.objects.get(official_name="Steinbrückenhöhle")
            caveid = steinBr
        except Cave.DoesNotExist:
            message = f' ! - {inputFile} Steinbruckenhoehle is not in the database. Please run cave parser'
            print(message)
            DataIssue.objects.create(parser='QMs', message=message)
            return
    elif cave == '234-Hauch':
        try:
            hauchHl = Cave.objects.get(official_name="Hauchhöhle")
            caveid = hauchHl
        except Cave.DoesNotExist:
            message = f' ! - {inputFile} Hauchhoehle is not in the database. Please run cave parser'
            print(message)
            DataIssue.objects.create(parser='QMs', message=message)
            return
    elif cave == '161-KH':
        try:
            kh = Cave.objects.get(official_name="Kaninchenhöhle")
            caveid = kh
        except Cave.DoesNotExist:
            message = f' ! - {inputFile} KH is not in the database. Please run cave parser'
            print(message)
            DataIssue.objects.create(parser='QMs', message=message)
        nqms = parse_KH_QMs(kh, inputFile=inputFile)
        return nqms

    #qmPath = settings.EXPOWEB+inputFile
    qmPath = os.path.join(settings.EXPOWEB, inputFile)  # why not use the pathlib stuff ?

    qmCSVContents = open(qmPath, 'r')
    dialect = csv.Sniffer().sniff(qmCSVContents.read())
    qmCSVContents.seek(0, 0)
    qmReader = csv.reader(qmCSVContents, dialect=dialect)
    next(qmReader)  # Skip header row
    n = 0
    nqms = 0
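
    # Each data row is indexed positionally below: line[0] = QM identifier (e.g. C2000-204-39),
    # line[1] = grade ("Dig" is stored as grade "D"), line[2] = area, line[3] = location description,
    # line[4] = completion description (non-empty means the QM has been ticked off),
    # line[5] = nearest station description, line[6] = comment.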
    for line in qmReader:
        try:
            n += 1
            year = int(line[0][1:5])
            logslug = f'PH_{int(year)}_{int(n):02d}'
            # logbook placeholder code was previously here. No longer needed.
            #check if placeholder exists for given year, create it if not
            # message = " ! - "+ str(year) + " logbook: placeholder entry for '" + cave + "' created. DUMMY EXPEDITION ID. Should be re-attached to the actual trip."
            # if cave=='204-steinBH':
            #     placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, place="204", title="placeholder for QMs in 204", text=message, entry_type="DUMMY", expedition_id=1, defaults={"date": date(year, 1, 1),"cave_slug":str(steinBr), "slug": logslug})
            # elif cave=='234-Hauch':
            #     placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, place="234", title="placeholder for QMs in 234", text=message, entry_type="DUMMY", expedition_id=1, defaults={"date": date(year, 1, 1),"cave_slug":str(hauchHl)})
            # # if hadToCreate:
            # #     print(message)
            # #     DataIssue.objects.create(parser='QMs', message=message)

            QMnum = re.match(r".*?-\d*?-X?(?P<numb>\d*)", line[0]).group("numb")
            newQM = QM()
            # newQM.found_by=placeholder
            newQM.number = QMnum
            newQM.cave = caveid
            newQM.blockname = ""
            if line[1] == "Dig":
                newQM.grade = "D"
            else:
                newQM.grade = line[1]
            newQM.area = line[2]
            newQM.location_description = line[3]

            # Troggle will in future (?! - written in 2006) check if QMs are completed by checking if they have a ticked_off_by trip.
            # In the table, completion is indicated by the presence of a completion description.
            newQM.completion_description = line[4]
            newQM.nearest_station_description = line[5]
            # if newQM.completion_description:
            #     newQM.ticked_off_by=placeholder

            newQM.comment = line[6]
            try:
                # year and number are unique for a cave in CSV imports
                preexistingQM = QM.objects.get(number=QMnum, found_by__date__year=year)  #if we don't have this one in the DB, save it
                if preexistingQM.new_since_parsing == False:  #if the pre-existing QM has not been modified, overwrite it - VERY OLD THING
                    preexistingQM.delete()
                    newQM.expoyear = year
                    newQM.save()
                else:  # otherwise, print that it was ignored
                    print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r"))
            except QM.DoesNotExist:  #if there is no pre-existing QM, save the new one
                newQM.expoyear = year
                newQM.save()
            nqms += 1
        except KeyError:  #check on this one
            message = f' ! - {qmPath} KeyError {str(line)} '
            print(message)
            DataIssue.objects.create(parser='QMs', message=message)
            continue
        except IndexError:
            message = f' ! - {qmPath} IndexError {str(line)} '
            print(message)
            DataIssue.objects.create(parser='QMs', message=message)
            continue
    return nqms


def parse_KH_QMs(kh, inputFile):
    """Imports QMs from the 1623-161 (Kaninchenhöhle) HTML pages, which use a different format.
    """
    khQMs = open(os.path.join(settings.EXPOWEB, inputFile), 'r')
    khQMs = khQMs.readlines()
    nqms = 0
    for line in khQMs:
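        # Each QM line in the HTML is expected to look roughly like (illustrative example, not a real entry):
        #   <a ... name="C1996-161-04">...</a> B<dd>description of the lead [nearest survey station]
        # i.e. an anchor whose name is [CB]<year>-<cave>-<number>, a grade letter A/B/C/D/V,
        # then the description followed by the nearest station in square brackets.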
        res = re.search(r'name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]', line)
        if res:
            res = res.groupdict()
            year = int(res['year'])
            # logbook placeholder code was previously here. No longer needed.
            #check if placeholder exists for given year, create it if not
            # message = " ! - "+ str(year) + " logbook: placeholder entry for '161 KH' created. DUMMY EXPEDITION ID. Should be re-attached to the actual trip."
            # placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, place="161", title="placeholder for QMs in 161", text=message, entry_type="DUMMY", expedition_id=1, defaults={"date": date((year), 1, 1),"cave_slug":str(kh)})
            # # if hadToCreate:
            # #     print(message)
            # #     DataIssue.objects.create(parser='QMs', message=message)

            lookupArgs = {
                #'found_by':placeholder,
                'blockname': "",
                'expoyear': year,
                'number': res['number'],
                'cave': kh,
                'grade': res['grade']
            }
            nonLookupArgs = {
                'nearest_station_name': res['nearest_station'],
                'location_description': res['description']
            }
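            # save_carefully() (from troggle.core.utils) is assumed here to behave like get_or_create:
            # it looks the QM up by lookupArgs and creates or updates it with nonLookupArgs.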
            instance, created = save_carefully(QM, lookupArgs, nonLookupArgs)
            # if created:
            #     message = " ! - "+ instance.code() + " QM entry for '161 KH' created. "
            #     print(message)
            #     DataIssue.objects.create(parser='QMs', message=message)
            nqms += 1
    return nqms


def Load_QMs():
    deleteQMs()
    n204 = parseCaveQMs(cave='204-steinBH', inputFile=r"1623/204/qm.csv")
    n234 = parseCaveQMs(cave='234-Hauch', inputFile=r"1623/234/qm.csv")
    n161 = parseCaveQMs(cave='161-KH', inputFile="1623/161/qmtodo.htm")
    #parseCaveQMs(cave='balkonhoehle',inputFile=r"1623/264/qm.csv")
    print(f" - Imported: {n204} QMs for 204, {n234} QMs for 234, {n161} QMs for 161.")
    print()