2009-05-13 05:24:37 +01:00
import csv
2020-05-28 02:20:50 +01:00
import os
import re
2020-06-23 23:34:08 +01:00
from datetime import date
2020-05-28 02:20:50 +01:00
from django . conf import settings
2021-04-13 00:43:57 +01:00
from troggle . core . models . troggle import DataIssue
2021-04-13 00:47:17 +01:00
from troggle . core . models . caves import QM , Cave , LogbookEntry
2021-04-13 00:11:08 +01:00
from troggle . core . utils import save_carefully
2020-05-28 02:20:50 +01:00
2021-04-13 01:37:42 +01:00
'''Reads the CSV files containing QMs for a select few caves'''
2009-05-13 05:24:37 +01:00
2009-05-13 06:15:48 +01:00
def deleteQMs():
    """Wipe the results of any previous QM import.

    Deletes every QM row, then every DataIssue that was recorded with
    parser='QMs'.
    """
    everyQM = QM.objects.all()
    everyQM.delete()

    qmIssues = DataIssue.objects.filter(parser='QMs')
    qmIssues.delete()
2009-05-13 05:24:37 +01:00
2009-05-13 06:15:48 +01:00
def _reportQMIssue(message):
    """Print message and store it as a DataIssue belonging to the 'QMs' parser."""
    print(message)
    DataIssue.objects.create(parser='QMs', message=message)


def parseCaveQMs(cave, inputFile):
    """Import the QMs of one cave from its QM list file.

    Runs through the CSV file at inputFile (which is a relative path from
    expoweb) and saves each QM as a QM instance. For the cave key '161-KH'
    the file is not a CSV and the work is handed to parse_KH_QMs() instead.

    This creates and links a placeholder LogbookEntry dated 1st Jan. in the
    relevant year. That is pointless in itself, but it is needed because
    found_by is a ForeignKey in the db and this has not been reworked to
    allow a null (July 2020).

    Linking to a passage in a SVX file might be more interesting, as the QM
    does sometimes have the passage name, e.g. in 204/qm.csv
        C2000-204-39 B Tree Pitch in Cave Tree treeumphant.28 Gosser Streamway
    The CSV file does not have the exact date for the QM, only the year, so
    links to survex files might be ambiguous. But potentially useful?

    Columns read from each CSV row, by index: 0 identifier (year in
    characters 1-4, QM number after the second hyphen), 1 grade ("Dig" is
    stored as "D"), 2 area, 3 location description, 4 completion
    description, 5 nearest station description, 6 comment.

    Args:
        cave: one of '204-steinBH', '234-Hauch' or '161-KH'.
        inputFile: path of the QM list, relative to settings.EXPOWEB.

    Returns:
        None. Problems (cave not in the database, unknown cave key, rows that
        cannot be parsed) are printed and recorded as DataIssue objects
        rather than raised.
    """
    # Resolved first: every message below quotes this path.
    qmPath = os.path.join(settings.EXPOWEB, inputFile)

    # cave key -> (official_name in the Cave table, name used in messages, logbook place)
    knownCaves = {
        '204-steinBH': ("Steinbrückenhöhle", "Steinbruckenhoehle", "204"),
        '234-Hauch': ("Hauchhöhle", "Hauchhoehle", "234"),
        '161-KH': ("Kaninchenhöhle", "KH", "161"),
    }
    if cave not in knownCaves:
        # Without a known cave there is no placeholder to attach the QMs to.
        _reportQMIssue(f' ! - {qmPath} unknown cave {cave!r}: no QMs imported')
        return
    officialName, messageName, place = knownCaves[cave]

    try:
        caveObject = Cave.objects.get(official_name=officialName)
    except Cave.DoesNotExist:
        _reportQMIssue(f' ! - {qmPath} {messageName} is not in the database. Please run cave parser')
        return

    if cave == '161-KH':
        # The KH list is an HTML page, not a CSV file.
        parse_KH_QMs(caveObject, inputFile=inputFile)
        return

    # newline='' is what the csv module asks for; the context manager closes the file.
    with open(qmPath, newline='') as qmCSVContents:
        dialect = csv.Sniffer().sniff(qmCSVContents.read())
        qmCSVContents.seek(0, 0)
        qmReader = csv.reader(qmCSVContents, dialect=dialect)
        next(qmReader)  # Skip header row

        # n counts every data row, including rows that are later rejected.
        for n, line in enumerate(qmReader, start=1):
            try:
                year = int(line[0][1:5])
                numberMatch = re.match(r".*?-\d*?-X?(?P<numb>\d*)", line[0])
                if numberMatch is None:
                    _reportQMIssue(f' ! - {qmPath} cannot find a QM number in {line}')
                    continue
                QMnum = numberMatch.group("numb")

                # Check if placeholder exists for given year, create it if not.
                logslug = f'PH_{year}_{n:02d}'
                message = (
                    f" ! - {year} logbook: placeholder entry for '{cave}' created. "
                    "DUMMY EXPEDITION ID. Should be re-attached to the actual trip."
                )
                defaults = {"date": date(year, 1, 1), "cave_slug": str(caveObject)}
                if cave == '204-steinBH':
                    # NOTE(review): only the 204 placeholder is given a slug; the 234 one
                    # never was. Kept as found - confirm whether that is intentional.
                    defaults["slug"] = logslug
                # Creation of a new placeholder is deliberately not reported as a DataIssue.
                placeholder, _ = LogbookEntry.objects.get_or_create(
                    date__year=year,
                    place=place,
                    title=f"placeholder for QMs in {place}",
                    text=message,
                    entry_type="DUMMY",
                    expedition_id=1,
                    defaults=defaults,
                )

                newQM = QM()
                newQM.found_by = placeholder
                newQM.number = QMnum
                newQM.grade = "D" if line[1] == "Dig" else line[1]
                newQM.area = line[2]
                newQM.location_description = line[3]

                # Troggle checks if QMs are completed by checking if they have a
                # ticked_off_by trip. In the table, completion is indicated by the
                # presence of a completion description.
                newQM.completion_description = line[4]
                newQM.nearest_station_description = line[5]
                if newQM.completion_description:
                    newQM.ticked_off_by = placeholder
                newQM.comment = line[6]

                # NOTE(review): this lookup uses number and year only, not the cave, so
                # equal numbers in the same year in different caves would match each
                # other - confirm whether that is intended.
                try:
                    preexistingQM = QM.objects.get(number=QMnum, found_by__date__year=year)
                except QM.DoesNotExist:
                    # No pre-existing QM: save the new one.
                    newQM.save()
                else:
                    if preexistingQM.new_since_parsing == False:
                        # The pre-existing QM has not been modified: overwrite it.
                        preexistingQM.delete()
                        newQM.save()
                    else:
                        # Otherwise keep the edited one and say so.
                        print(f" - preserving {preexistingQM}, which was edited in admin \r")
            except (KeyError, IndexError) as err:
                # A short or malformed row is reported and skipped, not fatal.
                _reportQMIssue(f' ! - {qmPath} {type(err).__name__} {line}')
                continue
2009-05-13 05:24:37 +01:00
2009-05-13 06:15:48 +01:00
def parse_KH_QMs(kh, inputFile):
    """Import QMs from the 1623-161 (Kaninchenhöhle) html pages.

    Every line of the file is searched for a QM anchor of the form
    name="C<year>-<cave>-<number>" (or B...), followed by a grade letter
    after </a>, a description after <dd>, and the nearest station in square
    brackets. Lines that do not match are ignored.

    Each QM is attached to a placeholder LogbookEntry for its year (place
    "161"), created on demand, and written with save_carefully().

    Args:
        kh: the Cave object for Kaninchenhöhle; str(kh) is stored as the
            placeholder's cave_slug.
        inputFile: path of the html file, relative to settings.EXPOWEB.
    """
    # Compiled once instead of being re-evaluated for every line.
    qmLine = re.compile(
        r'name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> '
        r'(?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]'
    )

    # The context manager closes the file; iterating it avoids loading it whole.
    with open(os.path.join(settings.EXPOWEB, inputFile), 'r') as khQMs:
        for line in khQMs:
            found = qmLine.search(line)
            if not found:
                continue
            res = found.groupdict()
            year = int(res['year'])

            # Check if placeholder exists for given year, create it if not.
            message = (
                f" ! - {year} logbook: placeholder entry for '161 KH' created. "
                "DUMMY EXPEDITION ID. Should be re-attached to the actual trip."
            )
            # Creation of a new placeholder is deliberately not reported as a DataIssue.
            placeholder, _ = LogbookEntry.objects.get_or_create(
                date__year=year,
                place="161",
                title="placeholder for QMs in 161",
                text=message,
                entry_type="DUMMY",
                expedition_id=1,
                defaults={"date": date(year, 1, 1), "cave_slug": str(kh)},
            )

            lookupArgs = {
                'found_by': placeholder,
                'number': res['number'],
            }
            nonLookupArgs = {
                'grade': res['grade'],
                'nearest_station_name': res['nearest_station'],
                'location_description': res['description'],
            }
            save_carefully(QM, lookupArgs, nonLookupArgs)
2020-06-06 22:51:55 +01:00
def Load_QMs():
    """Rebuild all QMs: clear the old ones, then parse each cave's QM list in turn."""
    deleteQMs()

    # (cave key, file relative to expoweb), processed in this order.
    qmSources = (
        ('204-steinBH', r"1623/204/qm.csv"),
        ('234-Hauch', r"1623/234/qm.csv"),
        ('161-KH', "1623/161/qmtodo.htm"),
    )
    for caveKey, qmFile in qmSources:
        parseCaveQMs(cave=caveKey, inputFile=qmFile)
    #parseCaveQMs(cave='balkonhoehle',inputFile=r"1623/264/qm.csv")