2009-05-13 05:27:43 +01:00
import sys
import os
import types
2009-05-13 06:23:57 +01:00
#sys.path.append('C:\\Expo\\expoweb')
#from troggle import *
#os.environ['DJANGO_SETTINGS_MODULE']='troggle.settings'
2009-05-13 05:27:43 +01:00
import troggle . settings as settings
2009-05-13 06:22:53 +01:00
from troggle . expo . models import *
2009-05-13 06:23:57 +01:00
from PIL import Image
2009-05-13 05:27:43 +01:00
#import settings
#import expo.models as models
import csv
import re
import datetime
2009-05-13 06:23:57 +01:00
def _openSurveysCSV():
    """Return an open, seekable file-like object holding Surveys.csv.

    The copy under settings.SURVEYS is tried first.  If it cannot be opened,
    the file is fetched from settings.SURVEYS + "download/Surveys.csv" and
    buffered in memory so it can still be rewound after dialect sniffing.
    """
    try:
        return open(os.path.join(settings.SURVEYS, "Surveys.csv"))
    except IOError:
        import cStringIO
        import urllib
        remote = urllib.urlopen(settings.SURVEYS + "download/Surveys.csv")
        try:
            return cStringIO.StringIO(remote.read())
        finally:
            # The payload is fully buffered, so the connection can go now.
            remote.close()


def readSurveysFromCSV(logfile=None):
    """Rebuild all Survey rows from Surveys.csv.

    Every existing ScannedImage and Survey object is deleted, then one Survey
    is created per data row of the CSV, attached to the Expedition whose
    ``year`` equals the row's "Year" column.

    logfile -- optional object with a ``write`` method; progress messages
               are written to it when it is truthy.

    Exits the process (sys.exit) if the database holds no Expedition at all.
    Raises IndexError if a row names a year with no matching Expedition.
    """
    surveytab = _openSurveysCSV()
    try:
        dialect = csv.Sniffer().sniff(surveytab.read())
        surveytab.seek(0, 0)  # sniffing consumed the stream; rewind for the reader
        surveyreader = csv.reader(surveytab, dialect=dialect)
        headers = surveyreader.next()
        # set up a dictionary where the indexes are header names and the
        # values are column numbers
        header = dict(zip(headers, range(len(headers))))

        # test if the expeditions have been added yet
        if Expedition.objects.count() == 0:
            print("There are no expeditions in the database. Please run the logbook parser.")
            sys.exit()

        if logfile:
            logfile.write("Deleting all scanned images")
        ScannedImage.objects.all().delete()

        if logfile:
            logfile.write("Deleting all survey objects")
        Survey.objects.all().delete()

        if logfile:
            logfile.write("Beginning to import surveys from " + str(os.path.join(settings.SURVEYS, "Surveys.csv")) + "\n" + "-" * 60 + "\n")

        # Some surveys have a letter after the number, eg 2000#34a, so the
        # wallet reference is split into its numeric and alphabetic parts.
        # Both parts are optional, hence the pattern always matches.
        walletPattern = re.compile(r'(?P<number>\d*)(?P<letter>[a-zA-Z]*)')

        for survey in surveyreader:
            walletNumberLetter = walletPattern.match(survey[header['Survey Number']])

            surveyobj = Survey(
                expedition=Expedition.objects.filter(year=survey[header['Year']])[0],
                wallet_number=walletNumberLetter.group('number'),
                comments=survey[header['Comments']],
                location=survey[header['Location']]
            )
            surveyobj.wallet_letter = walletNumberLetter.group('letter')
            if survey[header['Finished']] == 'Yes':
                # TODO: try and find the sketch_scan
                pass
            surveyobj.save()

            if logfile:
                logfile.write("added survey " + survey[header['Year']] + "#" + surveyobj.wallet_number + "\r")
    finally:
        # Release the CSV handle even when the import aborts part-way.
        surveytab.close()
2009-05-13 05:58:18 +01:00
def listdir(*directories):
    """List the entries of a directory below settings.SURVEYS.

    directories -- path components, relative to settings.SURVEYS.

    The local filesystem is tried first.  If that fails with OSError, the
    listing is requested from settings.SURVEYS + "listdir/<components>" and
    each returned line is treated as one entry name.

    Returns a list of entry names.
    """
    try:
        return os.listdir(os.path.join(settings.SURVEYS, *directories))
    except OSError:
        import urllib
        url = settings.SURVEYS + "/".join(["listdir"] + list(directories))
        # '#' would start a URL fragment, so it has to be percent-encoded.
        response = urllib.urlopen(url.replace("#", "%23"))
        try:
            folders = response.readlines()
        finally:
            response.close()
        # readlines() keeps the line terminator, which must be removed first
        # or the trailing "/" is never at the end of the string.
        return [folder.rstrip("\r\n").rstrip("/") for folder in folders]
2009-05-13 05:27:43 +01:00
# add survey scans
2009-05-13 06:23:57 +01:00
def parseSurveyScans(year, logfile=None):
    """Create a ScannedImage for each recognised scan of one expedition.

    year    -- expedition object; ``year.year`` names the sub-folder of
               settings.SURVEY_SCANS that is scanned, and the object itself
               is used as the ``expedition`` of the Survey lookups.
    logfile -- accepted for signature parity with the other parsers; this
               function does not write to it.

    Folders whose name does not look like "NNNN#<number>" and files whose
    name does not contain a known scan type followed by an optional number
    and a png/jpg/jpeg extension are reported on stdout and skipped.
    Interlaced PNGs are reported and skipped as well.
    """
    folderPattern = re.compile(r'\d\d\d\d#0*(\d+)')
    scanPattern = re.compile(r'(?i).*(notes|elev|plan|elevation|extend)(\d*)\.(png|jpg|jpeg)')

    # yearFileList = listdir(year.year)
    yearPath = os.path.join(settings.SURVEY_SCANS, year.year)
    yearFileList = os.listdir(yearPath)
    print(yearFileList)

    for surveyFolder in yearFileList:
        folderMatch = folderPattern.match(surveyFolder)
        if folderMatch is None:
            sys.stdout.write(surveyFolder + " ignored ")
            continue
        surveyNumber = folderMatch.group(1)
        # scanList = listdir(year.year, surveyFolder)
        scanList = os.listdir(os.path.join(yearPath, surveyFolder))

        # Looked up on the first recognised scan only, so a folder without
        # any recognised scan still does not create a Survey.
        survey = None

        for scan in scanList:
            scanMatch = scanPattern.match(scan)
            if scanMatch is None:
                sys.stdout.write(scan + " ignored \r")
                continue
            scanType, scanNumber, scanFormat = scanMatch.groups()

            # The pattern is case-insensitive, so normalise before comparing.
            scanType = scanType.lower()
            if scanType in ('elev', 'extend'):
                scanType = 'elevation'

            if scanNumber == '':
                scanNumber = 1

            if survey is None:
                try:
                    survey = Survey.objects.get_or_create(wallet_number=surveyNumber, expedition=year)[0]
                except Survey.MultipleObjectsReturned:
                    survey = Survey.objects.filter(wallet_number=surveyNumber, expedition=year)[0]

            scanPath = os.path.join(year.year, surveyFolder, scan)

            if scanFormat.lower() == "png":
                if isInterlacedPNG(os.path.join(settings.SURVEY_SCANS, scanPath)):
                    print(scanPath + " is an interlaced PNG. No can do.")
                    continue

            scanObj = ScannedImage(
                file=scanPath,
                contents=scanType,
                number_in_wallet=scanNumber,
                survey=survey,
                new_since_parsing=False,
            )
            scanObj.save()
2009-05-13 06:22:53 +01:00
2009-05-13 06:23:57 +01:00
def parseSurveys(logfile=None):
    """Re-import the survey table, then the scans of every expedition since 2000.

    logfile -- optional object with a ``write`` method; it is handed on to
               readSurveysFromCSV and parseSurveyScans.
    """
    readSurveysFromCSV(logfile=logfile)
    # expos since 2000, because paths and filenames were nonstandard before then
    for year in Expedition.objects.filter(year__gte=2000):
        parseSurveyScans(year, logfile=logfile)
2009-05-13 06:23:57 +01:00
def isInterlacedPNG(filePath):
    """Return True if the image at filePath is reported as interlaced.

    We need to check for interlaced PNGs because the thumbnail engine can't
    handle them (uses PIL).

    filePath -- path of the image file to open with PIL.
    """
    image = Image.open(filePath)
    # PIL only adds an 'interlace' entry to info when the PNG header flags
    # interlacing, so a plain lookup raises KeyError for ordinary files.
    return bool(image.info.get('interlace'))