diff --git a/parsers/surveys.py b/parsers/surveys.py
index 450725c..ec6298c 100644
--- a/parsers/surveys.py
+++ b/parsers/surveys.py
@@ -1,16 +1,21 @@
-import sys, os, types, logging, stat
-#sys.path.append('C:\\Expo\\expoweb')
-#from troggle import *
-#os.environ['DJANGO_SETTINGS_MODULE']='troggle.settings'
-import settings
-from troggle.core.models import *
-from PIL import Image
-#import settings
-#import core.models as models
+from __future__ import (absolute_import, division,
+                        print_function, unicode_literals)
+
+import sys
+import os
+import types
+import logging
+import stat
 import csv
 import re
 import datetime
+
+#from PIL import Image
 from utils import save_carefully
+from functools import reduce
+
+import settings
+from troggle.core.models import *
 
 def get_or_create_placeholder(year):
     """ All surveys must be related to a logbookentry. We don't have a way to
@@ -24,146 +29,146 @@ def get_or_create_placeholder(year):
     placeholder_logbook_entry, newly_created = save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
     return placeholder_logbook_entry
 
-# dead
-def readSurveysFromCSV():
-    try: # could probably combine these two
-        surveytab = open(os.path.join(settings.SURVEY_SCANS, "Surveys.csv"))
-    except IOError:
-        import cStringIO, urllib
-        surveytab = cStringIO.StringIO(urllib.urlopen(settings.SURVEY_SCANS + "/Surveys.csv").read())
-    dialect=csv.Sniffer().sniff(surveytab.read())
-    surveytab.seek(0,0)
-    surveyreader = csv.reader(surveytab,dialect=dialect)
-    headers = surveyreader.next()
-    header = dict(zip(headers, range(len(headers)))) #set up a dictionary where the indexes are header names and the values are column numbers
+# obsolete surveys.csv does not exist.
+# def readSurveysFromCSV():
+    # try: # could probably combine these two
+        # surveytab = open(os.path.join(settings.SURVEY_SCANS, "Surveys.csv"))
+    # except IOError:
+        # import io, urllib.request, urllib.parse, urllib.error
+        # surveytab = io.StringIO(urllib.request.urlopen(settings.SURVEY_SCANS + "/Surveys.csv").read())
+    # dialect=csv.Sniffer().sniff(surveytab.read())
+    # surveytab.seek(0,0)
+    # surveyreader = csv.reader(surveytab,dialect=dialect)
+    # headers = next(surveyreader)
+    # header = dict(list(zip(headers, list(range(len(headers)))))) #set up a dictionary where the indexes are header names and the values are column numbers
 
-    # test if the expeditions have been added yet
-    if Expedition.objects.count()==0:
-        print("There are no expeditions in the database. Please run the logbook parser.")
-        sys.exit()
+    # # test if the expeditions have been added yet
+    # if Expedition.objects.count()==0:
+        # print("There are no expeditions in the database. Please run the logbook parser.")
+        # sys.exit()
 
-    logging.info("Deleting all scanned images")
-    ScannedImage.objects.all().delete()
+    # logging.info("Deleting all scanned images")
+    # ScannedImage.objects.all().delete()
 
-    logging.info("Deleting all survey objects")
-    Survey.objects.all().delete()
+    # logging.info("Deleting all survey objects")
+    # Survey.objects.all().delete()
 
-    logging.info("Beginning to import surveys from "+str(os.path.join(settings.SURVEYS, "Surveys.csv"))+"\n"+"-"*60+"\n")
+    # logging.info("Beginning to import surveys from "+str(os.path.join(settings.SURVEYS, "Surveys.csv"))+"\n"+"-"*60+"\n")
 
-    for survey in surveyreader:
-        #I hate this, but some surveys have a letter eg 2000#34a. The next line deals with that.
-        walletNumberLetter = re.match(r'(?P<number>\d*)(?P<letter>[a-zA-Z]*)',survey[header['Survey Number']])
-        # print(walletNumberLetter.groups())
-        year=survey[header['Year']]
+    # for survey in surveyreader:
+        # #I hate this, but some surveys have a letter eg 2000#34a. The next line deals with that.
+        # walletNumberLetter = re.match(r'(?P<number>\d*)(?P<letter>[a-zA-Z]*)',survey[header['Survey Number']])
+        # # print(walletNumberLetter.groups())
+        # year=survey[header['Year']]
 
-        surveyobj = Survey(
-            expedition = Expedition.objects.filter(year=year)[0],
-            wallet_number = walletNumberLetter.group('number'),
-            logbook_entry = get_or_create_placeholder(year),
-            comments = survey[header['Comments']],
-            location = survey[header['Location']]
-            )
-        surveyobj.wallet_letter = walletNumberLetter.group('letter')
-        if survey[header['Finished']]=='Yes':
-            #try and find the sketch_scan
-            pass
-        surveyobj.save()
+        # surveyobj = Survey(
+            # expedition = Expedition.objects.filter(year=year)[0],
+            # wallet_number = walletNumberLetter.group('number'),
+            # logbook_entry = get_or_create_placeholder(year),
+            # comments = survey[header['Comments']],
+            # location = survey[header['Location']]
+            # )
+        # surveyobj.wallet_letter = walletNumberLetter.group('letter')
+        # if survey[header['Finished']]=='Yes':
+            # #try and find the sketch_scan
+            # pass
+        # surveyobj.save()
 
-        logging.info("added survey " + survey[header['Year']] + "#" + surveyobj.wallet_number + "\r")
+        # logging.info("added survey " + survey[header['Year']] + "#" + surveyobj.wallet_number + "\r")
 
 # dead
 def listdir(*directories):
     try:
         return os.listdir(os.path.join(settings.SURVEYS, *directories))
     except:
-        import urllib
+        import urllib.request, urllib.parse, urllib.error
         url = settings.SURVEYS + reduce(lambda x, y: x + "/" + y, ["listdir"] + list(directories))
-        folders = urllib.urlopen(url.replace("#", "%23")).readlines()
+        folders = urllib.request.urlopen(url.replace("#", "%23")).readlines()
         return [folder.rstrip(r"/") for folder in folders]
 
 # add survey scans
-def parseSurveyScans(expedition, logfile=None):
-# yearFileList = listdir(expedition.year)
-    try:
-        yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
-        yearFileList=os.listdir(yearPath)
-        print(yearFileList)
-        for surveyFolder in yearFileList:
-            try:
-                surveyNumber=re.match(r'\d\d\d\d#(X?)0*(\d+)',surveyFolder).groups()
-                #scanList = listdir(expedition.year, surveyFolder)
-                scanList=os.listdir(os.path.join(yearPath,surveyFolder))
-            except AttributeError:
-                print("Ignoring file in year folder: " + surveyFolder + "\r")
-                continue
+# def parseSurveyScans(expedition, logfile=None):
+# # yearFileList = listdir(expedition.year)
+    # try:
+        # yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
+        # yearFileList=os.listdir(yearPath)
+        # print(yearFileList)
+        # for surveyFolder in yearFileList:
+            # try:
+                # surveyNumber=re.match(rb'\d\d\d\d#(X?)0*(\d+)',surveyFolder).groups()
+                # #scanList = listdir(expedition.year, surveyFolder)
+                # scanList=os.listdir(os.path.join(yearPath,surveyFolder))
+            # except AttributeError:
+                # print(("Ignoring file in year folder: " + surveyFolder + "\r"))
+                # continue
 
-            for scan in scanList:
-                # Why does this insist on renaming all the scanned image files?
-                # It produces duplicates names and all images have type .jpg in the scanObj.
-                # It seems to rely on end users being particularly diligent in filenames which is NGtH
-                try:
-                    #scanChopped=re.match(r'(?i).*(notes|elev|plan|extend|elevation)-?(\d*)\.(png|jpg|jpeg|pdf)',scan).groups()
-                    scanChopped=re.match(r'(?i)([a-z_-]*\d?[a-z_-]*)(\d*)\.(png|jpg|jpeg|pdf|top|dxf|svg|tdr|th2|xml|txt)',scan).groups()
-                    scanType,scanNumber,scanFormat=scanChopped
-                except AttributeError:
-                    print("Ignored (bad name format): " + surveyFolder + '/' + scan + "\r")
-                    continue
-                scanTest = scanType
-                scanType = 'notes'
-                match = re.search(r'(?i)(elev|extend)',scanTest)
-                if match:
-                    scanType = 'elevation'
+            # for scan in scanList:
+                # # Why does this insist on renaming all the scanned image files?
+                # # It produces duplicates names and all images have type .jpg in the scanObj.
+                # # It seems to rely on end users being particularly diligent in filenames which is NGtH
+                # try:
+                    # #scanChopped=re.match(rb'(?i).*(notes|elev|plan|extend|elevation)-?(\d*)\.(png|jpg|jpeg|pdf)',scan).groups()
+                    # scanChopped=re.match(rb'(?i)([a-z_-]*\d?[a-z_-]*)(\d*)\.(png|jpg|jpeg|pdf|top|dxf|svg|tdr|th2|xml|txt)',scan).groups()
+                    # scanType,scanNumber,scanFormat=scanChopped
+                # except AttributeError:
+                    # print(("Ignored (bad name format): " + surveyFolder + '/' + scan + "\r"))
+                    # continue
+                # scanTest = scanType
+                # scanType = 'notes'
+                # match = re.search(rb'(?i)(elev|extend)',scanTest)
+                # if match:
+                    # scanType = 'elevation'
 
-                match = re.search(r'(?i)(plan)',scanTest)
-                if match:
-                    scanType = 'plan'
+                # match = re.search(rb'(?i)(plan)',scanTest)
+                # if match:
+                    # scanType = 'plan'
 
-                if scanNumber=='':
-                    scanNumber=1
+                # if scanNumber=='':
+                    # scanNumber=1
 
-                if type(surveyNumber)==types.TupleType:
-                    surveyLetter=surveyNumber[0]
-                    surveyNumber=surveyNumber[1]
-                try:
-                    placeholder=get_or_create_placeholder(year=int(expedition.year))
-                    survey=Survey.objects.get_or_create(wallet_number=surveyNumber, wallet_letter=surveyLetter, expedition=expedition, defaults={'logbook_entry':placeholder})[0]
-                except Survey.MultipleObjectsReturned:
-                    survey=Survey.objects.filter(wallet_number=surveyNumber, wallet_letter=surveyLetter, expedition=expedition)[0]
-                file_=os.path.join(yearPath, surveyFolder, scan)
-                scanObj = ScannedImage(
-                    file=file_,
-                    contents=scanType,
-                    number_in_wallet=scanNumber,
-                    survey=survey,
-                    new_since_parsing=False,
-                    )
-                print("Added scanned image at " + str(scanObj))
-                #if scanFormat=="png":
-                    #if isInterlacedPNG(os.path.join(settings.SURVEY_SCANS, "surveyscans", file_)):
-                        # print file_+ " is an interlaced PNG. No can do."
-                        #continue
-                scanObj.save()
-    except (IOError, OSError):
-        yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
-        print(" ! No folder found for " + expedition.year + " at:- " + yearPath)
+                # if isinstance(surveyNumber, tuple):
+                    # surveyLetter=surveyNumber[0]
+                    # surveyNumber=surveyNumber[1]
+                # try:
+                    # placeholder=get_or_create_placeholder(year=int(expedition.year))
+                    # survey=Survey.objects.get_or_create(wallet_number=surveyNumber, wallet_letter=surveyLetter, expedition=expedition, defaults={'logbook_entry':placeholder})[0]
+                # except Survey.MultipleObjectsReturned:
+                    # survey=Survey.objects.filter(wallet_number=surveyNumber, wallet_letter=surveyLetter, expedition=expedition)[0]
+                # file_=os.path.join(yearPath, surveyFolder, scan)
+                # scanObj = ScannedImage(
+                    # file=file_,
+                    # contents=scanType,
+                    # number_in_wallet=scanNumber,
+                    # survey=survey,
+                    # new_since_parsing=False,
+                    # )
+                # print(("Added scanned image at " + str(scanObj)))
+                # #if scanFormat=="png":
+                    # #if isInterlacedPNG(os.path.join(settings.SURVEY_SCANS, "surveyscans", file_)):
+                        # # print file_+ " is an interlaced PNG. No can do."
+                        # #continue
+                # scanObj.save()
+    # except (IOError, OSError):
+        # yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
+        # print((" ! No folder found for " + expedition.year + " at:- " + yearPath))
 
 # dead
-def parseSurveys(logfile=None):
-    try:
-        readSurveysFromCSV()
-    except (IOError, OSError):
-        print(" ! Survey CSV not found..")
-        pass
+# def parseSurveys(logfile=None):
    # try:
        # readSurveysFromCSV()
    # except (IOError, OSError):
        # print(" ! Survey CSV not found..")
        # pass
 
-    print " - Loading scans by expedition year"
-    for expedition in Expedition.objects.filter(year__gte=2000): #expos since 2000, because paths and filenames were nonstandard before then
-        print "%s" % expedition,
-        parseSurveyScans(expedition)
+    # print(" - Loading scans by expedition year")
+    # for expedition in Expedition.objects.filter(year__gte=2000): #expos since 2000, because paths and filenames were nonstandard before then
+        # print("%s" % expedition, end=' ')
+        # parseSurveyScans(expedition)
 
 # dead
 def isInterlacedPNG(filePath): #We need to check for interlaced PNGs because the thumbnail engine can't handle them (uses PIL)
@@ -180,7 +185,7 @@ def GetListDir(sdir):
     res = [ ]
     if sdir[:7] == "http://":
         assert False, "Not written"
-        s = urllib.urlopen(sdir)
+        s = urllib.request.urlopen(sdir)
     else:
         for f in os.listdir(sdir):
             if f[0] != ".":
@@ -223,14 +228,14 @@ def LoadListScans():
 
     # iterate into the surveyscans directory
-    print ' - ',
+    print(' - ', end=' ')
     for f, ff, fisdir in GetListDir(os.path.join(settings.SURVEY_SCANS, "surveyscans")):
         if not fisdir:
             continue
 
         # do the year folders
         if re.match(r"\d\d\d\d$", f):
-            print "%s" % f,
+            print("%s" % f, end=' ')
            for fy, ffy, fisdiry in GetListDir(ff):
                 if fisdiry:
                     assert fisdiry, ffy
@@ -257,9 +262,9 @@ def FindTunnelScan(tunnelfile, path):
             scansfilel = scansfolder.survexscansingle_set.filter(name=mscansdir.group(2))
             if len(scansfilel):
                 if len(scansfilel) > 1:
-                    print "BORK more than one image filename matches filter query. ", scansfilel[0]
-                    print "BORK ", tunnelfile.tunnelpath, path
-                    print "BORK ", mscansdir.group(1), mscansdir.group(2), len(scansfilel)
+                    print("BORK more than one image filename matches filter query. ", scansfilel[0])
+                    print("BORK ", tunnelfile.tunnelpath, path)
+                    print("BORK ", mscansdir.group(1), mscansdir.group(2), len(scansfilel))
                 #assert len(scansfilel) == 1
                 scansfile = scansfilel[0]
 
@@ -284,22 +289,22 @@ def FindTunnelScan(tunnelfile, path):
 def SetTunnelfileInfo(tunnelfile):
     ff = os.path.join(settings.TUNNEL_DATA, tunnelfile.tunnelpath)
     tunnelfile.filesize = os.stat(ff)[stat.ST_SIZE]
-    fin = open(ff)
+    fin = open(ff,'rb')
     ttext = fin.read()
     fin.close()
     if tunnelfile.filesize <= 0:
-        print "DEBUG - zero length xml file", ff
+        print("DEBUG - zero length xml file", ff)
         return
-    mtype = re.search("<(fontcolours|sketch)", ttext)
+    mtype = re.search(r"<(fontcolours|sketch)", ttext)
     assert mtype, ff
     tunnelfile.bfontcolours = (mtype.group(1)=="fontcolours")
-    tunnelfile.npaths = len(re.findall("<skpath", ttext))
     #
-    for path, style in re.findall('