from __future__ import (absolute_import, division,
                        print_function, unicode_literals)
import sys
import os
import types
import logging
import stat
import csv
import re
import datetime
from PIL import Image
from utils import save_carefully
from functools import reduce
import settings
from troggle.core.models import *
from troggle.core.models_caves import *
from troggle.core.models_survex import *
def get_or_create_placeholder(year):
    """All surveys must be related to a logbookentry. We don't have a way to
    automatically figure out which survey went with which logbookentry,
    so we create a survey placeholder logbook entry for each year. This
    function always returns such a placeholder, and creates it if it doesn't
    exist yet.
    """
    lookupAttribs = {'date__year': int(year), 'title': "placeholder for surveys"}
    nonLookupAttribs = {'text': "surveys temporarily attached to this should be re-attached to their actual trips",
                        'date': datetime.date(int(year), 1, 1)}
    placeholder_logbook_entry, newly_created = save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
    return placeholder_logbook_entry
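
# Minimal usage sketch (the 'survey' object here is hypothetical; assumes
# Django and the troggle settings are configured):
#   placeholder = get_or_create_placeholder("2019")
#   survey.logbook_entry = placeholder
#   survey.save()
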
# Obsolete: surveys.csv does not exist any more.
# def readSurveysFromCSV():
# try: # could probably combine these two
# surveytab = open(os.path.join(settings.SURVEY_SCANS, "Surveys.csv"))
# except IOError:
# import io, urllib.request, urllib.parse, urllib.error
# surveytab = io.StringIO(urllib.request.urlopen(settings.SURVEY_SCANS + "/Surveys.csv").read())
# dialect=csv.Sniffer().sniff(surveytab.read())
# surveytab.seek(0,0)
# surveyreader = csv.reader(surveytab,dialect=dialect)
# headers = next(surveyreader)
# header = dict(list(zip(headers, list(range(len(headers)))))) #set up a dictionary where the indexes are header names and the values are column numbers
# # test if the expeditions have been added yet
# if Expedition.objects.count()==0:
# print("There are no expeditions in the database. Please run the logbook parser.")
# sys.exit()
# logging.info("Deleting all scanned images")
# ScannedImage.objects.all().delete()
# logging.info("Deleting all survey objects")
# Survey.objects.all().delete()
# logging.info("Beginning to import surveys from "+str(os.path.join(settings.SURVEYS, "Surveys.csv"))+"\n"+"-"*60+"\n")
# for survey in surveyreader:
# #I hate this, but some surveys have a letter eg 2000#34a. The next line deals with that.
# walletNumberLetter = re.match(r'(?P<number>\d*)(?P<letter>[a-zA-Z]*)',survey[header['Survey Number']])
# # print(walletNumberLetter.groups())
# year=survey[header['Year']]
# surveyobj = Survey(
# expedition = Expedition.objects.filter(year=year)[0],
# wallet_number = walletNumberLetter.group('number'),
# logbook_entry = get_or_create_placeholder(year),
# comments = survey[header['Comments']],
# location = survey[header['Location']]
# )
# surveyobj.wallet_letter = walletNumberLetter.group('letter')
# if survey[header['Finished']]=='Yes':
# #try and find the sketch_scan
# pass
# surveyobj.save()
# logging.info("added survey " + survey[header['Year']] + "#" + surveyobj.wallet_number + "\r")
# dead
def listdir(*directories):
    try:
        return os.listdir(os.path.join(settings.SURVEYS, *directories))
    except OSError:
        # local folder not available - fall back to the remote listdir service
        import urllib.request, urllib.parse, urllib.error
        url = settings.SURVEYS + reduce(lambda x, y: x + "/" + y, ["listdir"] + list(directories))
        folders = urllib.request.urlopen(url.replace("#", "%23")).readlines()
        # urlopen() yields bytes lines; decode before stripping the trailing slash
        return [folder.decode().rstrip("/") for folder in folders]
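
# Usage sketch (illustrative year-folder argument):
#   for name in listdir("2019"):
#       print(name)
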
# add survey scans
# def parseSurveyScans(expedition, logfile=None):
# # yearFileList = listdir(expedition.year)
# try:
# yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
# yearFileList=os.listdir(yearPath)
# print(yearFileList)
# for surveyFolder in yearFileList:
# try:
# surveyNumber=re.match(rb'\d\d\d\d#(X?)0*(\d+)',surveyFolder).groups()
# #scanList = listdir(expedition.year, surveyFolder)
# scanList=os.listdir(os.path.join(yearPath,surveyFolder))
# except AttributeError:
# print(("Ignoring file in year folder: " + surveyFolder + "\r"))
# continue
# for scan in scanList:
# # Why does this insist on renaming all the scanned image files?
# # It produces duplicates names and all images have type .jpg in the scanObj.
# # It seems to rely on end users being particularly diligent in filenames which is NGtH
# try:
# #scanChopped=re.match(rb'(?i).*(notes|elev|plan|extend|elevation)-?(\d*)\.(png|jpg|jpeg|pdf)',scan).groups()
# scanChopped=re.match(rb'(?i)([a-z_-]*\d?[a-z_-]*)(\d*)\.(png|jpg|jpeg|pdf|top|dxf|svg|tdr|th2|xml|txt)',scan).groups()
# scanType,scanNumber,scanFormat=scanChopped
# except AttributeError:
# print(("Ignored (bad name format): " + surveyFolder + '/' + scan + "\r"))
# continue
# scanTest = scanType
# scanType = 'notes'
# match = re.search(rb'(?i)(elev|extend)',scanTest)
# if match:
# scanType = 'elevation'
# match = re.search(rb'(?i)(plan)',scanTest)
# if match:
# scanType = 'plan'
# if scanNumber=='':
# scanNumber=1
# if isinstance(surveyNumber, tuple):
# surveyLetter=surveyNumber[0]
# surveyNumber=surveyNumber[1]
# try:
# placeholder=get_or_create_placeholder(year=int(expedition.year))
# survey=Survey.objects.get_or_create(wallet_number=surveyNumber, wallet_letter=surveyLetter, expedition=expedition, defaults={'logbook_entry':placeholder})[0]
# except Survey.MultipleObjectsReturned:
# survey=Survey.objects.filter(wallet_number=surveyNumber, wallet_letter=surveyLetter, expedition=expedition)[0]
# file_=os.path.join(yearPath, surveyFolder, scan)
# scanObj = ScannedImage(
# file=file_,
# contents=scanType,
# number_in_wallet=scanNumber,
# survey=survey,
# new_since_parsing=False,
# )
# print(("Added scanned image at " + str(scanObj)))
# #if scanFormat=="png":
# #if isInterlacedPNG(os.path.join(settings.SURVEY_SCANS, "surveyscans", file_)):
# # print file_+ " is an interlaced PNG. No can do."
# #continue
# scanObj.save()
# except (IOError, OSError):
# yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
# print((" ! No folder found for " + expedition.year + " at:- " + yearPath))
# dead
# def parseSurveys(logfile=None):
# try:
# readSurveysFromCSV()
# except (IOError, OSError):
# print(" ! Survey CSV not found..")
# pass
# print(" - Loading scans by expedition year")
# for expedition in Expedition.objects.filter(year__gte=2000): #expos since 2000, because paths and filenames were nonstandard before then
# print("%s" % expedition, end=' ')
# parseSurveyScans(expedition)
# dead
# def isInterlacedPNG(filePath): #We need to check for interlaced PNGs because the thumbnail engine can't handle them (uses PIL)
# file=Image.open(filePath)
# print(filePath)
# if 'interlace' in file.info:
# return file.info['interlace']
# else:
# return False
# handles url or file, so we can refer to a set of scans on another server
def GetListDir(sdir):
    res = []
    if sdir[:7] == "http://":
        # fetching a folder listing over http is not implemented yet
        assert False, "Not written"
        import urllib.request
        s = urllib.request.urlopen(sdir)
    else:
        for f in os.listdir(sdir):
            if f[0] != ".":
                ff = os.path.join(sdir, f)
                res.append((f, ff, os.path.isdir(ff)))
    return res
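
# GetListDir returns (name, fullpath, isdir) tuples, e.g. (hypothetical path):
#   [("notes1.png", "/expofiles/surveyscans/2019/2019#01/notes1.png", False)]
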
def LoadListScansFile(survexscansfolder):
    gld = []

    # flatten out any directories in these wallet folders - should not be any
    for (fyf, ffyf, fisdiryf) in GetListDir(survexscansfolder.fpath):
        if fisdiryf:
            gld.extend(GetListDir(ffyf))
        else:
            gld.append((fyf, ffyf, fisdiryf))

    c = 0
    for (fyf, ffyf, fisdiryf) in gld:
        #assert not fisdiryf, ffyf
        if re.search(r"(?i)\.(?:png|jpg|jpeg|pdf|svg|gif)$", fyf):
            survexscansingle = SurvexScanSingle(ffile=ffyf, name=fyf, survexscansfolder=survexscansfolder)
            survexscansingle.save()

            # print a dot for every tenth file as a progress indicator
            c += 1
            if c >= 10:
                print(".", end=' ')
                c = 0

# this iterates through the scans directories (either here or on the remote server)
# and builds up the models we can access later
def LoadListScans():

    print(' - Loading Survey Scans')

    SurvexScanSingle.objects.all().delete()
    SurvexScansFolder.objects.all().delete()
    print(' - deleting all scansFolder and scansSingle objects')

    # first do the smkhs (large kh survey scans) directory
    survexscansfoldersmkhs = SurvexScansFolder(fpath=os.path.join(settings.SURVEY_SCANS, "../surveys/smkhs"), walletname="smkhs")
    print("smkhs", end=' ')
    if os.path.isdir(survexscansfoldersmkhs.fpath):
        survexscansfoldersmkhs.save()
        LoadListScansFile(survexscansfoldersmkhs)

    # iterate into the surveyscans directory
    print(' - ', end=' ')
    for f, ff, fisdir in GetListDir(settings.SURVEY_SCANS):
        if not fisdir:
            continue

        # do the year folders
        if re.match(r"\d\d\d\d$", f):
            print("%s" % f, end=' ')
            for fy, ffy, fisdiry in GetListDir(ff):
                if fisdiry:
                    assert fisdiry, ffy
                    survexscansfolder = SurvexScansFolder(fpath=ffy, walletname=fy)
                    survexscansfolder.save()
                    LoadListScansFile(survexscansfolder)

        # do the non-year folders, skipping the thumbnails directory
        elif f != "thumbs":
            survexscansfolder = SurvexScansFolder(fpath=ff, walletname=f)
            survexscansfolder.save()
            LoadListScansFile(survexscansfolder)
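
# Typical invocation (illustrative; normally this is driven by troggle's
# database-reset machinery rather than called by hand):
#   LoadListScans()
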
def FindTunnelScan(tunnelfile, path):
    scansfolder, scansfile = None, None
    mscansdir = re.search(rb"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg))$", path)
    if mscansdir:
        # path came from a bytes regex, so decode the captured groups before
        # filtering against the (str) database fields
        scansfolderl = SurvexScansFolder.objects.filter(walletname=mscansdir.group(1).decode())
        if len(scansfolderl):
            assert len(scansfolderl) == 1
            scansfolder = scansfolderl[0]
        if scansfolder:
            scansfilel = scansfolder.survexscansingle_set.filter(name=mscansdir.group(2).decode())
            if len(scansfilel):
                if len(scansfilel) > 1:
                    print("BORK more than one image filename matches filter query.", scansfilel[0])
                    print("BORK", tunnelfile.tunnelpath, path)
                    print("BORK", mscansdir.group(1), mscansdir.group(2), len(scansfilel))
                    #assert len(scansfilel) == 1
                scansfile = scansfilel[0]

        if scansfolder:
            tunnelfile.survexscansfolders.add(scansfolder)
        if scansfile:
            tunnelfile.survexscans.add(scansfile)

    elif path and not re.search(rb"(?i)\.(?:png|jpg|pdf|jpeg)$", path):
        name = os.path.split(path)[1]
        #print("debug-tunnelfileobjects ", tunnelfile.tunnelpath, path, name)
        # name is bytes here too; decode it for the ORM filter
        rtunnelfilel = TunnelFile.objects.filter(tunnelname=name.decode())
        if len(rtunnelfilel):
            assert len(rtunnelfilel) == 1, ("two paths with name of", path, "need more discrimination coded")
            rtunnelfile = rtunnelfilel[0]
            #print "ttt", tunnelfile.tunnelpath, path, name, rtunnelfile.tunnelpath
            tunnelfile.tunnelcontains.add(rtunnelfile)

    tunnelfile.save()
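
# Sketch of what the wallet regex above does on a hypothetical path:
#   re.search(rb"(\d\d\d\d#X?\d+\w?|smkhs)/(.*?(?:png|jpg|pdf|jpeg))$",
#             rb"2018#15/notes2.png").groups()
#   -> (b"2018#15", b"notes2.png")
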
def SetTunnelfileInfo(tunnelfile):
    ff = os.path.join(settings.TUNNEL_DATA, tunnelfile.tunnelpath)
    tunnelfile.filesize = os.stat(ff)[stat.ST_SIZE]

    fin = open(ff, 'rb')
    ttext = fin.read()
    fin.close()

    if tunnelfile.filesize <= 0:
        print("DEBUG - zero length xml file", ff)
        return

    mtype = re.search(rb"<(fontcolours|sketch)", ttext)
    assert mtype, ff
    # ttext is bytes, so compare the captured group against a bytes literal
    tunnelfile.bfontcolours = (mtype.group(1) == b"fontcolours")
    tunnelfile.npaths = len(re.findall(rb"<skpath", ttext))
    tunnelfile.save()

    # <tunnelxml tunnelversion="version2009-06-21 Matienzo" tunnelproject="ireby" tunneluser="goatchurch" tunneldate="2009-06-29 23:22:17">
    # <pcarea area_signal="frame" sfscaledown="12.282584" sfrotatedeg="-90.76982" sfxtrans="11.676667377221136" sfytrans="-15.677173422877454" sfsketch="204description/scans/plan(38).png" sfstyle="" nodeconnzsetrelative="0.0">
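    # On the sample <pcarea> element above, the findall below would yield one
    # (path, style) pair: (b"204description/scans/plan(38).png", b"")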
    for path, style in re.findall(rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"', ttext):
        FindTunnelScan(tunnelfile, path)

    # should also scan for survex blocks that might have been included,
    # and for survex titles as well
    tunnelfile.save()
def LoadTunnelFiles():
    tunneldatadir = settings.TUNNEL_DATA
    TunnelFile.objects.all().delete()
    tunneldirs = [""]

    # iterative walk of the tunnel data tree, creating a TunnelFile
    # object for every xml file found
    while tunneldirs:
        tunneldir = tunneldirs.pop()
        for f in os.listdir(os.path.join(tunneldatadir, tunneldir)):
            if f[0] == "." or f[-1] == "~":
                continue
            lf = os.path.join(tunneldir, f)
            ff = os.path.join(tunneldatadir, lf)
            if os.path.isdir(ff):
                tunneldirs.append(lf)
            elif f[-4:] == ".xml":
                tunnelfile = TunnelFile(tunnelpath=lf, tunnelname=os.path.split(f[:-4])[1])
                tunnelfile.save()

    for tunnelfile in TunnelFile.objects.all():
        SetTunnelfileInfo(tunnelfile)
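
# Typical invocation (illustrative):
#   LoadTunnelFiles()
#   print(TunnelFile.objects.count(), "tunnel xml files loaded")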