2020-05-24 13:30:39 +01:00
import sys
import os
import types
import stat
2011-07-11 02:10:22 +01:00
import csv
import re
import datetime
2020-05-24 13:30:39 +01:00
2020-05-24 01:57:06 +01:00
from PIL import Image
2020-05-24 13:30:39 +01:00
from functools import reduce
import settings
2021-04-26 18:18:16 +01:00
from troggle . core . models . survex import SingleScan , Wallet , DrawingFile
2021-04-13 00:43:57 +01:00
from troggle . core . models . troggle import DataIssue
2021-04-13 00:11:08 +01:00
from troggle . core . utils import save_carefully
2020-06-30 15:52:29 +01:00
2021-04-13 01:37:42 +01:00
''' Scans through all the :drawings: repository looking
for tunnel and therion files
'''
2011-07-11 02:10:22 +01:00
def get_or_create_placeholder(year):
    """Return the placeholder logbook entry for ``year``, creating it if needed.

    All surveys must be related to a logbook entry, but there is no automatic
    way to work out which survey went with which entry, so one placeholder
    logbook entry per year is used instead.

    Args:
        year: The year of the placeholder; anything ``int()`` accepts.

    Returns:
        The first item of the pair returned by ``save_carefully``.
    """
    # NOTE(review): LogbookEntry is not among the imports visible at the top of
    # this file - confirm it is in scope, otherwise this call raises NameError.
    year = int(year)
    lookup_attribs = {
        'date__year': year,
        'title': "placeholder for surveys",
    }
    non_lookup_attribs = {
        'text': "surveys temporarily attached to this should be re-attached to their actual trips",
        'date': datetime.date(year, 1, 1),
    }
    placeholder_logbook_entry, _newly_created = save_carefully(
        LogbookEntry, lookup_attribs, non_lookup_attribs
    )
    return placeholder_logbook_entry
2020-05-24 01:57:06 +01:00
def listdir(*directories):
    """List the entries of a directory below ``settings.SURVEYS``.

    The local filesystem is tried first. If that fails with an ``OSError``,
    ``settings.SURVEYS`` is treated as a URL prefix and the listing is fetched
    from ``<SURVEYS>listdir/<directories...>``.

    Args:
        *directories: Path components below ``settings.SURVEYS``.

    Returns:
        A list of entry names (``str``).
    """
    try:
        return os.listdir(os.path.join(settings.SURVEYS, *directories))
    except OSError:
        # Only filesystem failures trigger the remote fallback; anything else
        # (e.g. a programming error) is allowed to propagate.
        import urllib.request

        url = settings.SURVEYS + "/".join(["listdir", *directories])
        # '#' appears in wallet names and must not be read as a URL fragment.
        with urllib.request.urlopen(url.replace("#", "%23")) as response:
            folders = response.readlines()
        # readlines() yields bytes; decode before stripping, because
        # bytes.rstrip() with a str argument raises TypeError.
        return [folder.decode().rstrip("\r\n").rstrip("/") for folder in folders]
2011-07-11 02:10:22 +01:00
# handles url or file, so we can refer to a set of scans on another server
def GetListDir(sdir):
    '''List the visible entries of directory ``sdir``.

    Loading from an ``http://`` location is not implemented: a DataIssue is
    recorded, the scheme prefix is dropped and the remainder is listed as a
    local path.

    Args:
        sdir: Directory path (or ``http://`` location) as a string.

    Returns:
        A list of ``(name, full_path, is_dir)`` tuples, one per entry whose
        name does not start with a dot.
    '''
    if sdir.startswith("http://"):
        # s = urllib.request.urlopen(sdir)
        message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]"
        print(message)
        DataIssue.objects.create(parser='Drawings', message=message)
        # Strings are immutable, so the prefix is removed by rebinding the name
        # (slice assignment on a str raises TypeError).
        sdir = sdir[len("http://"):]

    res = []
    for f in os.listdir(sdir):
        if f.startswith("."):
            continue  # skip hidden files and directories
        ff = os.path.join(sdir, f)
        res.append((f, ff, os.path.isdir(ff)))
    return res
2020-06-23 23:46:33 +01:00
def LoadListScansFile(scansfolder):
    '''Create a SingleScan for every image file found in one wallet folder.

    Entries of ``scansfolder.fpath`` are listed with GetListDir; the contents
    of any sub-directory are merged in one level deep. Every entry whose name
    ends in an image extension (case-insensitive) is saved as a SingleScan
    attached to ``scansfolder``. A progress dot is printed per ten scans.

    Args:
        scansfolder: The wallet object; must have an ``fpath`` attribute.
    '''
    gld = []
    # flatten out any directories in these wallet folders - should not be any
    for entry in GetListDir(scansfolder.fpath):
        _name, fullpath, isdir = entry
        if isdir:
            gld.extend(GetListDir(fullpath))
        else:
            gld.append(entry)

    # The flag is passed separately: an inline (?i) that is not at the very
    # start of the pattern is rejected by the re module from Python 3.11 on.
    rx_scanfile = re.compile(r"\.(?:png|jpg|jpeg|pdf|svg|gif)$", re.IGNORECASE)

    c = 0
    for fyf, ffyf, _fisdiryf in gld:
        if not rx_scanfile.search(fyf):
            continue
        singlescan = SingleScan(ffile=ffyf, name=fyf, scansfolder=scansfolder)
        singlescan.save()
        c += 1
        if c >= 10:
            print(".", end='')
            c = 0
2011-07-11 02:10:22 +01:00
# this iterates through the scans directories (either here or on the remote server)
# and builds up the models we can access later
def LoadListScans():
    '''Rebuild all Wallet and SingleScan objects from the scans directories.

    Deletes every existing SingleScan and Wallet, then loads the ``smkhs``
    folder (if it exists) and walks ``settings.SURVEY_SCANS``: each
    sub-directory of a four-digit year folder becomes a Wallet, as does every
    other top-level directory except ``thumbs``.
    '''
    print(' - Loading Survey Scans')

    SingleScan.objects.all().delete()
    Wallet.objects.all().delete()
    print(' - deleting all scansFolder and scansSingle objects')

    # first do the smkhs (large kh survey scans) directory
    manywallets_smkhs = Wallet(fpath=os.path.join(settings.SURVEY_SCANS, "../surveys/smkhs"), walletname="smkhs")
    print("smkhs", end=' ')
    if os.path.isdir(manywallets_smkhs.fpath):
        manywallets_smkhs.save()
        LoadListScansFile(manywallets_smkhs)

    # iterate into the surveyscans directory
    print(' - ', end=' ')
    for f, ff, fisdir in GetListDir(settings.SURVEY_SCANS):
        if not fisdir:
            continue

        if re.match(r"\d\d\d\d$", f):
            # do the year folders: every sub-directory is one wallet
            print("%s" % f, end=' ')
            for fy, ffy, fisdiry in GetListDir(ff):
                if fisdiry:
                    scansfolder = Wallet(fpath=ffy, walletname=fy)
                    scansfolder.save()
                    LoadListScansFile(scansfolder)
        elif f != "thumbs":
            # do the other top-level folders: each one is a wallet in itself
            scansfolder = Wallet(fpath=ff, walletname=f)
            scansfolder.save()
            LoadListScansFile(scansfolder)
2011-07-11 02:10:22 +01:00
2021-04-26 18:08:42 +01:00
def find_tunnel_scan(dwgfile, path):
    '''Link a drawing file to the wallet, scan or drawing named by ``path``.

    ``path`` is a line of text which may or may not contain a recognisable
    name of a scanned file which we have already seen when we imported all the
    files we could find in the surveyscans directories.

    * If ``path`` looks like ``<wallet>/<image file>``, the matching Wallet and
      SingleScan (when found) are added to ``dwgfile.manywallets`` and
      ``dwgfile.scans``. Ambiguous matches are recorded as DataIssues and the
      first match is used.
    * Otherwise, if ``path`` is non-empty and does not end in an image
      extension, it is treated as the name of another drawing, which is added
      to ``dwgfile.dwgcontains`` when found.

    ``dwgfile`` is saved before returning.

    Args:
        dwgfile: The drawing object being populated.
        path: The referenced path as a ``str``.
    '''
    scansfolder, scansfile = None, None
    mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg))$", path)
    if mscansdir:
        walletname, scanname = mscansdir.group(1), mscansdir.group(2)

        scansfolderl = Wallet.objects.filter(walletname=walletname)
        # This should properly detect if a list of folders is returned and do something sensible, not just pick the first.
        if len(scansfolderl):
            scansfolder = scansfolderl[0]
            if len(scansfolderl) > 1:
                # Report the folder that was picked; the scan-file list does
                # not exist yet at this point.
                message = "! More than one scan FOLDER matches filter query. [{}]: {} {} {} {}".format(scansfolderl[0], walletname, scanname, dwgfile.dwgpath, path)
                print(message)
                DataIssue.objects.create(parser='Tunnel', message=message)

        if scansfolder:
            scansfilel = scansfolder.singlescan_set.filter(name=scanname)
            if len(scansfilel):
                if len(scansfilel) > 1:
                    message = "! More than one image FILENAME matches filter query. [{}]: {} {} {} {}".format(scansfilel[0], walletname, scanname, dwgfile.dwgpath, path)
                    print(message)
                    DataIssue.objects.create(parser='Tunnel', message=message)
                scansfile = scansfilel[0]

        if scansfolder:
            dwgfile.manywallets.add(scansfolder)
        if scansfile:
            dwgfile.scans.add(scansfile)

    # re.IGNORECASE replaces a trailing inline (?i), which the re module
    # rejects from Python 3.11 on.
    elif path and not re.search(r"\.(?:png|jpg|pdf|jpeg|gif)$", path, re.IGNORECASE):
        name = os.path.split(path)[1]
        rdwgfilel = DrawingFile.objects.filter(dwgname=name)
        if len(rdwgfilel):
            if len(rdwgfilel) > 1:
                # Only a genuine name clash is a data issue; one match is the normal case.
                message = "! Two paths with same name [{}]: {}".format(path, name)
                print(message)
                DataIssue.objects.create(parser='Tunnel', message=message)
            rdwgfile = rdwgfilel[0]
            dwgfile.dwgcontains.add(rdwgfile)

    dwgfile.save()
2011-07-11 02:10:22 +01:00
2021-04-07 21:53:43 +01:00
def findimageinsert(therionfile, xth_me):
    '''Placeholder: link a Therion drawing file to the image (scan) file it references.

    Not implemented yet; both arguments are ignored and None is returned.
    '''
    return None
2011-07-11 02:10:22 +01:00
2021-04-07 21:53:43 +01:00
def findimportinsert(therionfile, imp):
    '''Placeholder: link a Therion scrap to the Therion scrap it references.

    Not implemented yet; both arguments are ignored and None is returned.
    '''
    return None
# Patterns applied to the text of Therion (.th / .th2) files.
# An xtherion image-insert line, from 'xth_me_image_insert' to the closing brace at end of line.
rx_xth_me = re.compile(r'xth_me_image_insert.*{.*}$', re.MULTILINE)
# A line starting with 'survey '; captures the word that follows.
rx_scrap = re.compile(r'^survey (\w*).*$', re.MULTILINE)
# A line starting with 'input '; captures the word that follows (stops at the first non-word character).
rx_input = re.compile(r'^input (\w*).*$', re.MULTILINE)
def settherionfileinfo(filetuple):
    '''Read in the drawing file contents and set values on the dwgfile object.

    Sets ``filesize`` and ``npaths`` on the drawing object and saves it, then
    records a DataIssue for every xtherion image insert and every ``input``
    line found, since neither is resolved to a real link yet.

    Args:
        filetuple: ``(thtype, therionfile)`` where ``thtype`` is ``'th'`` or
            ``'th2'`` and ``therionfile`` is the drawing object, whose
            ``dwgpath`` is relative to ``settings.DRAWINGS_DATA``.

    Returns:
        None. Returns early, without saving, when the file is empty.
    '''
    thtype, therionfile = filetuple

    ff = os.path.join(settings.DRAWINGS_DATA, therionfile.dwgpath)
    therionfile.filesize = os.stat(ff).st_size
    if therionfile.filesize <= 0:
        message = "! Zero length therion file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        return
    # 'with' guarantees the handle is closed even if read() raises.
    with open(ff, 'r') as fin:
        ttext = fin.read()

    # The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap'
    if thtype == 'th':
        therionfile.npaths = len(re.findall(r"^input ", ttext, re.MULTILINE))
    elif thtype == 'th2':
        therionfile.npaths = len(re.findall(r"^line ", ttext, re.MULTILINE))
    therionfile.save()

    # scan and look for survex blocks that might have been included, and image scans (as for tunnel drawings)
    # which would populate dwgfile.survexfile

    # in .th2 files:
    # ##XTHERION## xth_me_image_insert {500 1 1.0} {1700 {}} ../../../expofiles/surveyscans/2014/01popped_elev1.jpeg 0 {}
    # scrap blownout -projection plan -scale [-81.0 -42.0 216.0 -42.0 0.0 0.0 7.5438 0.0 m]
    for xth_me in rx_xth_me.findall(ttext):
        # The image filename is normally the third field from the end; fall
        # back to the whole match if the line is shorter than expected.
        fields = xth_me.split()
        imagename = fields[-3] if len(fields) >= 3 else xth_me
        message = f'! Un-parsed image filename: {therionfile.dwgname} : {imagename} - {therionfile.dwgpath}'
        #print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        findimageinsert(therionfile, xth_me)

    for inp in rx_input.findall(ttext):
        # if this 'input' is a .th2 file we have already seen, then we can assign this as a sub-file
        # but we would need to disentangle to get the current path properly
        message = f'! Un-set Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}'
        #print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        findimportinsert(therionfile, inp)

    therionfile.save()
# Byte patterns applied to the raw contents of Tunnel (.xml) drawing files.
# One occurrence per <skpath element.
rx_skpath = re.compile(rb'<skpath')
# A frame <pcarea> element; captures the sfsketch and sfstyle attribute values.
rx_pcpath = re.compile(rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"')
2021-04-26 18:08:42 +01:00
def setdwgfileinfo(dwgfile):
    '''Read in the drawing file contents and set values on the dwgfile object.

    Sets ``filesize`` and ``npaths`` (the number of ``<skpath`` occurrences),
    saves the object, then passes every frame sketch path found in the file to
    find_tunnel_scan.

    TODO: should try to read the date too, e.g. tunneldate="2010-08-16 22:51:57",
    then we could display on the master calendar per expo.

    Args:
        dwgfile: The drawing object; ``dwgpath`` is relative to
            ``settings.DRAWINGS_DATA``.

    Returns:
        None. Returns early, without saving, when the file is empty.
    '''
    ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath)
    dwgfile.filesize = os.stat(ff).st_size
    if dwgfile.filesize <= 0:
        message = "! Zero length xml file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='Drawings', message=message)
        return
    # Read as bytes (the patterns are bytes patterns); 'with' closes the handle
    # even if read() raises.
    with open(ff, 'rb') as fin:
        ttext = fin.read()

    dwgfile.npaths = len(rx_skpath.findall(ttext))
    dwgfile.save()

    # example drawing file in Tunnel format.
    # <tunnelxml tunnelversion="version2009-06-21 Matienzo" tunnelproject="ireby" tunneluser="goatchurch" tunneldate="2009-06-29 23:22:17">
    # <pcarea area_signal="frame" sfscaledown="12.282584" sfrotatedeg="-90.76982" sfxtrans="11.676667377221136" sfytrans="-15.677173422877454" sfsketch="204description/scans/plan(38).png" sfstyle="" nodeconnzsetrelative="0.0">
    for path, _style in rx_pcpath.findall(ttext):
        find_tunnel_scan(dwgfile, path.decode())

    # should also scan and look for survex blocks that might have been included, and image scans
    # which would populate dwgfile.survexfile

    dwgfile.save()
2011-07-11 02:10:22 +01:00
2021-04-07 21:53:43 +01:00
def load_drawings_files():
    '''Rebuild all DrawingFile objects from the drawings directory tree.

    Deletes every DrawingFile and the DataIssues of the 'Drawings', 'Therion'
    and 'Tunnel' parsers, then walks ``settings.DRAWINGS_DATA`` using a stack
    of pending sub-directories, creating a DrawingFile for each ``*.xml``,
    ``*.th`` and ``*.th2`` file. Names starting with '.' or ending with '~'
    are skipped. Finally the contents of every file found are parsed.
    '''
    # file extension -> drawing type tag
    kinds = {".xml": 'xml', ".th": 'th', ".th2": 'th2'}

    all_xml = []
    drawdatadir = settings.DRAWINGS_DATA
    DrawingFile.objects.all().delete()
    DataIssue.objects.filter(parser='Drawings').delete()
    DataIssue.objects.filter(parser='Therion').delete()
    DataIssue.objects.filter(parser='Tunnel').delete()

    drawingsdirs = [""]
    while drawingsdirs:
        drawdir = drawingsdirs.pop()
        for f in os.listdir(os.path.join(drawdatadir, drawdir)):
            if f.startswith(".") or f.endswith("~"):
                continue
            lf = os.path.join(drawdir, f)
            ff = os.path.join(drawdatadir, lf)
            if os.path.isdir(ff):
                # queued for a later pass of the while loop; the listing being
                # iterated here is not modified
                drawingsdirs.append(lf)
                continue

            # splitext removes exactly the extension, whatever its length
            # (a fixed four-character slice cut one letter too many off '.th' names).
            stem, ext = os.path.splitext(f)
            kind = kinds.get(ext)
            if kind is None:
                continue
            # Always creates new
            dwgfile = DrawingFile(dwgpath=lf, dwgname=stem)
            dwgfile.save()
            all_xml.append((kind, dwgfile))

    print(f' - {len(all_xml)} Drawings files found')

    # important to import .th2 files before .th so that we can assign them when
    # found in .th files: .th files are held back for a second pass.
    th_files = []
    for d in all_xml:
        if d[0] == 'xml':
            setdwgfileinfo(d[1])
        elif d[0] == 'th2':
            settherionfileinfo(d)
        else:
            th_files.append(d)
    for d in th_files:
        settherionfileinfo(d)
2021-04-26 18:08:42 +01:00
# for drawfile in DrawingFile.objects.all():
2021-04-07 21:53:43 +01:00
# SetTunnelfileInfo(drawfile)