2020-05-24 13:30:39 +01:00
import sys
import os
import types
import stat
2011-07-11 02:10:22 +01:00
import csv
import re
import datetime
2021-05-04 02:46:56 +01:00
from pathlib import Path
2020-05-24 13:30:39 +01:00
2020-05-24 01:57:06 +01:00
from PIL import Image
2020-05-24 13:30:39 +01:00
from functools import reduce
import settings
2021-04-26 18:18:16 +01:00
from troggle . core . models . survex import SingleScan , Wallet , DrawingFile
2021-04-13 00:43:57 +01:00
from troggle . core . models . troggle import DataIssue
2021-05-04 20:57:16 +01:00
from troggle . core . utils import save_carefully , GetListDir
2020-06-30 15:52:29 +01:00
2021-04-27 20:44:24 +01:00
'''Searches through the :drawings: repository looking
for Tunnel and Therion files.
'''

# Free-text list of outstanding work for this parser; currently just a newline.
todo = '''
'''
2011-07-11 02:10:22 +01:00
def get_or_create_placeholder(year):
    """Return the "placeholder for surveys" logbook entry for `year`.

    Every survey has to be attached to a logbook entry, but there is no
    automatic way to tell which trip a survey really belongs to. Each year
    therefore gets one placeholder entry, dated 1 January, which is looked
    up (and created when missing) through `save_carefully`.

    NOTE(review): `LogbookEntry` is not among the imports visible at the top
    of this file - confirm where it is supposed to come from.
    """
    year_number = int(year)
    lookup = {
        'date__year': year_number,
        'title': "placeholder for surveys",
    }
    defaults = {
        'text': "surveys temporarily attached to this should be re-attached to their actual trips",
        'date': datetime.date(year_number, 1, 1),
    }
    entry, _created = save_carefully(LogbookEntry, lookup, defaults)
    return entry
2021-05-05 00:35:10 +01:00
def find_dwg_file(dwgfile, path):
    """Link `dwgfile` to the scan or drawing that `path` refers to.

    `path` is a line of text taken from a drawing file. It may or may not
    contain a recognisable name of a scanned file which we have already seen
    when we imported all the files we could find in the surveyscans
    directories.

    * If `path` looks like `<wallet>/<image file>`, the matching Wallet and
      (when found) SingleScan are added to `dwgfile.manywallets` and
      `dwgfile.scans`.
    * Otherwise, if `path` is non-empty and does not end in an image suffix,
      its final component is looked up as the `dwgname` of another
      DrawingFile, which is added to `dwgfile.dwgcontains`.

    Ambiguous or duplicate matches are printed and recorded as DataIssue
    rows under the 'Tunnel' parser. `dwgfile` is always saved at the end.
    """
    wallet, scansfile = None, None
    mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif))$", path)
    if mscansdir:
        walletname, scanname = mscansdir.group(1), mscansdir.group(2)
        scanswalletl = Wallet.objects.filter(walletname=walletname)
        # This should be changed to properly detect if a list of folders is
        # returned and do something sensible, not just pick the first.
        if len(scanswalletl):
            wallet = scanswalletl[0]
            if len(scanswalletl) > 1:
                # Report the wallet that was picked. The scan-file list has not
                # been looked up yet at this point, so it must not be used here.
                message = "! More than one scan FOLDER matches filter query. [{}]: {} {} {} {}".format(scanswalletl[0], walletname, scanname, dwgfile.dwgpath, path)
                print(message)
                DataIssue.objects.create(parser='Tunnel', message=message)

        if wallet:
            scansfilel = wallet.singlescan_set.filter(name=scanname)
            if len(scansfilel):
                if len(scansfilel) > 1:
                    message = "! More than one image FILENAME matches filter query. [{}]: {} {} {} {}".format(scansfilel[0], walletname, scanname, dwgfile.dwgpath, path)
                    print(message)
                    DataIssue.objects.create(parser='Tunnel', message=message)
                scansfile = scansfilel[0]

            dwgfile.manywallets.add(wallet)
        if scansfile:
            dwgfile.scans.add(scansfile)

    # The case-insensitive flag is passed as an argument: an inline "(?i)" is
    # only accepted at the very start of a pattern from Python 3.11 onwards.
    elif path and not re.search(r"\.(?:png|jpg|pdf|jpeg|gif)$", path, re.IGNORECASE):
        name = os.path.split(path)[1]
        rdwgfilel = DrawingFile.objects.filter(dwgname=name)
        if len(rdwgfilel):
            message = "! Two paths with same name [{}]: {}".format(path, name)
            print(message)
            DataIssue.objects.create(parser='Tunnel', message=message)
            rdwgfile = rdwgfilel[0]
            dwgfile.dwgcontains.add(rdwgfile)

    dwgfile.save()
2011-07-11 02:10:22 +01:00
2021-04-07 21:53:43 +01:00
def findimageinsert(therionfile, xth_me):
    """Placeholder for linking a Therion drawing file to the image (scan) it references.

    Not implemented: the arguments are ignored and None is returned.
    """
    return None
2011-07-11 02:10:22 +01:00
2021-04-07 21:53:43 +01:00
def findimportinsert(therionfile, imp):
    """Placeholder for linking a Therion scrap to the Therion scrap it references.

    Not implemented: the arguments are ignored and None is returned.
    """
    return None
# Patterns applied to the text of Therion files; all are matched line by line.
# An "xth_me_image_insert" entry, from that keyword to a closing brace at end of line.
rx_xth_me = re.compile(r'xth_me_image_insert.*{.*}$', flags=re.MULTILINE)
# A line starting with "survey "; captures the word that follows.
rx_scrap = re.compile(r'^survey (\w*).*$', flags=re.MULTILINE)
# A line starting with "input "; captures the word that follows.
rx_input = re.compile(r'^input (\w*).*$', flags=re.MULTILINE)
def settherionfileinfo(filetuple):
    """Read a Therion file and set size and element-count values on its drawing object.

    Args:
        filetuple: a `(thtype, therionfile)` pair. `thtype` is the file type
            tag ('th' or 'th2'); `therionfile` is the drawing object whose
            `dwgpath` (relative to `settings.DRAWINGS_DATA`) names the file.

    Sets `filesize` and, for the two known types, `npaths` (the number of
    lines starting with "input" for 'th', with "line" for 'th2'), then
    saves the object. Every image insert and every `input` line found is
    recorded as a DataIssue under the 'Therion' parser, because neither is
    resolved yet. A zero-length file is reported and left unsaved.
    """
    thtype, therionfile = filetuple

    ff = os.path.join(settings.DRAWINGS_DATA, therionfile.dwgpath)
    therionfile.filesize = os.stat(ff)[stat.ST_SIZE]
    if therionfile.filesize <= 0:
        message = "! Zero length therion file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        return
    # Context manager so the handle is released even if reading fails.
    with open(ff, 'r') as fin:
        ttext = fin.read()

    # The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap'
    if thtype == 'th':
        therionfile.npaths = len(re.findall(r"^input", ttext, re.MULTILINE))
    elif thtype == 'th2':
        therionfile.npaths = len(re.findall(r"^line", ttext, re.MULTILINE))
    therionfile.save()

    # scan and look for survex blocks that might have been included, and image scans (as for tunnel drawings)
    # which would populate dwgfile.survexfile

    # in .th2 files:
    # ##XTHERION## xth_me_image_insert {500 1 1.0} {1700 {}} ../../../expofiles/surveyscans/2014/01popped_elev1.jpeg 0 {}
    # scrap blownout -projection plan -scale [-81.0 -42.0 216.0 -42.0 0.0 0.0 7.5438 0.0 m]

    for xth_me in rx_xth_me.findall(ttext):
        # The image filename is expected third from the end; fall back to the
        # whole matched text when the line has fewer fields than that.
        fields = xth_me.split()
        imagename = fields[-3] if len(fields) >= 3 else xth_me
        message = f'! Un-parsed image filename: {therionfile.dwgname} : {imagename} - {therionfile.dwgpath}'
        DataIssue.objects.create(parser='Therion', message=message)
        findimageinsert(therionfile, xth_me)

    for inp in rx_input.findall(ttext):
        # if this 'input' is a .th2 file we have already seen, then we can assign this as a sub-file
        # but we would need to disentangle to get the current path properly
        message = f'! Un-set Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}'
        DataIssue.objects.create(parser='Therion', message=message)
        findimportinsert(therionfile, inp)

    therionfile.save()
# Byte patterns applied to the raw content of Tunnel .xml drawing files.
# Opening of an <skpath element; occurrences are counted to give npaths.
rx_skpath = re.compile(rb'<skpath')
# A "frame" <pcarea element; captures its sfsketch and sfstyle attribute values.
rx_pcpath = re.compile(
    rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"'
)
2021-04-26 18:08:42 +01:00
def setdwgfileinfo(dwgfile):
    """Read a Tunnel .xml drawing file and set values on the dwgfile object.

    Sets `dwgfile.filesize` and `dwgfile.npaths` (the number of `<skpath`
    elements), saves, then passes the `sfsketch` value of every "frame"
    `<pcarea>` element to `find_dwg_file` so that referenced scans and
    drawings get linked. A zero-length file is reported as a DataIssue
    under the 'Drawings' parser and left unsaved.

    TODO: should try to read the date too, e.g. tunneldate="2010-08-16 22:51:57",
    then we could display it on the master calendar per expo.
    """
    ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath)
    dwgfile.filesize = os.stat(ff)[stat.ST_SIZE]
    if dwgfile.filesize <= 0:
        message = "! Zero length xml file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='Drawings', message=message)
        return
    # Read as bytes (the patterns are byte patterns); the context manager
    # releases the handle even if reading fails.
    with open(ff, 'rb') as fin:
        ttext = fin.read()

    dwgfile.npaths = len(rx_skpath.findall(ttext))
    dwgfile.save()

    # example drawing file in Tunnel format.
    # <tunnelxml tunnelversion="version2009-06-21 Matienzo" tunnelproject="ireby" tunneluser="goatchurch" tunneldate="2009-06-29 23:22:17">
    # <pcarea area_signal="frame" sfscaledown="12.282584" sfrotatedeg="-90.76982" sfxtrans="11.676667377221136" sfytrans="-15.677173422877454" sfsketch="204description/scans/plan(38).png" sfstyle="" nodeconnzsetrelative="0.0">
    for path, _style in rx_pcpath.findall(ttext):
        find_dwg_file(dwgfile, path.decode())

    # should also scan and look for survex blocks that might have been included, and image scans
    # which would populate dwgfile.survexfile

    dwgfile.save()
2011-07-11 02:10:22 +01:00
2021-04-07 21:53:43 +01:00
def load_drawings_files():
    """Rebuild all DrawingFile records from the drawings directory tree.

    Deletes every existing DrawingFile and the DataIssue rows of the
    'Drawings', 'Therion' and 'Tunnel' parsers, then walks
    `settings.DRAWINGS_DATA` with an explicit stack of sub-directories
    (so the traversal is depth-first). Names starting with "." or ending
    with "~" are skipped. A DrawingFile is created for each .xml, .th,
    .th2, .pdf, .svg and .jpg file and for each file with no suffix at all.

    Once every file is registered, .xml files are parsed with
    `setdwgfileinfo`, and then .th2 files followed by .th files with
    `settherionfileinfo`.

    Open questions left by the original author: why do we have all this
    detection of file types, and why not use mime types?
    TODO: suffix matching is case-sensitive; upper-case suffixes are not detected.
    """
    # Recognised suffixes; the type tag is the suffix without its dot.
    known_suffixes = (".xml", ".th", ".th2", ".pdf", ".svg", ".jpg")

    all_xml = []
    drawdatadir = settings.DRAWINGS_DATA
    DrawingFile.objects.all().delete()
    for parser in ('Drawings', 'Therion', 'Tunnel'):
        DataIssue.objects.filter(parser=parser).delete()

    drawingsdirs = [""]
    while drawingsdirs:
        drawdir = drawingsdirs.pop()
        for f in os.listdir(os.path.join(drawdatadir, drawdir)):
            if f.startswith(".") or f.endswith("~"):
                continue
            lf = os.path.join(drawdir, f)
            ff = os.path.join(drawdatadir, lf)
            if os.path.isdir(ff):
                # Safe: the inner loop iterates the directory listing, not
                # this stack, so pushing here only queues more work.
                drawingsdirs.append(lf)
                continue

            suffix = next((s for s in known_suffixes if f.endswith(s)), None)
            if suffix is not None:
                # Strip exactly the matched suffix, so "cave.th" becomes
                # "cave" rather than losing a character of the stem.
                filetype, dwgname = suffix[1:], f[:-len(suffix)]
            elif Path(f).suffix == '':
                # no suffix at all: therion file
                filetype, dwgname = '', f
            else:
                continue

            # Always creates new
            dwgfile = DrawingFile(dwgpath=lf, dwgname=dwgname)
            dwgfile.save()
            all_xml.append((filetype, dwgfile))

    print(f'- {len(all_xml)} Drawings files found')

    for filetype, dwgfile in all_xml:
        if filetype == 'xml':
            setdwgfileinfo(dwgfile)

    # important to import .th2 files before .th so that we can assign them
    # when found in .th files
    for wanted in ('th2', 'th'):
        for entry in all_xml:
            if entry[0] == wanted:
                settherionfileinfo(entry)
2021-04-26 18:08:42 +01:00
# for drawfile in DrawingFile.objects.all():
2021-04-07 21:53:43 +01:00
# SetTunnelfileInfo(drawfile)