import sys
import os
import types
import logging
import stat
import csv
import re
import datetime

from PIL import Image

from utils import save_carefully
from functools import reduce

import settings

from troggle.core.models_survex import SingleScan, ScansFolder, TunnelFile
from troggle.core.models import DataIssue, LogbookEntry  # LogbookEntry is used by get_or_create_placeholder() below; import location assumed

def get_or_create_placeholder(year):
    """ All surveys must be related to a logbookentry. We don't have a way to
    automatically figure out which survey went with which logbookentry,
    so we create a survey placeholder logbook entry for each year. This
    function always returns such a placeholder, and creates it if it doesn't
    exist yet.
    """
    lookupAttribs = {'date__year': int(year), 'title': "placeholder for surveys", }
    nonLookupAttribs = {'text': "surveys temporarily attached to this should be re-attached to their actual trips", 'date': datetime.date(int(year), 1, 1)}
    placeholder_logbook_entry, newly_created = save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
    return placeholder_logbook_entry
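
# Illustrative usage of the placeholder helper above (how the survey itself is
# attached is not shown here):
#   placeholder = get_or_create_placeholder("2019")
#   # -> a LogbookEntry dated 1 January 2019 titled "placeholder for surveys",
#   #    created on the first call and simply returned on subsequent calls.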

def listdir(*directories):
    try:
        return os.listdir(os.path.join(settings.SURVEYS, *directories))
    except:
        import urllib.request, urllib.parse, urllib.error
        url = settings.SURVEYS + reduce(lambda x, y: x + "/" + y, ["listdir"] + list(directories))
        folders = urllib.request.urlopen(url.replace("#", "%23")).readlines()
        return [folder.rstrip(r"/") for folder in folders]

# handles url or file, so we can refer to a set of scans on another server
def GetListDir(sdir):
    '''Returns a list of (name, fullpath, isdir) tuples for the entries in sdir,
    skipping dot-files. The http:// branch is not implemented.
    '''
    res = []
    if sdir[:7] == "http://":
        assert False, "Not written"
        s = urllib.request.urlopen(sdir)
    else:
        for f in os.listdir(sdir):
            if f[0] != ".":
                ff = os.path.join(sdir, f)
                res.append((f, ff, os.path.isdir(ff)))
    return res

def LoadListScansFile(scansfolder):
    gld = []
    # flatten out any directories in these wallet folders - should not be any
    for (fyf, ffyf, fisdiryf) in GetListDir(scansfolder.fpath):
        if fisdiryf:
            gld.extend(GetListDir(ffyf))
        else:
            gld.append((fyf, ffyf, fisdiryf))

    c = 0
    for (fyf, ffyf, fisdiryf) in gld:
        #assert not fisdiryf, ffyf
        if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif)$", fyf, re.IGNORECASE):
            singlescan = SingleScan(ffile=ffyf, name=fyf, scansfolder=scansfolder)
            singlescan.save()
            c += 1
            if c >= 10:
                print(".", end='')
                c = 0

# this iterates through the scans directories (either here or on the remote server)
# and builds up the models we can access later
def LoadListScans():

    print(' - Loading Survey Scans')

    SingleScan.objects.all().delete()
    ScansFolder.objects.all().delete()
    print(' - deleting all scansFolder and scansSingle objects')

    # first do the smkhs (large kh survey scans) directory
    manyscansfoldersmkhs = ScansFolder(fpath=os.path.join(settings.SURVEY_SCANS, "../surveys/smkhs"), walletname="smkhs")
    print("smkhs", end=' ')
    if os.path.isdir(manyscansfoldersmkhs.fpath):
        manyscansfoldersmkhs.save()
        LoadListScansFile(manyscansfoldersmkhs)

    # iterate into the surveyscans directory
    print(' - ', end=' ')
    for f, ff, fisdir in GetListDir(settings.SURVEY_SCANS):
        if not fisdir:
            continue

        # do the year folders
        if re.match(r"\d\d\d\d$", f):
            print("%s" % f, end=' ')
            for fy, ffy, fisdiry in GetListDir(ff):
                if fisdiry:
                    assert fisdiry, ffy
                    scansfolder = ScansFolder(fpath=ffy, walletname=fy)
                    scansfolder.save()
                    LoadListScansFile(scansfolder)

        # do the non-year wallet folders (but skip the thumbnails directory)
        elif f != "thumbs":
            scansfolder = ScansFolder(fpath=ff, walletname=f)
            scansfolder.save()
            LoadListScansFile(scansfolder)
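
# Illustrative directory layout that LoadListScans() above expects under
# settings.SURVEY_SCANS (names are examples only):
#   2019/2019#07/...        year folder containing wallet folders
#   1991surveybook/...      non-year wallet handled by the elif branch
#   thumbs/                 skipped
# Each wallet folder becomes a ScansFolder; each image/pdf inside it becomes a SingleScan.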

def find_tunnel_scan(tunnelfile, path):
    '''Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file
    which we have already seen when we imported all the files we could find in the surveyscans directories
    '''
    scansfolder, scansfile = None, None
    mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg))$", path)
    if mscansdir:
        scansfolderl = ScansFolder.objects.filter(walletname=mscansdir.group(1))
        if len(scansfolderl):
            assert len(scansfolderl) == 1
            scansfolder = scansfolderl[0]
        if scansfolder:
            scansfilel = scansfolder.singlescan_set.filter(name=mscansdir.group(2))
            if len(scansfilel):
                if len(scansfilel) > 1:
                    message = "! More than one image filename matches filter query. [{}]: {} {} {} {}".format(scansfilel[0], mscansdir.group(1), mscansdir.group(2), tunnelfile.tunnelpath, path)
                    print(message)
                    DataIssue.objects.create(parser='Tunnel', message=message)
                scansfile = scansfilel[0]

        if scansfolder:
            tunnelfile.manyscansfolders.add(scansfolder)
        if scansfile:
            tunnelfile.scans.add(scansfile)

    elif path and not re.search(r"\.(?:png|jpg|pdf|jpeg|gif)$", path, re.IGNORECASE):
        name = os.path.split(path)[1]
        rtunnelfilel = TunnelFile.objects.filter(tunnelname=name)
        if len(rtunnelfilel):
            message = "! Two paths with same name [{}]: {}".format(path, name)
            print(message)
            DataIssue.objects.create(parser='Tunnel', message=message)
            rtunnelfile = rtunnelfilel[0]
            tunnelfile.tunnelcontains.add(rtunnelfile)

    tunnelfile.save()
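
# Worked example (illustrative) for find_tunnel_scan() above: an sfsketch path such as
#   "../../expofiles/surveyscans/2014/2014#01/notes2.jpg"
# matches with group(1) == "2014#01" (the wallet name) and group(2) == "notes2.jpg",
# so the TunnelFile gets linked to that ScansFolder and, if a matching scan exists,
# to the SingleScan as well.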

def findimageinsert(therionfile, xth_me):
    '''Tries to link the drawing file (Therion format) to the referenced image (scan) file
    '''
    pass

def findimportinsert(therionfile, imp):
    '''Tries to link the scrap (Therion format) to the referenced therion scrap
    '''
    pass

rx_xth_me = re.compile(r'xth_me_image_insert.*{.*}$', re.MULTILINE)
rx_scrap = re.compile(r'^survey (\w*).*$', re.MULTILINE)
rx_input = re.compile(r'^input (\w*).*$', re.MULTILINE)
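
# Illustrative Therion fragments these regexes are aimed at (names invented):
#   survey examplecave -title "Example Cave"     -> rx_scrap captures "examplecave"
#   input examplecave-plan.th2                   -> rx_input captures "examplecave" (stops at the "-")
#   ##XTHERION## xth_me_image_insert {500 1 1.0} {1700 {}} ../surveyscans/2014/2014#01/notes2.jpg 0 {}
#                                                -> picked up whole by rx_xth_me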

def settherionfileinfo(filetuple):
    '''Reads in the drawing file contents and sets values on the tunnelfile object
    '''
    thtype, therionfile = filetuple

    ff = os.path.join(settings.TUNNEL_DATA, therionfile.tunnelpath)
    therionfile.filesize = os.stat(ff)[stat.ST_SIZE]
    if therionfile.filesize <= 0:
        message = "! Zero length therion file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        return

    fin = open(ff, 'r')
    ttext = fin.read()
    fin.close()

    # The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap'
    # print(len(re.findall(r"line", ttext)))
    if thtype == 'th':
        therionfile.npaths = len(re.findall(r"^input", ttext, re.MULTILINE))
    elif thtype == 'th2':
        therionfile.npaths = len(re.findall(r"^line", ttext, re.MULTILINE))
    therionfile.save()

    # scan and look for survex blocks that might have been included, and image scans (as for tunnel drawings)
    # which would populate tunnelfile.survexfile
    # in .th2 files:
    #   ##XTHERION## xth_me_image_insert {500 1 1.0} {1700 {}} ../../../expofiles/surveyscans/2014/01popped_elev1.jpeg 0 {}
    #   scrap blownout -projection plan -scale [-81.0 -42.0 216.0 -42.0 0.0 0.0 7.5438 0.0 m]
    for xth_me in rx_xth_me.findall(ttext):
        message = f'! Un-parsed image filename: {therionfile.tunnelname} : {xth_me.split()[-3]} - {therionfile.tunnelpath}'
        #print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        findimageinsert(therionfile, xth_me)

    for inp in rx_input.findall(ttext):
        # if this 'input' is a .th2 file we have already seen, then we can assign this as a sub-file
        # but we would need to disentangle to get the current path properly
        message = f'! Un-set Therion .th2 input: - {therionfile.tunnelname} : {inp} - {therionfile.tunnelpath}'
        #print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        findimportinsert(therionfile, inp)

    therionfile.save()

rx_skpath = re.compile(rb'<skpath')
rx_pcpath = re.compile(rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"')

def settunnelfileinfo(tunnelfile):
    '''Reads in the drawing file contents and sets values on the tunnelfile object.
    Should try to read the date too, e.g. tunneldate="2010-08-16 22:51:57",
    then we could display it on the master calendar per expo.
    '''
    ff = os.path.join(settings.TUNNEL_DATA, tunnelfile.tunnelpath)
    tunnelfile.filesize = os.stat(ff)[stat.ST_SIZE]
    if tunnelfile.filesize <= 0:
        message = "! Zero length xml file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='Drawings', message=message)
        return

    fin = open(ff, 'rb')
    ttext = fin.read()
    fin.close()

    tunnelfile.npaths = len(rx_skpath.findall(ttext))
    tunnelfile.save()

    # example drawing file in Tunnel format:
    # <tunnelxml tunnelversion="version2009-06-21 Matienzo" tunnelproject="ireby" tunneluser="goatchurch" tunneldate="2009-06-29 23:22:17">
    # <pcarea area_signal="frame" sfscaledown="12.282584" sfrotatedeg="-90.76982" sfxtrans="11.676667377221136" sfytrans="-15.677173422877454" sfsketch="204description/scans/plan(38).png" sfstyle="" nodeconnzsetrelative="0.0">
    for path, style in rx_pcpath.findall(ttext):
        find_tunnel_scan(tunnelfile, path.decode())

    # should also scan and look for survex blocks that might have been included, and image scans
    # which would populate tunnelfile.survexfile
    tunnelfile.save()
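
# Hedged sketch of the date idea mentioned in the settunnelfileinfo() docstring above.
# It would live inside that function after ttext is read; a 'tunneldate' field on the
# TunnelFile model is assumed here and does not currently exist:
#   m = re.search(rb'tunneldate="([^"]*)"', ttext)
#   if m:
#       tunnelfile.tunneldate = datetime.datetime.strptime(m.group(1).decode(), "%Y-%m-%d %H:%M:%S")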

def load_drawings_files():
    '''Walks the drawings directory tree looking for sub-directories and *.xml / *.th / *.th2 drawing files
    '''
    all_xml = []
    drawdatadir = settings.TUNNEL_DATA
    TunnelFile.objects.all().delete()
    DataIssue.objects.filter(parser='Drawings').delete()
    DataIssue.objects.filter(parser='Therion').delete()
    DataIssue.objects.filter(parser='Tunnel').delete()

    drawingsdirs = [""]
    while drawingsdirs:
        drawdir = drawingsdirs.pop()
        for f in os.listdir(os.path.join(drawdatadir, drawdir)):
            if f[0] == "." or f[-1] == "~":
                continue
            lf = os.path.join(drawdir, f)
            ff = os.path.join(drawdatadir, lf)
            if os.path.isdir(ff):
                drawingsdirs.append(lf) # lunatic! adding to list in middle of list while loop!
            elif f[-4:] == ".xml":
                # Always creates new
                tunnelfile = TunnelFile(tunnelpath=lf, tunnelname=os.path.split(f[:-4])[1])
                tunnelfile.save()
                all_xml.append(('xml', tunnelfile))
            elif f[-3:] == ".th":
                # Always creates new; strip only the 3-character ".th" suffix for the name
                tunnelfile = TunnelFile(tunnelpath=lf, tunnelname=os.path.split(f[:-3])[1])
                tunnelfile.save()
                all_xml.append(('th', tunnelfile))
            elif f[-4:] == ".th2":
                # Always creates new
                tunnelfile = TunnelFile(tunnelpath=lf, tunnelname=os.path.split(f[:-4])[1])
                tunnelfile.save()
                all_xml.append(('th2', tunnelfile))

    print(f' - {len(all_xml)} Drawings files found')

    for d in all_xml:
        if d[0] == 'xml':
            settunnelfileinfo(d[1])
        # important to import .th2 files before .th so that we can assign them when found in .th files
        if d[0] == 'th2':
            settherionfileinfo(d)
        if d[0] == 'th':
            settherionfileinfo(d)

    # for drawfile in TunnelFile.objects.all():
    #    SetTunnelfileInfo(drawfile)
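
# Note on ordering (inferred from the code above, not a documented API): LoadListScans()
# needs to have been run before load_drawings_files(), otherwise find_tunnel_scan() has no
# ScansFolder / SingleScan objects to match the sfsketch paths against.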