troggle-unchained/parsers/surveys.py

import sys
import os
import types
import stat
import csv
import re
import datetime

from PIL import Image
from functools import reduce

import settings
from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
from troggle.core.models.troggle import DataIssue
from troggle.core.utils import save_carefully

'''Loads the survey scan wallets from the scans directories, and scans
through all the :drawings: repository looking for Tunnel and Therion files
'''


def get_or_create_placeholder(year):
    """Return the placeholder logbook entry for ``year``, creating it if absent.

    Every survey has to hang off a logbook entry, but nothing here can work
    out which trip a survey really belongs to. Each year therefore gets one
    catch-all entry, dated 1 January of that year, which surveys can be
    attached to until somebody re-attaches them to the right trip.

    NOTE(review): ``LogbookEntry`` is not imported anywhere in this module as
    shown, so calling this function looks like it would raise NameError -
    confirm where the model lives and import it.
    """
    year_number = int(year)
    # Fields used to find an existing placeholder
    lookup = {
        'date__year': year_number,
        'title': "placeholder for surveys",
    }
    # Fields only written, never used for matching
    defaults = {
        'text': "surveys temporarily attached to this should be re-attached to their actual trips",
        'date': datetime.date(year_number, 1, 1),
    }
    entry, _created = save_carefully(LogbookEntry, lookup, defaults)
    return entry

def listdir(*directories):
    '''List the entries of a directory below settings.SURVEYS.

    directories: path components, relative to settings.SURVEYS.

    The directory is first read from the local filesystem. If that fails with
    an OSError (for instance because settings.SURVEYS is really a URL) the
    listing is requested from "<settings.SURVEYS>listdir/<components>" instead,
    one entry per line of the response.

    Returns a list of str. Entries obtained over http have surrounding
    whitespace and any trailing "/" removed.
    Raises whatever urllib raises if the fallback request fails too.
    '''
    try:
        return os.listdir(os.path.join(settings.SURVEYS, *directories))
    except OSError:
        # Local import: the network fallback is the rare path
        import urllib.request

        url = settings.SURVEYS + "/".join(["listdir", *directories])
        # '#' appears in wallet names and would otherwise start a URL fragment
        with urllib.request.urlopen(url.replace("#", "%23")) as response:
            text = response.read().decode("utf-8", errors="replace")
        # The response is bytes: decode first, and drop the line ending before
        # the trailing slash, otherwise the slash is never at the end
        return [line.strip().rstrip("/") for line in text.splitlines()]


# lists a local directory; http:// locations are recognised but NOT supported
def GetListDir(sdir):
    '''List the non-hidden entries of the directory sdir.

    sdir: path of a local directory, as a str.

    Returns a list of (name, fullpath, isdir) tuples, one per entry whose name
    does not start with ".", in the order os.listdir() yields them.

    A path starting with "http://" cannot be loaded: the request is printed and
    recorded as a DataIssue, and an empty list is returned so the import can
    carry on. (The previous code tried to assign to a slice of the string,
    which always raised TypeError.)

    Raises OSError if sdir cannot be listed.
    '''
    if sdir.startswith("http://"):
        message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]"
        print(message)
        DataIssue.objects.create(parser='Drawings', message=message)
        return []

    res = []
    for f in os.listdir(sdir):
        if not f.startswith("."):
            ff = os.path.join(sdir, f)
            res.append((f, ff, os.path.isdir(ff)))
    return res


def LoadListScansFile(scansfolder):
    '''Create a SingleScan record for every image file in one wallet folder.

    scansfolder: a saved Wallet; its fpath is the directory to read.

    Files directly in the folder, and files one level down in any
    sub-directory, are considered. Those whose name ends in png, jpg, jpeg,
    pdf, svg or gif (any letter case) are saved as SingleScan objects attached
    to scansfolder. A "." is printed for every tenth scan saved.
    '''
    # The case-insensitive flag is passed as an argument: an inline (?i) that is
    # not at the very start of the pattern is an error from Python 3.11
    rx_scanfile = re.compile(r"\.(?:png|jpg|jpeg|pdf|svg|gif)$", re.IGNORECASE)

    gld = []
    # flatten out any directories in these wallet folders - should not be any
    for entry in GetListDir(scansfolder.fpath):
        fyf, ffyf, fisdiryf = entry
        if fisdiryf:
            gld.extend(GetListDir(ffyf))
        else:
            gld.append(entry)

    c = 0
    for fyf, ffyf, fisdiryf in gld:
        if rx_scanfile.search(fyf):
            singlescan = SingleScan(ffile=ffyf, name=fyf, scansfolder=scansfolder)
            singlescan.save()
            c += 1
            if c >= 10:
                # progress indicator, one dot per ten scans
                print(".", end='')
                c = 0

        
# this iterates through the scans directories (either here or on the remote server)
# and builds up the models we can access later
def LoadListScans():
    '''Rebuild all the Wallet and SingleScan records from the scans directories.

    All existing SingleScan and Wallet objects are deleted first. Then:
     - the "smkhs" folder (large kh survey scans) next to the scans directory
       is loaded as a wallet, if it exists;
     - inside settings.SURVEY_SCANS, every sub-directory of a four-digit year
       folder becomes a wallet;
     - every other directory there, apart from "thumbs", is itself a wallet.
    Progress is printed as it goes.
    '''
    def _register_wallet(path, name):
        # One Wallet per folder, then one SingleScan per image file inside it
        wallet = Wallet(fpath=path, walletname=name)
        wallet.save()
        LoadListScansFile(wallet)

    print(' - Loading Survey Scans')

    SingleScan.objects.all().delete()
    Wallet.objects.all().delete()
    print(' - deleting all scansFolder and scansSingle objects')

    # the smkhs wallet is only saved if its directory is really there
    smkhs_wallet = Wallet(fpath=os.path.join(settings.SURVEY_SCANS, "../surveys/smkhs"), walletname="smkhs")
    print("smkhs", end=' ')
    if os.path.isdir(smkhs_wallet.fpath):
        smkhs_wallet.save()
        LoadListScansFile(smkhs_wallet)

    # now walk the surveyscans directory itself
    print(' - ', end=' ')
    for name, fullpath, is_directory in GetListDir(settings.SURVEY_SCANS):
        if not is_directory:
            continue

        if re.match(r"\d\d\d\d$", name):
            # a year folder: each directory inside it is a wallet
            print("%s" % name, end=' ')
            for wallet_name, wallet_path, wallet_is_dir in GetListDir(fullpath):
                if wallet_is_dir:
                    _register_wallet(wallet_path, wallet_name)
            continue

        if name != "thumbs":
            # any other directory is treated as a wallet in its own right
            _register_wallet(fullpath, name)
            

# Recognises "<wallet folder>/<image file>" at the end of a path
_RX_WALLET_SCAN = re.compile(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg))$")
# Recognises a path ending in an image-file extension, in any letter case.
# The flag is passed as an argument: an inline (?i) that is not at the start of
# the pattern is an error from Python 3.11
_RX_IMAGE_SUFFIX = re.compile(r"\.(?:png|jpg|pdf|jpeg|gif)$", re.IGNORECASE)


def find_tunnel_scan(dwgfile, path):
    '''Link a drawing file to whatever the text 'path' found inside it refers to.

    dwgfile: the drawing file object being processed; it is always saved before
             returning.
    path:    a line of text which may or may not contain a recognisable name
             of a scanned file which we have already seen when we imported all
             the files we could find in the surveyscans directories.

    If path ends in "<wallet>/<image>", the Wallet of that name is added to
    dwgfile.manyscansfolders and, if the wallet holds a scan of that file name,
    the scan is added to dwgfile.scans. Where more than one wallet or scan
    matches, the first is used and a DataIssue is recorded.

    Otherwise, if path is not empty and does not end in an image extension, it
    is taken to name another drawing file: the first DrawingFile whose dwgname
    equals the last component of path is added to dwgfile.dwgcontains, and a
    DataIssue is recorded.
    '''
    scansfolder, scansfile = None, None
    mscansdir = _RX_WALLET_SCAN.search(path)
    if mscansdir:
        walletname, scanname = mscansdir.group(1), mscansdir.group(2)

        scansfolderl = Wallet.objects.filter(walletname=walletname)
        # This should properly detect if a list of folders is returned and do something sensible, not just pick the first.
        if len(scansfolderl):
            scansfolder = scansfolderl[0]
            if len(scansfolderl) > 1:
                # Report the folder that was picked. The scan-file list does not
                # exist yet at this point, so it cannot be used in the message.
                message = "! More than one scan FOLDER matches filter query. [{}]: {} {} {} {}".format(scansfolder, walletname, scanname, dwgfile.dwgpath, path)
                print(message)
                DataIssue.objects.create(parser='Tunnel', message=message)

        if scansfolder is not None:
            scansfilel = scansfolder.singlescan_set.filter(name=scanname)
            if len(scansfilel):
                if len(scansfilel) > 1:
                    message = "! More than one image FILENAME matches filter query. [{}]: {} {} {} {}".format(scansfilel[0], walletname, scanname, dwgfile.dwgpath, path)
                    print(message)
                    DataIssue.objects.create(parser='Tunnel', message=message)
                scansfile = scansfilel[0]

            dwgfile.manyscansfolders.add(scansfolder)
        if scansfile is not None:
            dwgfile.scans.add(scansfile)

    elif path and not _RX_IMAGE_SUFFIX.search(path):
        name = os.path.split(path)[1]
        rdwgfilel = DrawingFile.objects.filter(dwgname=name)
        if len(rdwgfilel):
            message = "! Two paths with same name [{}]: {}".format(path, name)
            print(message)
            DataIssue.objects.create(parser='Tunnel', message=message)
            rdwgfile = rdwgfilel[0]
            dwgfile.dwgcontains.add(rdwgfile)

    dwgfile.save()

def findimageinsert(therionfile, xth_me):
    '''Placeholder: will link a Therion drawing file to the image (scan) file it refers to.

    therionfile: the drawing file object being processed
    xth_me:      the matched 'xth_me_image_insert' text from that file

    Does nothing yet and returns None.
    '''
    return None

def findimportinsert(therionfile, imp):
    '''Placeholder: will link a Therion scrap to the Therion scrap it refers to.

    therionfile: the drawing file object being processed
    imp:         the name captured from an 'input' line in that file

    Does nothing yet and returns None.
    '''
    return None

# Patterns applied to the whole text of a Therion file; with re.MULTILINE the
# ^ and $ anchors match at the start and end of every line.

# From 'xth_me_image_insert' to the last '}' that ends the same line
rx_xth_me = re.compile(r'xth_me_image_insert.*{.*}$', re.MULTILINE)
# A line starting 'survey '; captures the run of word characters that follows.
# Not used anywhere in the code visible in this file.
rx_scrap  = re.compile(r'^survey (\w*).*$', re.MULTILINE)
# A line starting 'input '; captures only the run of word characters that
# follows, so the capture stops at the first '/', '.' or '-' in a file name
rx_input  = re.compile(r'^input (\w*).*$', re.MULTILINE)

def settherionfileinfo(filetuple):
    '''Read in a Therion drawing file and set values on its dwgfile object.

    filetuple: (thtype, therionfile) where thtype is 'th' or 'th2' and
               therionfile is the saved drawing file object, whose dwgpath is
               relative to settings.DRAWINGS_DATA.

    Sets filesize, and npaths (number of lines starting 'input ' for a .th
    file, or 'line ' for a .th2 file), and saves the object. A zero-length
    file is reported as a DataIssue and otherwise left alone.

    Image inserts and 'input' lines are not yet resolved to other objects: each
    one is recorded as a DataIssue and passed to a stub function.

    Raises OSError if the file cannot be read.
    '''
    thtype, therionfile = filetuple

    ff = os.path.join(settings.DRAWINGS_DATA, therionfile.dwgpath)
    therionfile.filesize = os.stat(ff)[stat.ST_SIZE]
    if therionfile.filesize <= 0:
        message = "! Zero length therion file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        return
    # 'with' closes the file even if reading or decoding fails
    with open(ff, 'r') as fin:
        ttext = fin.read()

    # The equivalent for a tunnel 'path' would be a .th2 'line wall'  or 'scrap'
    if thtype == 'th':
        therionfile.npaths = len(re.findall(r"^input ", ttext, re.MULTILINE))
    elif thtype == 'th2':
        therionfile.npaths = len(re.findall(r"^line ", ttext, re.MULTILINE))
    therionfile.save()

    # scan and look for survex blocks that might have been included, and image scans (as for tunnel drawings)
    # which would populate dwgfile.survexfile

    # in .th2 files:
    # ##XTHERION## xth_me_image_insert {500 1 1.0} {1700 {}} ../../../expofiles/surveyscans/2014/01popped_elev1.jpeg 0 {}
    # scrap blownout -projection plan -scale [-81.0 -42.0 216.0 -42.0 0.0 0.0 7.5438 0.0 m]

    for xth_me in rx_xth_me.findall(ttext):
        # In the usual layout the file name is the third token from the end.
        # A shorter match is reported whole rather than raising IndexError.
        tokens = xth_me.split()
        imagename = tokens[-3] if len(tokens) >= 3 else xth_me
        message = f'! Un-parsed image filename: {therionfile.dwgname} : {imagename} - {therionfile.dwgpath}'
        DataIssue.objects.create(parser='Therion', message=message)
        findimageinsert(therionfile, xth_me)

    for inp in rx_input.findall(ttext):
        # if this 'input' is a .th2 file we have already seen, then we can assign this as a sub-file
        # but we would need to disentangle to get the current path properly
        message = f'! Un-set Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}'
        DataIssue.objects.create(parser='Therion', message=message)
        findimportinsert(therionfile, inp)

    therionfile.save()
    
# Byte patterns applied to the raw contents of a Tunnel .xml drawing file.

# Start of an <skpath element; the number of matches is stored as npaths
rx_skpath = re.compile(rb'<skpath')
# A <pcarea area_signal="frame" ...> element: captures the values of its
# sfsketch and sfstyle attributes. '.' does not match a newline here, so both
# attributes have to be on the same line as the start of the element.
rx_pcpath = re.compile(rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"')

def setdwgfileinfo(dwgfile):
    '''Read in a Tunnel drawing file and set values on the dwgfile object.

    dwgfile: the saved drawing file object, whose dwgpath is relative to
             settings.DRAWINGS_DATA.

    Sets filesize, and npaths (the number of '<skpath' occurrences), saves the
    object, then passes the sfsketch value of every frame <pcarea> element to
    find_tunnel_scan() so that scans and sub-drawings get linked. A zero-length
    file is reported as a DataIssue and otherwise left alone.

    Should try to read the date too e.g. tunneldate="2010-08-16 22:51:57
    then we could display on the master calendar per expo.

    Raises OSError if the file cannot be read.
    '''
    ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath)
    dwgfile.filesize = os.stat(ff)[stat.ST_SIZE]
    if dwgfile.filesize <= 0:
        message = "! Zero length xml file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='Drawings', message=message)
        return
    # Read as bytes: the patterns used below are byte patterns.
    # 'with' closes the file even if reading fails.
    with open(ff, 'rb') as fin:
        ttext = fin.read()

    dwgfile.npaths = len(rx_skpath.findall(ttext))
    dwgfile.save()

    # example drawing file in Tunnel format.
    # <tunnelxml tunnelversion="version2009-06-21 Matienzo" tunnelproject="ireby" tunneluser="goatchurch" tunneldate="2009-06-29 23:22:17">
    # <pcarea area_signal="frame" sfscaledown="12.282584" sfrotatedeg="-90.76982" sfxtrans="11.676667377221136" sfytrans="-15.677173422877454" sfsketch="204description/scans/plan(38).png" sfstyle="" nodeconnzsetrelative="0.0">

    # the sfstyle value is captured by the pattern but not used
    for path, _style in rx_pcpath.findall(ttext):
        find_tunnel_scan(dwgfile, path.decode())

    # should also scan and look for survex blocks that might have been included, and image scans
    # which would populate dwgfile.survexfile

    dwgfile.save()


def load_drawings_files():
    '''Find every drawing file below settings.DRAWINGS_DATA and load its details.

    All existing DrawingFile objects, and the DataIssues of the 'Drawings',
    'Therion' and 'Tunnel' parsers, are deleted first.

    The directory tree is walked depth-first using an explicit stack of
    directories still to visit. Entries whose name starts with "." or ends
    with "~" are skipped. A new DrawingFile is created for every .xml (Tunnel),
    .th and .th2 (Therion) file, with dwgpath relative to the drawings
    directory and dwgname the file name without its extension.

    Once all the objects exist, the file contents are read: Tunnel files first,
    then .th2 files, then .th files.
    '''
    all_xml = []
    drawdatadir = settings.DRAWINGS_DATA
    DrawingFile.objects.all().delete()
    DataIssue.objects.filter(parser='Drawings').delete()
    DataIssue.objects.filter(parser='Therion').delete()
    DataIssue.objects.filter(parser='Tunnel').delete()

    # file extension -> type tag stored alongside each DrawingFile
    dwgtypes = {".xml": "xml", ".th": "th", ".th2": "th2"}

    drawingsdirs = [""]
    while drawingsdirs:
        drawdir = drawingsdirs.pop()
        for f in os.listdir(os.path.join(drawdatadir, drawdir)):
            if f.startswith(".") or f.endswith("~"):
                continue
            lf = os.path.join(drawdir, f)
            ff = os.path.join(drawdatadir, lf)
            if os.path.isdir(ff):
                # visited on a later turn of the while loop
                drawingsdirs.append(lf)
                continue
            # splitext removes exactly the extension, whatever its length.
            # (Slicing a fixed 4 characters off a ".th" name lost the last
            # character of the drawing name.)
            stem, ext = os.path.splitext(f)
            if ext in dwgtypes:
                # Always creates new
                dwgfile = DrawingFile(dwgpath=lf, dwgname=stem)
                dwgfile.save()
                all_xml.append((dwgtypes[ext], dwgfile))

    print(f' - {len(all_xml)} Drawings files found')

    # important to import .th2 files before .th so that we can assign them when found in .th files
    for wanted in ('xml', 'th2', 'th'):
        for thtype, dwgfile in all_xml:
            if thtype != wanted:
                continue
            if thtype == 'xml':
                setdwgfileinfo(dwgfile)
            else:
                settherionfileinfo((thtype, dwgfile))
           
    # for drawfile in DrawingFile.objects.all():
        # SetTunnelfileInfo(drawfile)
unused code commented out 2020-05-24 13:30:39 +01:00			`import sys`
			`import os`
			`import types`
			`import stat`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`import csv`
			`import re`
			`import datetime`
unused code commented out 2020-05-24 13:30:39 +01:00
Convert codebase for python3 usage 2020-05-24 01:57:06 +01:00			`from PIL import Image`
unused code commented out 2020-05-24 13:30:39 +01:00			`from functools import reduce`

			`import settings`
rename ScansFolder class as Wallet 2021-04-26 18:18:16 +01:00			`from troggle.core.models.survex import SingleScan, Wallet, DrawingFile`
create core/models/ directroy 2021-04-13 00:43:57 +01:00			`from troggle.core.models.troggle import DataIssue`
moving save_carefully() 2021-04-13 00:11:08 +01:00			`from troggle.core.utils import save_carefully`
rename Tunnel files to Drawings files - phase 1 2020-06-30 15:52:29 +01:00
Docstrings for all modules 2021-04-13 01:37:42 +01:00			`'''Scans through all the :drawings: repository looking`
			`for tunnel and therion files`
			`'''`

remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00
			`def get_or_create_placeholder(year):`
			`""" All surveys must be related to a logbookentry. We don't have a way to`
			`automatically figure out which survey went with which logbookentry,`
			`so we create a survey placeholder logbook entry for each year. This`
			`function always returns such a placeholder, and creates it if it doesn't`
			`exist yet.`
			`"""`
			`lookupAttribs={'date__year':int(year), 'title':"placeholder for surveys",}`
			`nonLookupAttribs={'text':"surveys temporarily attached to this should be re-attached to their actual trips", 'date':datetime.date(int(year),1,1)}`
			`placeholder_logbook_entry, newly_created = save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)`
			`return placeholder_logbook_entry`

Convert codebase for python3 usage 2020-05-24 01:57:06 +01:00			`def listdir(*directories):`
			`try:`
			`return os.listdir(os.path.join(settings.SURVEYS, *directories))`
			`except:`
			`import urllib.request, urllib.parse, urllib.error`
			`url = settings.SURVEYS + reduce(lambda x, y: x + "/" + y, ["listdir"] + list(directories))`
			`folders = urllib.request.urlopen(url.replace("#", "%23")).readlines()`
			`return [folder.rstrip(r"/") for folder in folders]`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00

			`# handles url or file, so we can refer to a set of scans on another server`
			`def GetListDir(sdir):`
			`res = [ ]`
			`if sdir[:7] == "http://":`
replace assert() with message logging 2021-04-13 22:27:01 +01:00			`# s = urllib.request.urlopen(sdir)`
			`message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]"`
			`print(message)`
			`DataIssue.objects.create(parser='Drawings', message=message)`
			`sdir[:7] = ""`

			`for f in os.listdir(sdir):`
			`if f[0] != ".":`
			`ff = os.path.join(sdir, f)`
			`res.append((f, ff, os.path.isdir(ff)))`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`return res`


simple rename survexscansfolder to scansfolder 2020-06-23 23:46:33 +01:00			`def LoadListScansFile(scansfolder):`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`gld = [ ]`
Thorough spring clean and profiling 2020-04-27 23:51:41 +01:00			`# flatten out any directories in these wallet folders - should not be any`
simple rename survexscansfolder to scansfolder 2020-06-23 23:46:33 +01:00			`for (fyf, ffyf, fisdiryf) in GetListDir(scansfolder.fpath):`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`if fisdiryf:`
			`gld.extend(GetListDir(ffyf))`
			`else:`
			`gld.append((fyf, ffyf, fisdiryf))`

Adding progress dots to import print output and fix SURVEY_SCANS 2020-05-31 19:23:07 +01:00			`c=0`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`for (fyf, ffyf, fisdiryf) in gld:`
Adding progress dots to import print output and fix SURVEY_SCANS 2020-05-31 19:23:07 +01:00			`if re.search(r"\.(?:png\|jpg\|jpeg\|pdf\|svg\|gif)(?i)$", fyf):`
Simple renaming too-similar variables 2020-06-24 00:18:01 +01:00			`singlescan = SingleScan(ffile=ffyf, name=fyf, scansfolder=scansfolder)`
			`singlescan.save()`
Adding progress dots to import print output and fix SURVEY_SCANS 2020-05-31 19:23:07 +01:00			`c+=1`
			`if c>=10:`
Progress dots on importing data 2020-06-01 00:42:48 +01:00			`print(".", end='')`
Adding progress dots to import print output and fix SURVEY_SCANS 2020-05-31 19:23:07 +01:00			`c = 0`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00

			`# this iterates through the scans directories (either here or on the remote server)`
			`# and builds up the models we can access later`
			`def LoadListScans():`
Add a little verbosity 2015-01-19 22:41:48 +00:00
Adding progress dots to import print output and fix SURVEY_SCANS 2020-05-31 19:23:07 +01:00			`print(' - Loading Survey Scans')`
A little more verbosity 2015-01-19 22:48:50 +00:00
Simple renaming too-similar variables 2020-06-24 00:18:01 +01:00			`SingleScan.objects.all().delete()`
rename ScansFolder class as Wallet 2021-04-26 18:18:16 +01:00			`Wallet.objects.all().delete()`
Adding progress dots to import print output and fix SURVEY_SCANS 2020-05-31 19:23:07 +01:00			`print(' - deleting all scansFolder and scansSingle objects')`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00
			`# first do the smkhs (large kh survey scans) directory`
rename ScansFolder class as Wallet 2021-04-26 18:18:16 +01:00			`manyscansfoldersmkhs = Wallet(fpath=os.path.join(settings.SURVEY_SCANS, "../surveys/smkhs"), walletname="smkhs")`
Adding progress dots to import print output and fix SURVEY_SCANS 2020-05-31 19:23:07 +01:00			`print("smkhs", end=' ')`
rename SurvexScansFolders and tidy survex parser 2020-06-23 23:34:08 +01:00			`if os.path.isdir(manyscansfoldersmkhs.fpath):`
			`manyscansfoldersmkhs.save()`
			`LoadListScansFile(manyscansfoldersmkhs)`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00

			`# iterate into the surveyscans directory`
unused code commented out 2020-05-24 13:30:39 +01:00			`print(' - ', end=' ')`
Adding progress dots to import print output and fix SURVEY_SCANS 2020-05-31 19:23:07 +01:00			`for f, ff, fisdir in GetListDir(settings.SURVEY_SCANS):`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`if not fisdir:`
			`continue`

			`# do the year folders`
Make the suryeys importer not explode 2019-02-24 14:29:14 +00:00			`if re.match(r"\d\d\d\d$", f):`
unused code commented out 2020-05-24 13:30:39 +01:00			`print("%s" % f, end=' ')`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`for fy, ffy, fisdiry in GetListDir(ff):`
			`if fisdiry:`
rename ScansFolder class as Wallet 2021-04-26 18:18:16 +01:00			`scansfolder = Wallet(fpath=ffy, walletname=fy)`
simple rename survexscansfolder to scansfolder 2020-06-23 23:46:33 +01:00			`scansfolder.save()`
			`LoadListScansFile(scansfolder)`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00
			`# do the`
			`elif f != "thumbs":`
rename ScansFolder class as Wallet 2021-04-26 18:18:16 +01:00			`scansfolder = Wallet(fpath=ff, walletname=f)`
simple rename survexscansfolder to scansfolder 2020-06-23 23:46:33 +01:00			`scansfolder.save()`
			`LoadListScansFile(scansfolder)`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00

renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`def find_tunnel_scan(dwgfile, path):`
Import Therion files too 2021-04-07 21:53:43 +01:00			`'''Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file`
replace assert() with message logging 2021-04-13 22:27:01 +01:00			`which we have already seen when we imported all the files we could find in the surveyscans direstories`
Import Therion files too 2021-04-07 21:53:43 +01:00			`'''`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`scansfolder, scansfile = None, None`
Import Therion files too 2021-04-07 21:53:43 +01:00			`mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?\|1995-96kh\|92-94Surveybookkh\|1991surveybook\|smkhs)/(.*?(?:png\|jpg\|pdf\|jpeg))$", path)`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`if mscansdir:`
rename ScansFolder class as Wallet 2021-04-26 18:18:16 +01:00			`scansfolderl = Wallet.objects.filter(walletname=mscansdir.group(1))`
replace assert() with message logging 2021-04-13 22:27:01 +01:00			`# This should properly detect if a list of folders is returned and do something sensible, not just pick the first.`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`if len(scansfolderl):`
			`scansfolder = scansfolderl[0]`
replace assert() with message logging 2021-04-13 22:27:01 +01:00			`if len(scansfolderl) > 1:`
tunnelpath to dwgpath 2021-04-26 18:11:14 +01:00			`message = "! More than one scan FOLDER matches filter query. [{}]: {} {} {} {}".format(scansfilel[0], mscansdir.group(1), mscansdir.group(2), dwgfile.dwgpath, path)`
replace assert() with message logging 2021-04-13 22:27:01 +01:00			`print(message)`
			`DataIssue.objects.create(parser='Tunnel', message=message)`

remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`if scansfolder:`
Simple renaming too-similar variables 2020-06-24 00:18:01 +01:00			`scansfilel = scansfolder.singlescan_set.filter(name=mscansdir.group(2))`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`if len(scansfilel):`
Fixed bad import of surveyscans references from tunnel files 2020-04-11 00:36:27 +01:00			`if len(scansfilel) > 1:`
tunnelpath to dwgpath 2021-04-26 18:11:14 +01:00			`message = "! More than one image FILENAME matches filter query. [{}]: {} {} {} {}".format(scansfilel[0], mscansdir.group(1), mscansdir.group(2), dwgfile.dwgpath, path)`
Fix skipped import error messages for drawings 2020-07-29 22:54:53 +01:00			`print(message)`
Import Therion files too 2021-04-07 21:53:43 +01:00			`DataIssue.objects.create(parser='Tunnel', message=message)`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`scansfile = scansfilel[0]`
Fix skipped import error messages for drawings 2020-07-29 22:54:53 +01:00
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`if scansfolder:`
renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`dwgfile.manyscansfolders.add(scansfolder)`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`if scansfile:`
renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`dwgfile.scans.add(scansfile)`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00
Import Therion files too 2021-04-07 21:53:43 +01:00			`elif path and not re.search(r"\.(?:png\|jpg\|pdf\|jpeg\|gif\|pdf)$(?i)", path):`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`name = os.path.split(path)[1]`
rename tunnelname as dwgname 2021-04-26 18:37:59 +01:00			`rdwgfilel = DrawingFile.objects.filter(dwgname=name)`
renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`if len(rdwgfilel):`
Fix skipped import error messages for drawings 2020-07-29 22:54:53 +01:00			`message = "! Two paths with same name [{}]: {}".format(path, name)`
			`print(message)`
Import Therion files too 2021-04-07 21:53:43 +01:00			`DataIssue.objects.create(parser='Tunnel', message=message)`
renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`rdwgfile = rdwgfilel[0]`
reanem tunnelcontains to dwgcontains 2021-04-26 18:54:17 +01:00			`dwgfile.dwgcontains.add(rdwgfile)`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00
renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`dwgfile.save()`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00
Import Therion files too 2021-04-07 21:53:43 +01:00			`def findimageinsert(therionfile, xth_me):`
			`'''Tries to link the drawing file (Therion format) to the referenced image (scan) file`
			`'''`
			`pass`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00
Import Therion files too 2021-04-07 21:53:43 +01:00			`def findimportinsert(therionfile, imp):`
			`'''Tries to link the scrap (Therion format) to the referenced therion scrap`
			`'''`
			`pass`

			`rx_xth_me = re.compile(r'xth_me_image_insert.{.}$', re.MULTILINE)`
			`rx_scrap = re.compile(r'^survey (\w).$', re.MULTILINE)`
			`rx_input = re.compile(r'^input (\w).$', re.MULTILINE)`

			`def settherionfileinfo(filetuple):`
renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`'''Read in the drawing file contents and sets values on the dwgfile object`
Import Therion files too 2021-04-07 21:53:43 +01:00			`'''`
			`thtype, therionfile = filetuple`

rename TUNNEL_DATA as DRAWINGS_DATA 2021-04-26 18:42:10 +01:00			`ff = os.path.join(settings.DRAWINGS_DATA, therionfile.dwgpath)`
Import Therion files too 2021-04-07 21:53:43 +01:00			`therionfile.filesize = os.stat(ff)[stat.ST_SIZE]`
			`if therionfile.filesize <= 0:`
			`message = "! Zero length therion file {}".format(ff)`
			`print(message)`
			`DataIssue.objects.create(parser='Therion', message=message)`
			`return`
			`fin = open(ff,'r')`
			`ttext = fin.read()`
			`fin.close()`

			`# The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap'`
			`# print(len(re.findall(r"line", ttext)))`
			`if thtype=='th':`
			`therionfile.npaths = len(re.findall(r"^input ", ttext, re.MULTILINE))`
			`elif thtype=='th2':`
			`therionfile.npaths = len(re.findall(r"^line ", ttext, re.MULTILINE))`
			`therionfile.save()`

			`# scan and look for survex blocks that might have been included, and image scans (as for tunnel drawings)`
renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`# which would populate dwgfile.survexfile`
Import Therion files too 2021-04-07 21:53:43 +01:00
			`# in .th2 files:`
			`# ##XTHERION## xth_me_image_insert {500 1 1.0} {1700 {}} ../../../expofiles/surveyscans/2014/01popped_elev1.jpeg 0 {}`
			`# scrap blownout -projection plan -scale [-81.0 -42.0 216.0 -42.0 0.0 0.0 7.5438 0.0 m]`

			`for xth_me in rx_xth_me.findall(ttext):`
rename tunnelname as dwgname 2021-04-26 18:37:59 +01:00			`message = f'! Un-parsed image filename: {therionfile.dwgname} : {xth_me.split()[-3]} - {therionfile.dwgpath}'`
Import Therion files too 2021-04-07 21:53:43 +01:00			`#print(message)`
			`DataIssue.objects.create(parser='Therion', message=message)`
			`findimageinsert(therionfile, xth_me)`

			`for inp in rx_input.findall(ttext):`
			`# if this 'input' is a .th2 file we have already seen, then we can assign this as a sub-file`
			`# but we would need to disentangle to get the current path properly`
rename tunnelname as dwgname 2021-04-26 18:37:59 +01:00			`message = f'! Un-set Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}'`
Import Therion files too 2021-04-07 21:53:43 +01:00			`#print(message)`
			`DataIssue.objects.create(parser='Therion', message=message)`
			`findimportinsert(therionfile, inp)`

			`therionfile.save()`

			`rx_skpath = re.compile(rb'<skpath')`
			`rx_pcpath = re.compile(rb'<pcarea area_signal="frame".?sfsketch="([^"])" sfstyle="([^"]*)"')`

renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`def setdwgfileinfo(dwgfile):`
			`'''Read in the drawing file contents and sets values on the dwgfile object`
Therion files now handled 2021-04-08 01:09:06 +01:00			`Should try to read the date too e.g. tunneldate="2010-08-16 22:51:57`
			`then we could display on the master calendar per expo.`
Import Therion files too 2021-04-07 21:53:43 +01:00			`'''`
rename TUNNEL_DATA as DRAWINGS_DATA 2021-04-26 18:42:10 +01:00			`ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath)`
renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`dwgfile.filesize = os.stat(ff)[stat.ST_SIZE]`
			`if dwgfile.filesize <= 0:`
Fix skipped import error messages for drawings 2020-07-29 22:54:53 +01:00			`message = "! Zero length xml file {}".format(ff)`
			`print(message)`
			`DataIssue.objects.create(parser='Drawings', message=message)`
			`return`
unused code commented out 2020-05-24 13:30:39 +01:00			`fin = open(ff,'rb')`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`ttext = fin.read()`
			`fin.close()`
Import Therion files too 2021-04-07 21:53:43 +01:00
renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`dwgfile.npaths = len(rx_skpath.findall(ttext))`
			`dwgfile.save()`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00
Import Therion files too 2021-04-07 21:53:43 +01:00			`# example drawing file in Tunnel format.`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`# <tunnelxml tunnelversion="version2009-06-21 Matienzo" tunnelproject="ireby" tunneluser="goatchurch" tunneldate="2009-06-29 23:22:17">`
			`# <pcarea area_signal="frame" sfscaledown="12.282584" sfrotatedeg="-90.76982" sfxtrans="11.676667377221136" sfytrans="-15.677173422877454" sfsketch="204description/scans/plan(38).png" sfstyle="" nodeconnzsetrelative="0.0">`

Import Therion files too 2021-04-07 21:53:43 +01:00			`for path, style in rx_pcpath.findall(ttext):`
renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`find_tunnel_scan(dwgfile, path.decode())`
Import Therion files too 2021-04-07 21:53:43 +01:00
			`# should also scan and look for survex blocks that might have been included, and image scans`
renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`# which would populate dwgfile.survexfile`
Fix skipped import error messages for drawings 2020-07-29 22:54:53 +01:00
renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`dwgfile.save()`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00

Import Therion files too 2021-04-07 21:53:43 +01:00			`def load_drawings_files():`
			`'''Breadth first search of drawings directory looking for sub-directories and *.xml filesize`
			`'''`
			`all_xml = []`
rename TUNNEL_DATA as DRAWINGS_DATA 2021-04-26 18:42:10 +01:00			`drawdatadir = settings.DRAWINGS_DATA`
renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`DrawingFile.objects.all().delete()`
Import Therion files too 2021-04-07 21:53:43 +01:00			`DataIssue.objects.filter(parser='Drawings').delete()`
			`DataIssue.objects.filter(parser='Therion').delete()`
			`DataIssue.objects.filter(parser='Tunnel').delete()`
rename Tunnel files to Drawings files - phase 1 2020-06-30 15:52:29 +01:00
			`drawingsdirs = [ "" ]`
			`while drawingsdirs:`
			`drawdir = drawingsdirs.pop()`
			`for f in os.listdir(os.path.join(drawdatadir, drawdir)):`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`if f[0] == "." or f[-1] == "~":`
			`continue`
rename Tunnel files to Drawings files - phase 1 2020-06-30 15:52:29 +01:00			`lf = os.path.join(drawdir, f)`
			`ff = os.path.join(drawdatadir, lf)`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`if os.path.isdir(ff):`
Import Therion files too 2021-04-07 21:53:43 +01:00			`drawingsdirs.append(lf) # lunatic! adding to list in middle of list while loop!`
remove all the DOS linefeeds 2011-07-11 02:10:22 +01:00			`elif f[-4:] == ".xml":`
Import Therion files too 2021-04-07 21:53:43 +01:00			`# Always creates new`
rename tunnelname as dwgname 2021-04-26 18:37:59 +01:00			`dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])`
renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`dwgfile.save()`
			`all_xml.append(('xml',dwgfile))`
Import Therion files too 2021-04-07 21:53:43 +01:00			`elif f[-3:] == ".th":`
			`# Always creates new`
rename tunnelname as dwgname 2021-04-26 18:37:59 +01:00			`dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])`
renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`dwgfile.save()`
			`all_xml.append(('th',dwgfile))`
Import Therion files too 2021-04-07 21:53:43 +01:00			`elif f[-4:] == ".th2":`
			`# Always creates new`
rename tunnelname as dwgname 2021-04-26 18:37:59 +01:00			`dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])`
renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`dwgfile.save()`
			`all_xml.append(('th2',dwgfile))`
Import Therion files too 2021-04-07 21:53:43 +01:00
			`print(f' - {len(all_xml)} Drawings files found')`

			`for d in all_xml:`
			`if d[0] == 'xml':`
renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`setdwgfileinfo(d[1])`
Import Therion files too 2021-04-07 21:53:43 +01:00			`# important to import .th2 files before .th so that we can assign them when found in .th files`
			`if d[0] == 'th2':`
			`settherionfileinfo(d)`
			`if d[0] == 'th':`
			`settherionfileinfo(d)`

renamed tunnel to drawing or dwg 2021-04-26 18:08:42 +01:00			`# for drawfile in DrawingFile.objects.all():`
Import Therion files too 2021-04-07 21:53:43 +01:00			`# SetTunnelfileInfo(drawfile)`