'''Searches through all the :drawings: repository looking
for tunnel and therion files

Searches through all the survey scans directories in expofiles, looking for images to be referenced.
'''
import sys
import os
import types
import stat
import csv
import re
import datetime
from functools import reduce

from PIL import Image

import settings
from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
from troggle.core.models.troggle import DataIssue
from troggle.core.utils import save_carefully


def get_or_create_placeholder(year):
    """Return the placeholder logbook entry for `year`, creating it if needed.

    All surveys must be related to a logbookentry. We don't have a way to
    automatically figure out which survey went with which logbookentry,
    so we create a survey placeholder logbook entry for each year. This
    function always returns such a placeholder, and creates it if it doesn't
    exist yet.
    """
    # NOTE(review): LogbookEntry is not imported anywhere in the visible part
    # of this file, so this call raises NameError as written. Import it from
    # wherever the model lives in this project before using this function.
    lookupAttribs = {'date__year': int(year), 'title': "placeholder for surveys"}
    nonLookupAttribs = {
        'text': "surveys temporarily attached to this should be re-attached to their actual trips",
        'date': datetime.date(int(year), 1, 1),
    }
    placeholder_logbook_entry, newly_created = save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
    return placeholder_logbook_entry


def listdir(*directories):
    """List the entries of a sub-directory of settings.SURVEYS.

    Tries the local filesystem first. If that fails with an OSError the
    listing is requested from the "listdir/..." URL built on
    settings.SURVEYS instead, and one entry per response line is returned.
    """
    try:
        return os.listdir(os.path.join(settings.SURVEYS, *directories))
    except OSError:
        import urllib.request

        url = settings.SURVEYS + "/".join(["listdir", *directories])
        with urllib.request.urlopen(url.replace("#", "%23")) as response:
            folders = response.readlines()
        # readlines() yields bytes; decode before stripping the line ending
        # and any trailing slash.
        return [folder.decode().strip().rstrip("/") for folder in folders]


def GetListDir(sdir):
    """Return a list of (name, full path, is_directory) for the entries of sdir.

    Entries whose name starts with "." are skipped. Intended to handle a url
    or a file path, so we can refer to a set of scans (not drawings) on
    another server, but loading from http:// is NOT IMPLEMENTED: such a
    request is recorded as a DataIssue and an empty list is returned.
    """
    if sdir[:7] == "http://":
        # s = urllib.request.urlopen(sdir)
        message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]"
        print(message)
        DataIssue.objects.create(parser='Drawings', message=message)
        # The original tried `sdir[:7] = ""`, which always raises TypeError
        # because str is immutable; there is nothing we can list here.
        return []

    res = []
    for f in os.listdir(sdir):
        if f[0] != ".":
            ff = os.path.join(sdir, f)
            res.append((f, ff, os.path.isdir(ff)))
    return res


# File extensions treated as scan images (matched case-insensitively).
rx_scan_image = re.compile(r"\.(?:png|jpg|jpeg|pdf|svg|gif)$", re.IGNORECASE)


def LoadListScansFile(wallet):
    """Create a SingleScan for every image file found in the wallet folder.

    Prints a "." to stdout for every ten scans saved, as a progress marker.
    """
    gld = []
    # flatten out any directories in these wallet folders - should not be any
    for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath):
        if fisdiryf:
            gld.extend(GetListDir(ffyf))
        else:
            gld.append((fyf, ffyf, fisdiryf))

    c = 0
    for (fyf, ffyf, fisdiryf) in gld:
        if rx_scan_image.search(fyf):
            singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet)
            singlescan.save()
            c += 1
            if c >= 10:
                print(".", end='')
                c = 0


def load_all_scans():
    """Delete and rebuild all Wallet and SingleScan objects.

    This iterates through the scans directories (either here or on the
    remote server) and builds up the models we can access later.
    """
    print(' - Loading Survey Scans')

    SingleScan.objects.all().delete()
    Wallet.objects.all().delete()
    print(' - deleting all scansFolder and scansSingle objects')

    # first do the smkhs (large kh survey scans) directory
    manywallets_smkhs = Wallet(fpath=os.path.join(settings.SURVEY_SCANS, "../surveys/smkhs"), walletname="smkhs")
    print("smkhs", end=' ')
    if os.path.isdir(manywallets_smkhs.fpath):
        manywallets_smkhs.save()
        LoadListScansFile(manywallets_smkhs)

    # iterate into the surveyscans directory
    print(' - ', end=' ')
    for f, ff, fisdir in GetListDir(settings.SURVEY_SCANS):
        if not fisdir:
            continue

        # do the year folders
        if re.match(r"\d\d\d\d$", f):
            print("%s" % f, end=' ')
            for fy, ffy, fisdiry in GetListDir(ff):
                if fisdiry:
                    wallet = Wallet(fpath=ffy, walletname=fy)
                    wallet.save()
                    LoadListScansFile(wallet)

        # do the remaining (non-year) folders, except the thumbnails
        elif f != "thumbs":
            wallet = Wallet(fpath=ff, walletname=f)
            wallet.save()
            LoadListScansFile(wallet)

    print("", flush=True)


def find_tunnel_file(dwgfile, path):
    '''Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file
    which we have already seen when we imported all the files we could find in the surveyscans directories.

    If the path names a wallet and image, the matching Wallet and SingleScan
    (when found) are attached to dwgfile. Otherwise, if the path is not an
    image at all, it is looked up by name as another DrawingFile and attached
    as a contained drawing. dwgfile is saved in every case.
    '''
    wallet, scansfile = None, None
    mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif))$", path)
    if mscansdir:
        scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1))
        # This should be changed to properly detect if a list of folders is returned and do something sensible, not just pick the first.
        if len(scanswalletl):
            wallet = scanswalletl[0]
            if len(scanswalletl) > 1:
                # Report the wallet that was picked; the original referenced
                # scansfilel here before it had been assigned.
                message = "! More than one scan FOLDER matches filter query. [{}]: {} {} {} {}".format(scanswalletl[0], mscansdir.group(1), mscansdir.group(2), dwgfile.dwgpath, path)
                print(message)
                DataIssue.objects.create(parser='Tunnel', message=message)

        if wallet:
            scansfilel = wallet.singlescan_set.filter(name=mscansdir.group(2))
            if len(scansfilel):
                if len(scansfilel) > 1:
                    message = "! More than one image FILENAME matches filter query. [{}]: {} {} {} {}".format(scansfilel[0], mscansdir.group(1), mscansdir.group(2), dwgfile.dwgpath, path)
                    print(message)
                    DataIssue.objects.create(parser='Tunnel', message=message)
                scansfile = scansfilel[0]

        if wallet:
            dwgfile.manywallets.add(wallet)
        if scansfile:
            dwgfile.scans.add(scansfile)

    # The inline "(?i)" used to trail the pattern, which newer Pythons reject;
    # pass the flag explicitly instead.
    elif path and not re.search(r"\.(?:png|jpg|pdf|jpeg|gif)$", path, re.IGNORECASE):
        name = os.path.split(path)[1]
        rdwgfilel = DrawingFile.objects.filter(dwgname=name)
        if len(rdwgfilel):
            message = "! Two paths with same name [{}]: {}".format(path, name)
            print(message)
            DataIssue.objects.create(parser='Tunnel', message=message)
            rdwgfile = rdwgfilel[0]
            dwgfile.dwgcontains.add(rdwgfile)

    dwgfile.save()


def findimageinsert(therionfile, xth_me):
    '''Tries to link the drawing file (Therion format) to the referenced image (scan) file

    Not implemented yet: currently does nothing.
    '''
    pass


def findimportinsert(therionfile, imp):
    '''Tries to link the scrap (Therion format) to the referenced therion scrap

    Not implemented yet: currently does nothing.
    '''
    pass


rx_xth_me = re.compile(r'xth_me_image_insert.*{.*}$', re.MULTILINE)
rx_scrap = re.compile(r'^survey (\w*).*$', re.MULTILINE)
rx_input = re.compile(r'^input (\w*).*$', re.MULTILINE)


def settherionfileinfo(filetuple):
    '''Read in the drawing file contents and sets values on the dwgfile object

    filetuple is (thtype, therionfile) where thtype is 'th' or 'th2' and
    therionfile is the DrawingFile to update. Sets filesize and npaths,
    and records a DataIssue for every image insert and every 'input' line,
    since neither is resolved yet.
    '''
    thtype, therionfile = filetuple

    ff = os.path.join(settings.DRAWINGS_DATA, therionfile.dwgpath)
    therionfile.filesize = os.stat(ff)[stat.ST_SIZE]
    if therionfile.filesize <= 0:
        message = "! Zero length therion file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        return

    with open(ff, 'r') as fin:
        ttext = fin.read()

    # The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap'
    # print(len(re.findall(r"line", ttext)))
    if thtype == 'th':
        therionfile.npaths = len(re.findall(r"^input ", ttext, re.MULTILINE))
    elif thtype == 'th2':
        therionfile.npaths = len(re.findall(r"^line ", ttext, re.MULTILINE))
    therionfile.save()

    # scan and look for survex blocks that might have been included, and image scans (as for tunnel drawings)
    # which would populate dwgfile.survexfile

    # in .th2 files:
    # ##XTHERION## xth_me_image_insert {500 1 1.0} {1700 {}} ../../../expofiles/surveyscans/2014/01popped_elev1.jpeg 0 {}
    # scrap blownout -projection plan -scale [-81.0 -42.0 216.0 -42.0 0.0 0.0 7.5438 0.0 m]

    for xth_me in rx_xth_me.findall(ttext):
        # The third token from the end of the insert line is the image path.
        message = f'! Un-parsed image filename: {therionfile.dwgname} : {xth_me.split()[-3]} - {therionfile.dwgpath}'
        #print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        findimageinsert(therionfile, xth_me)

    for inp in rx_input.findall(ttext):
        # if this 'input' is a .th2 file we have already seen, then we can assign this as a sub-file
        # but we would need to disentangle to get the current path properly
        message = f'! Un-set Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}'
        #print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        findimportinsert(therionfile, inp)

    therionfile.save()


# NOTE(review): in the text this file was recovered from, everything between
# "rx_skpath = re.compile(rb'" and the "for path, style in rx_pcpath..." loop
# was missing (it looks as if markup stripping removed the text between a
# '<' and a later '>'). The two patterns and the head of setdwgfileinfo()
# below are a RECONSTRUCTION, based on the surviving tail of the function, on
# its call site in load_drawings_files() and on the parallel
# settherionfileinfo(). Verify them against version control before relying
# on them.
rx_skpath = re.compile(rb'<skpath')
rx_pcpath = re.compile(rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"')


def setdwgfileinfo(dwgfile):
    '''Read in the drawing file contents and sets values on the dwgfile object

    NOTE(review): everything in this function before the rx_pcpath loop is
    reconstructed (see the note above the regexes) - confirm.
    '''
    ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath)
    dwgfile.filesize = os.stat(ff)[stat.ST_SIZE]
    if dwgfile.filesize <= 0:
        message = "! Zero length xml file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='Drawings', message=message)
        return

    # Read as bytes: the surviving code decodes each matched path itself.
    with open(ff, 'rb') as fin:
        ttext = fin.read()

    dwgfile.npaths = len(rx_skpath.findall(ttext))
    dwgfile.save()

    for path, style in rx_pcpath.findall(ttext):
        find_tunnel_file(dwgfile, path.decode())

    # should also scan and look for survex blocks that might have been included, and image scans
    # which would populate dwgfile.survexfile

    dwgfile.save()


# Recognised drawing-file suffixes and the type tag recorded for each.
_DRAWING_SUFFIXES = (
    (".xml", "xml"),
    (".th2", "th2"),
    (".th", "th"),
)


def load_drawings_files():
    '''Walks the drawings directory tree looking for sub-directories and
    *.xml, *.th and *.th2 files, creating a DrawingFile for each one found
    and then reading each file to fill in its details.

    All existing DrawingFile objects, and the DataIssues of the 'Drawings',
    'Therion' and 'Tunnel' parsers, are deleted first.
    '''
    all_xml = []
    drawdatadir = settings.DRAWINGS_DATA
    DrawingFile.objects.all().delete()
    DataIssue.objects.filter(parser='Drawings').delete()
    DataIssue.objects.filter(parser='Therion').delete()
    DataIssue.objects.filter(parser='Tunnel').delete()

    # Stack of directories (relative to drawdatadir) still to be visited.
    # Sub-directories are pushed while their parent is being listed, which is
    # safe because the stack is only popped between listings.
    drawingsdirs = [""]
    while drawingsdirs:
        drawdir = drawingsdirs.pop()
        for f in os.listdir(os.path.join(drawdatadir, drawdir)):
            # skip hidden files and editor backup files
            if f[0] == "." or f[-1] == "~":
                continue
            lf = os.path.join(drawdir, f)
            ff = os.path.join(drawdatadir, lf)
            if os.path.isdir(ff):
                drawingsdirs.append(lf)
                continue
            for suffix, kind in _DRAWING_SUFFIXES:
                if f.endswith(suffix):
                    # Always creates new. Strip exactly the matched suffix:
                    # the original removed four characters even for ".th",
                    # losing the last letter of the name.
                    dwgfile = DrawingFile(dwgpath=lf, dwgname=f[:-len(suffix)])
                    dwgfile.save()
                    all_xml.append((kind, dwgfile))
                    break

    print(f' - {len(all_xml)} Drawings files found')

    # important to import .th2 files before .th so that we can assign them when found in .th files,
    # so go through the list once per file type rather than in discovery order
    for wanted in ('xml', 'th2', 'th'):
        for d in all_xml:
            if d[0] != wanted:
                continue
            if wanted == 'xml':
                setdwgfileinfo(d[1])
            else:
                settherionfileinfo(d)

    # for drawfile in DrawingFile.objects.all():
    #     SetTunnelfileInfo(drawfile)