import csv
import datetime
import logging
import os
import re
import stat
import sys
import types
from functools import reduce

from PIL import Image
from utils import save_carefully

import settings
from troggle.core.models_survex import SingleScan, ScansFolder, TunnelFile
from troggle.core.models import DataIssue

# File extensions treated as scanned images.  The case-insensitive flag is
# passed as an argument: an inline "(?i)" that is not at the very start of the
# pattern is rejected by Python 3.11 and later.
rx_scanfile = re.compile(r"\.(?:png|jpg|jpeg|pdf|svg|gif)$", re.IGNORECASE)
rx_imagepath = re.compile(r"\.(?:png|jpg|pdf|jpeg|gif)$", re.IGNORECASE)


def get_or_create_placeholder(year):
    """Return the placeholder logbook entry for surveys of the given year.

    All surveys must be related to a logbookentry. We don't have a way to
    automatically figure out which survey went with which logbookentry,
    so we create a survey placeholder logbook entry for each year. This
    function always returns such a placeholder, and creates it if it doesn't
    exist yet.
    """
    # NOTE(review): LogbookEntry is not imported anywhere in the visible file,
    # so this call raises NameError as written - confirm which models module
    # provides it and add the import.
    lookupAttribs = {'date__year': int(year), 'title': "placeholder for surveys"}
    nonLookupAttribs = {
        'text': "surveys temporarily attached to this should be re-attached to their actual trips",
        'date': datetime.date(int(year), 1, 1),
    }
    placeholder_logbook_entry, newly_created = save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
    return placeholder_logbook_entry


def listdir(*directories):
    """List the entries of a directory below settings.SURVEYS.

    The local filesystem is tried first.  If that fails with an OSError,
    settings.SURVEYS is treated as a URL prefix and the listing is fetched
    from its "listdir/..." endpoint, one entry per response line.
    """
    try:
        return os.listdir(os.path.join(settings.SURVEYS, *directories))
    except OSError:
        import urllib.request

        url = settings.SURVEYS + "/".join(["listdir", *directories])
        with urllib.request.urlopen(url.replace("#", "%23")) as response:
            folders = response.readlines()
        # readlines() yields bytes with their line endings still attached, so
        # decode and drop the line ending before trimming the trailing "/".
        return [folder.decode().rstrip("\r\n").rstrip("/") for folder in folders]


# handles url or file, so we can refer to a set of scans on another server
def GetListDir(sdir):
    """Return (name, full path, is-directory) tuples for the entries of sdir.

    Entries whose name starts with "." are skipped.

    Raises NotImplementedError for "http://" locations, which are not
    supported yet.
    """
    if sdir[:7] == "http://":
        # An explicit raise still fires under "python -O", unlike an assert.
        raise NotImplementedError("Not written")

    res = []
    for f in os.listdir(sdir):
        if not f.startswith("."):
            ff = os.path.join(sdir, f)
            res.append((f, ff, os.path.isdir(ff)))
    return res


def LoadListScansFile(scansfolder):
    """Create a SingleScan for every image file found in a wallet folder.

    Sub-directories are flattened one level deep.  A "." is printed for
    every ten scans saved, as a progress indicator.
    """
    gld = []
    # flatten out any directories in these wallet folders - should not be any
    for (fyf, ffyf, fisdiryf) in GetListDir(scansfolder.fpath):
        if fisdiryf:
            gld.extend(GetListDir(ffyf))
        else:
            gld.append((fyf, ffyf, fisdiryf))

    c = 0
    for (fyf, ffyf, fisdiryf) in gld:
        if rx_scanfile.search(fyf):
            singlescan = SingleScan(ffile=ffyf, name=fyf, scansfolder=scansfolder)
            singlescan.save()
            c += 1
            if c >= 10:
                print(".", end='')
                c = 0


# this iterates through the scans directories (either here or on the remote server)
# and builds up the models we can access later
def LoadListScans():
    """Delete and rebuild all ScansFolder and SingleScan objects from disk."""
    print(' - Loading Survey Scans')

    SingleScan.objects.all().delete()
    ScansFolder.objects.all().delete()
    print(' - deleting all scansFolder and scansSingle objects')

    # first do the smkhs (large kh survey scans) directory
    manyscansfoldersmkhs = ScansFolder(fpath=os.path.join(settings.SURVEY_SCANS, "../surveys/smkhs"), walletname="smkhs")
    print("smkhs", end=' ')
    if os.path.isdir(manyscansfoldersmkhs.fpath):
        manyscansfoldersmkhs.save()
        LoadListScansFile(manyscansfoldersmkhs)

    # iterate into the surveyscans directory
    print(' - ', end=' ')
    for f, ff, fisdir in GetListDir(settings.SURVEY_SCANS):
        if not fisdir:
            continue

        # do the year folders: every sub-directory is one wallet
        if re.match(r"\d\d\d\d$", f):
            print("%s" % f, end=' ')
            for fy, ffy, fisdiry in GetListDir(ff):
                if fisdiry:
                    scansfolder = ScansFolder(fpath=ffy, walletname=fy)
                    scansfolder.save()
                    LoadListScansFile(scansfolder)

        # any other top-level folder, except the thumbnails, is itself a wallet
        elif f != "thumbs":
            scansfolder = ScansFolder(fpath=ff, walletname=f)
            scansfolder.save()
            LoadListScansFile(scansfolder)


def find_tunnel_scan(tunnelfile, path):
    """Link tunnelfile to whatever the text 'path' refers to.

    'path' is a line of text which may or may not contain a recognisable name
    of a scanned file which we have already seen when we imported all the files
    we could find in the surveyscans directories.  If it does, the matching
    ScansFolder and SingleScan are attached to tunnelfile.  Otherwise, if it
    does not look like an image file, it is looked up by name as another
    drawing file that this one contains.
    """
    scansfolder, scansfile = None, None
    mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg))$", path)
    if mscansdir:
        scansfolderl = ScansFolder.objects.filter(walletname=mscansdir.group(1))
        if len(scansfolderl):
            assert len(scansfolderl) == 1
            scansfolder = scansfolderl[0]

        if scansfolder:
            scansfilel = scansfolder.singlescan_set.filter(name=mscansdir.group(2))
            if len(scansfilel):
                if len(scansfilel) > 1:
                    message = "! More than one image filename matches filter query. [{}]: {} {} {} {}".format(scansfilel[0], mscansdir.group(1), mscansdir.group(2), tunnelfile.tunnelpath, path)
                    print(message)
                    DataIssue.objects.create(parser='Tunnel', message=message)
                scansfile = scansfilel[0]

        if scansfolder:
            tunnelfile.manyscansfolders.add(scansfolder)
        if scansfile:
            tunnelfile.scans.add(scansfile)

    elif path and not rx_imagepath.search(path):
        name = os.path.split(path)[1]
        rtunnelfilel = TunnelFile.objects.filter(tunnelname=name)
        if len(rtunnelfilel):
            # NOTE(review): this is reported for ANY match, including a single
            # unambiguous one - presumably it was meant for more than one
            # match only; confirm before changing.
            message = "! Two paths with same name [{}]: {}".format(path, name)
            print(message)
            DataIssue.objects.create(parser='Tunnel', message=message)
            rtunnelfile = rtunnelfilel[0]
            tunnelfile.tunnelcontains.add(rtunnelfile)

    tunnelfile.save()


def findimageinsert(therionfile, xth_me):
    """Tries to link the drawing file (Therion format) to the referenced image (scan) file

    Not implemented yet: currently does nothing.
    """
    pass


def findimportinsert(therionfile, imp):
    """Tries to link the scrap (Therion format) to the referenced therion scrap

    Not implemented yet: currently does nothing.
    """
    pass


rx_xth_me = re.compile(r'xth_me_image_insert.*{.*}$', re.MULTILINE)
rx_scrap = re.compile(r'^survey (\w*).*$', re.MULTILINE)
rx_input = re.compile(r'^input (\w*).*$', re.MULTILINE)


def settherionfileinfo(filetuple):
    """Read in the drawing file contents and sets values on the tunnelfile object

    filetuple is (thtype, therionfile) where thtype is 'th' or 'th2'.
    Records the file size and a path count on therionfile, and logs a
    DataIssue for every image insert and 'input' line, as neither is
    linked up yet.
    """
    thtype, therionfile = filetuple

    ff = os.path.join(settings.TUNNEL_DATA, therionfile.tunnelpath)
    therionfile.filesize = os.stat(ff).st_size
    if therionfile.filesize <= 0:
        message = "! Zero length therion file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        return

    with open(ff, 'r') as fin:
        ttext = fin.read()

    # The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap'
    if thtype == 'th':
        therionfile.npaths = len(re.findall(r"^input ", ttext, re.MULTILINE))
    elif thtype == 'th2':
        therionfile.npaths = len(re.findall(r"^line ", ttext, re.MULTILINE))
    therionfile.save()

    # scan and look for survex blocks that might have been included, and image scans (as for tunnel drawings)
    # which would populate tunnelfile.survexfile

    # in .th2 files:
    # ##XTHERION## xth_me_image_insert {500 1 1.0} {1700 {}} ../../../expofiles/surveyscans/2014/01popped_elev1.jpeg 0 {}
    # scrap blownout -projection plan -scale [-81.0 -42.0 216.0 -42.0 0.0 0.0 7.5438 0.0 m]

    for xth_me in rx_xth_me.findall(ttext):
        # The third token from the end of the line is the image filename.
        message = f'! Un-parsed image filename: {therionfile.tunnelname} : {xth_me.split()[-3]} - {therionfile.tunnelpath}'
        DataIssue.objects.create(parser='Therion', message=message)
        findimageinsert(therionfile, xth_me)

    for inp in rx_input.findall(ttext):
        # if this 'input' is a .th2 file we have already seen, then we can assign this as a sub-file
        # but we would need to disentangle to get the current path properly
        message = f'! Un-set Therion .th2 input: - {therionfile.tunnelname} : {inp} - {therionfile.tunnelpath}'
        DataIssue.objects.create(parser='Therion', message=message)
        findimportinsert(therionfile, inp)

    therionfile.save()


# NOTE(review): RECONSTRUCTED.  In the reviewed copy of this file the text from
# the rx_skpath pattern literal up to the rx_pcpath loop inside
# settunnelfileinfo was lost (markup-like text had been stripped out).  The two
# patterns below, and every line of settunnelfileinfo before its "for" loop,
# are a best guess modelled on settherionfileinfo - restore the real text from
# version control before relying on them.
rx_skpath = re.compile(rb'<skpath')
rx_pcpath = re.compile(rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"')


def settunnelfileinfo(tunnelfile):
    """Read in the drawing file contents and sets values on the tunnelfile object

    Only the rx_pcpath loop and the final save() survived in the reviewed
    copy; the rest of the body is reconstructed and needs confirming.
    """
    # --- reconstructed section: confirm against version control ---
    ff = os.path.join(settings.TUNNEL_DATA, tunnelfile.tunnelpath)
    tunnelfile.filesize = os.stat(ff).st_size
    if tunnelfile.filesize <= 0:
        message = "! Zero length xml file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='Drawings', message=message)
        return

    # Read as bytes: the surviving loop below calls path.decode().
    with open(ff, 'rb') as fin:
        ttext = fin.read()

    tunnelfile.npaths = len(rx_skpath.findall(ttext))
    tunnelfile.save()
    # --- end of reconstructed section ---

    for path, style in rx_pcpath.findall(ttext):
        find_tunnel_scan(tunnelfile, path.decode())

    # should also scan and look for survex blocks that might have been included, and image scans
    # which would populate tunnelfile.survexfile

    tunnelfile.save()


def load_drawings_files():
    """Walk the drawings directory and import every drawing file found.

    All existing TunnelFile objects, and the DataIssues of the 'Drawings',
    'Therion' and 'Tunnel' parsers, are deleted first.  The tree below
    settings.TUNNEL_DATA is then walked with an explicit stack (depth
    first), creating a TunnelFile for each *.xml, *.th and *.th2 file.
    Finally the contents of each file are read to fill in its details.
    """
    all_xml = []
    drawdatadir = settings.TUNNEL_DATA
    TunnelFile.objects.all().delete()
    DataIssue.objects.filter(parser='Drawings').delete()
    DataIssue.objects.filter(parser='Therion').delete()
    DataIssue.objects.filter(parser='Tunnel').delete()

    suffix_types = {".xml": "xml", ".th": "th", ".th2": "th2"}

    drawingsdirs = [""]
    while drawingsdirs:
        drawdir = drawingsdirs.pop()
        for f in os.listdir(os.path.join(drawdatadir, drawdir)):
            # skip hidden files and editor backup files
            if f[0] == "." or f[-1] == "~":
                continue
            lf = os.path.join(drawdir, f)
            ff = os.path.join(drawdatadir, lf)
            if os.path.isdir(ff):
                # Safe: the inner loop iterates the directory listing, not
                # this stack, which is only re-read by the outer while.
                drawingsdirs.append(lf)
                continue

            # splitext removes exactly the suffix, whatever its length; the
            # former fixed f[:-4] slice cut a character off every ".th" name.
            stem, suffix = os.path.splitext(f)
            thtype = suffix_types.get(suffix)
            if thtype is None:
                continue
            # Always creates new
            tunnelfile = TunnelFile(tunnelpath=lf, tunnelname=stem)
            tunnelfile.save()
            all_xml.append((thtype, tunnelfile))

    print(f' - {len(all_xml)} Drawings files found')

    # important to import .th2 files before .th so that we can assign them when
    # found in .th files - hence one pass per type, in this order
    for wanted in ('xml', 'th2', 'th'):
        for d in all_xml:
            if d[0] != wanted:
                continue
            if wanted == 'xml':
                settunnelfileinfo(d[1])
            else:
                settherionfileinfo(d)