from __future__ import (absolute_import, division,
                        print_function, unicode_literals)

import sys
import os
import types
import logging
import stat
import csv
import re
import datetime

from PIL import Image
from utils import save_carefully
from functools import reduce

import settings
from troggle.core.models import *


def get_or_create_placeholder(year):
    """ All surveys must be related to a logbookentry. We don't have a way to
        automatically figure out which survey went with which logbookentry,
        so we create a survey placeholder logbook entry for each year. This
        function always returns such a placeholder, and creates it if it
        doesn't exist yet.
    """
    lookupAttribs = {'date__year': int(year), 'title': "placeholder for surveys"}
    nonLookupAttribs = {'text': "surveys temporarily attached to this should be re-attached to their actual trips",
                        'date': datetime.date(int(year), 1, 1)}
    placeholder_logbook_entry, newly_created = save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
    return placeholder_logbook_entry
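
# Illustrative sketch, not called anywhere in the import flow: the year and wallet
# number below are hypothetical. It shows the intended pattern - look the Survey up
# by its identifying fields and only fall back to the placeholder logbook entry when
# creating it, so re-running the parsers stays idempotent.
def example_attach_survey_to_placeholder(year="2009", wallet_number="34"):
    placeholder = get_or_create_placeholder(year)
    expedition = Expedition.objects.filter(year=year)[0]
    lookupAttribs = {'expedition': expedition, 'wallet_number': wallet_number}
    nonLookupAttribs = {'logbook_entry': placeholder}
    survey, newly_created = save_carefully(Survey, lookupAttribs, nonLookupAttribs)
    return survey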
# obsolete - surveys.csv does not exist.
#
# def readSurveysFromCSV():
#     try:  # could probably combine these two
#         surveytab = open(os.path.join(settings.SURVEY_SCANS, "Surveys.csv"))
#     except IOError:
#         import io, urllib.request, urllib.parse, urllib.error
#         surveytab = io.StringIO(urllib.request.urlopen(settings.SURVEY_SCANS + "/Surveys.csv").read())
#     dialect = csv.Sniffer().sniff(surveytab.read())
#     surveytab.seek(0, 0)
#     surveyreader = csv.reader(surveytab, dialect=dialect)
#     headers = next(surveyreader)
#     # set up a dictionary where the indexes are header names and the values are column numbers
#     header = dict(list(zip(headers, list(range(len(headers))))))
#
#     # test if the expeditions have been added yet
#     if Expedition.objects.count() == 0:
#         print("There are no expeditions in the database. Please run the logbook parser.")
#         sys.exit()
#
#     logging.info("Deleting all scanned images")
#     ScannedImage.objects.all().delete()
#
#     logging.info("Deleting all survey objects")
#     Survey.objects.all().delete()
#
#     logging.info("Beginning to import surveys from " + str(os.path.join(settings.SURVEYS, "Surveys.csv")) + "\n" + "-" * 60 + "\n")
#
#     for survey in surveyreader:
#         # I hate this, but some surveys have a letter eg 2000#34a. The next line deals with that.
#         walletNumberLetter = re.match(r'(?P<number>\d*)(?P<letter>[a-zA-Z]*)', survey[header['Survey Number']])
#         # print(walletNumberLetter.groups())
#         year = survey[header['Year']]
#
#         surveyobj = Survey(
#             expedition=Expedition.objects.filter(year=year)[0],
#             wallet_number=walletNumberLetter.group('number'),
#             logbook_entry=get_or_create_placeholder(year),
#             comments=survey[header['Comments']],
#             location=survey[header['Location']]
#         )
#         surveyobj.wallet_letter = walletNumberLetter.group('letter')
#         if survey[header['Finished']] == 'Yes':
#             # try and find the sketch_scan
#             pass
#         surveyobj.save()
#
#         logging.info("added survey " + survey[header['Year']] + "#" + surveyobj.wallet_number + "\r")
# dead


def listdir(*directories):
    try:
        return os.listdir(os.path.join(settings.SURVEYS, *directories))
    except:
        import urllib.request, urllib.parse, urllib.error
        url = settings.SURVEYS + reduce(lambda x, y: x + "/" + y, ["listdir"] + list(directories))
        folders = urllib.request.urlopen(url.replace("#", "%23")).readlines()
        return [folder.rstrip(r"/") for folder in folders]
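
# Illustrative sketch only (the year and wallet name are hypothetical): listdir() above
# hides the local/remote distinction. Locally it is just os.listdir() under
# settings.SURVEYS; when that path is missing it instead fetches
# settings.SURVEYS + "listdir/2009/2009%2301" from the server, with "#" escaped
# because it is significant in URLs.
def example_listdir_usage():
    return listdir("2009", "2009#01")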
# add survey scans
#
# def parseSurveyScans(expedition, logfile=None):
#     # yearFileList = listdir(expedition.year)
#     try:
#         yearPath = os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
#         yearFileList = os.listdir(yearPath)
#         print(yearFileList)
#         for surveyFolder in yearFileList:
#             try:
#                 surveyNumber = re.match(rb'\d\d\d\d#(X?)0*(\d+)', surveyFolder).groups()
#                 # scanList = listdir(expedition.year, surveyFolder)
#                 scanList = os.listdir(os.path.join(yearPath, surveyFolder))
#             except AttributeError:
#                 print(("Ignoring file in year folder: " + surveyFolder + "\r"))
#                 continue
#
#             for scan in scanList:
#                 # Why does this insist on renaming all the scanned image files?
#                 # It produces duplicate names and all images have type .jpg in the scanObj.
#                 # It seems to rely on end users being particularly diligent in filenames, which is NGtH.
#                 try:
#                     # scanChopped = re.match(rb'(?i).*(notes|elev|plan|extend|elevation)-?(\d*)\.(png|jpg|jpeg|pdf)', scan).groups()
#                     scanChopped = re.match(rb'(?i)([a-z_-]*\d?[a-z_-]*)(\d*)\.(png|jpg|jpeg|pdf|top|dxf|svg|tdr|th2|xml|txt)', scan).groups()
#                     scanType, scanNumber, scanFormat = scanChopped
#                 except AttributeError:
#                     print(("Ignored (bad name format): " + surveyFolder + '/' + scan + "\r"))
#                     continue
#
#                 scanTest = scanType
#                 scanType = 'notes'
#                 match = re.search(rb'(?i)(elev|extend)', scanTest)
#                 if match:
#                     scanType = 'elevation'
#                 match = re.search(rb'(?i)(plan)', scanTest)
#                 if match:
#                     scanType = 'plan'
#
#                 if scanNumber == '':
#                     scanNumber = 1
#
#                 if isinstance(surveyNumber, tuple):
#                     surveyLetter = surveyNumber[0]
#                     surveyNumber = surveyNumber[1]
#                 try:
#                     placeholder = get_or_create_placeholder(year=int(expedition.year))
#                     survey = Survey.objects.get_or_create(wallet_number=surveyNumber, wallet_letter=surveyLetter,
#                                                           expedition=expedition, defaults={'logbook_entry': placeholder})[0]
#                 except Survey.MultipleObjectsReturned:
#                     survey = Survey.objects.filter(wallet_number=surveyNumber, wallet_letter=surveyLetter,
#                                                    expedition=expedition)[0]
#                 file_ = os.path.join(yearPath, surveyFolder, scan)
#                 scanObj = ScannedImage(
#                     file=file_,
#                     contents=scanType,
#                     number_in_wallet=scanNumber,
#                     survey=survey,
#                     new_since_parsing=False,
#                 )
#                 print(("Added scanned image at " + str(scanObj)))
#                 # if scanFormat == "png":
#                 #     if isInterlacedPNG(os.path.join(settings.SURVEY_SCANS, "surveyscans", file_)):
#                 #         print file_ + " is an interlaced PNG. No can do."
#                 #         continue
#                 scanObj.save()
#     except (IOError, OSError):
#         yearPath = os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
#         print((" ! No folder found for " + expedition.year + " at:- " + yearPath))
# dead

# def parseSurveys(logfile=None):
#     try:
#         readSurveysFromCSV()
#     except (IOError, OSError):
#         print(" ! Survey CSV not found..")
#         pass
#
#     print(" - Loading scans by expedition year")
#     for expedition in Expedition.objects.filter(year__gte=2000):  # expos since 2000, because paths and filenames were nonstandard before then
#         print("%s" % expedition, end=' ')
#         parseSurveyScans(expedition)
# dead

# def isInterlacedPNG(filePath):  # We need to check for interlaced PNGs because the thumbnail engine can't handle them (uses PIL)
#     file = Image.open(filePath)
#     print(filePath)
#     if 'interlace' in file.info:
#         return file.info['interlace']
#     else:
#         return False


# handles url or file, so we can refer to a set of scans on another server
def GetListDir(sdir):
    res = []
    if sdir[:7] == "http://":
        assert False, "Not written"
        s = urllib.request.urlopen(sdir)
    else:
        for f in os.listdir(sdir):
            if f[0] != ".":
                ff = os.path.join(sdir, f)
                res.append((f, ff, os.path.isdir(ff)))
    return res


def LoadListScansFile(survexscansfolder):
    gld = []

    # flatten out any directories in these wallet folders - there should not be any
    for (fyf, ffyf, fisdiryf) in GetListDir(survexscansfolder.fpath):
        if fisdiryf:
            gld.extend(GetListDir(ffyf))
        else:
            gld.append((fyf, ffyf, fisdiryf))

    for (fyf, ffyf, fisdiryf) in gld:
        # assert not fisdiryf, ffyf
        if re.search(r"(?i)\.(?:png|jpg|jpeg|pdf|svg)$", fyf):
            survexscansingle = SurvexScanSingle(ffile=ffyf, name=fyf, survexscansfolder=survexscansfolder)
            survexscansingle.save()


# this iterates through the scans directories (either here or on the remote server)
# and builds up the models we can access later
def LoadListScans():
    print(' - Loading Survey Scans... (deleting all objects first)')
    SurvexScanSingle.objects.all().delete()
    SurvexScansFolder.objects.all().delete()

    # first do the smkhs (large kh survey scans) directory
    survexscansfoldersmkhs = SurvexScansFolder(fpath=os.path.join(settings.SURVEY_SCANS, "smkhs"), walletname="smkhs")
    if os.path.isdir(survexscansfoldersmkhs.fpath):
        survexscansfoldersmkhs.save()
        LoadListScansFile(survexscansfoldersmkhs)

    # iterate into the surveyscans directory
    print(' - ', end=' ')
    for f, ff, fisdir in GetListDir(os.path.join(settings.SURVEY_SCANS, "surveyscans")):
        if not fisdir:
            continue

        # do the year folders
        if re.match(r"\d\d\d\d$", f):
            print("%s" % f, end=' ')
            for fy, ffy, fisdiry in GetListDir(ff):
                if fisdiry:
                    assert fisdiry, ffy
                    survexscansfolder = SurvexScansFolder(fpath=ffy, walletname=fy)
                    survexscansfolder.save()
                    LoadListScansFile(survexscansfolder)

        # do the non-year wallet folders (but skip the thumbnails directory)
        elif f != "thumbs":
            survexscansfolder = SurvexScansFolder(fpath=ff, walletname=f)
            survexscansfolder.save()
            LoadListScansFile(survexscansfolder)
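
# Illustrative sketch (the wallet name is hypothetical): after LoadListScans() has run,
# each wallet directory such as .../surveyscans/2009/2009#01 is one SurvexScansFolder
# (walletname="2009#01") and every image or PDF inside it is one SurvexScanSingle
# linked to that folder, so the contents of a wallet can be listed straight from the
# database without touching the filesystem again.
def example_list_wallet_scans(walletname="2009#01"):
    wallet = SurvexScansFolder.objects.get(walletname=walletname)
    return [(scan.name, scan.ffile) for scan in wallet.survexscansingle_set.all()]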
", scansfilel[0]) print("BORK ", tunnelfile.tunnelpath, path) print("BORK ", mscansdir.group(1), mscansdir.group(2), len(scansfilel)) #assert len(scansfilel) == 1 scansfile = scansfilel[0] if scansfolder: tunnelfile.survexscansfolders.add(scansfolder) if scansfile: tunnelfile.survexscans.add(scansfile) elif path and not re.search(rb"\.(?:png|jpg|pdf|jpeg)$(?i)", path): name = os.path.split(path)[1] #print("debug-tunnelfileobjects ", tunnelfile.tunnelpath, path, name) rtunnelfilel = TunnelFile.objects.filter(tunnelname=name) if len(rtunnelfilel): assert len(rtunnelfilel) == 1, ("two paths with name of", path, "need more discrimination coded") rtunnelfile = rtunnelfilel[0] #print "ttt", tunnelfile.tunnelpath, path, name, rtunnelfile.tunnelpath tunnelfile.tunnelcontains.add(rtunnelfile) tunnelfile.save() def SetTunnelfileInfo(tunnelfile): ff = os.path.join(settings.TUNNEL_DATA, tunnelfile.tunnelpath) tunnelfile.filesize = os.stat(ff)[stat.ST_SIZE] fin = open(ff,'rb') ttext = fin.read() fin.close() if tunnelfile.filesize <= 0: print("DEBUG - zero length xml file", ff) return mtype = re.search(rb"<(fontcolours|sketch)", ttext) assert mtype, ff tunnelfile.bfontcolours = (mtype.group(1)=="fontcolours") tunnelfile.npaths = len(re.findall(rb" # for path, style in re.findall(rb'