From f16b4e3f47a5a7e3103d9e6278462b1e86e67b30 Mon Sep 17 00:00:00 2001 From: Sam Wenham Date: Sun, 24 Feb 2019 14:29:14 +0000 Subject: [PATCH] Make the surveys importer not explode --- databaseReset.py | 11 +-- docker/requirements.txt.dj-1.8.19 | 2 +- parsers/surveys.py | 111 ++++++++++++++++-------------- 3 files changed, 68 insertions(+), 56 deletions(-) diff --git a/databaseReset.py b/databaseReset.py index edc7db9..f17d3e5 100644 --- a/databaseReset.py +++ b/databaseReset.py @@ -22,7 +22,7 @@ def reload_db(): os.remove(databasename) except OSError: pass - else: + else: cursor = connection.cursor() cursor.execute("DROP DATABASE %s" % databasename) cursor.execute("CREATE DATABASE %s" % databasename) @@ -115,7 +115,7 @@ def import_auto_logbooks(): "autologbook") for root, dirs, filenames in os.walk(directory): for filename in filenames: - print os.path.join(root, filename) + print(os.path.join(root, filename)) parsers.logbooks.parseAutoLogBookEntry(os.path.join(root, filename)) #Temporary function until definative source of data transfered. @@ -138,7 +138,7 @@ def dumplogbooks(): filename = os.path.join(directory, dateStr + "." + slugify(lbe.title)[:50] + ".html") if lbe.cave: - print lbe.cave.reference() + print(lbe.cave.reference()) trip = {"title": lbe.title, "html":lbe.text, "cave": lbe.cave.reference(), "caveOrLocation": "cave"} else: trip = {"title": lbe.title, "html":lbe.text, "location":lbe.place, "caveOrLocation": "location"} @@ -180,6 +180,7 @@ def usage(): scans - read in the scanned surveynotes survex - read in the survex files survexpos + surveys tunnel - read in the Tunnel files writeCaves """) @@ -212,7 +213,7 @@ if __name__ == "__main__": try: import_tunnelfiles() except: - print "Tunnel files parser broken." 
+ print("Tunnel files parser broken.") import_surveys() import_descriptions() parse_descriptions() @@ -232,6 +233,8 @@ if __name__ == "__main__": dumplogbooks() elif "writeCaves" in sys.argv: writeCaves() + elif "surveys" in sys.argv: + import_surveys() elif "help" in sys.argv: usage() else: diff --git a/docker/requirements.txt.dj-1.8.19 b/docker/requirements.txt.dj-1.8.19 index 42c3640..6942998 100644 --- a/docker/requirements.txt.dj-1.8.19 +++ b/docker/requirements.txt.dj-1.8.19 @@ -1,7 +1,7 @@ Django==1.8.19 django-registration==2.1.2 mysql -imagekit +django-imagekit Image django-tinymce==2.7.0 smartencoding diff --git a/parsers/surveys.py b/parsers/surveys.py index 2c6f190..02b06f5 100644 --- a/parsers/surveys.py +++ b/parsers/surveys.py @@ -39,7 +39,7 @@ def readSurveysFromCSV(): # test if the expeditions have been added yet if Expedition.objects.count()==0: - print "There are no expeditions in the database. Please run the logbook parser." + print("There are no expeditions in the database. Please run the logbook parser.") sys.exit() @@ -56,7 +56,7 @@ def readSurveysFromCSV(): for survey in surveyreader: #I hate this, but some surveys have a letter eg 2000#34a. The next line deals with that. 
walletNumberLetter = re.match(r'(?P\d*)(?P[a-zA-Z]*)',survey[header['Survey Number']]) - # print walletNumberLetter.groups() + # print(walletNumberLetter.groups()) year=survey[header['Year']] @@ -89,63 +89,72 @@ def listdir(*directories): # add survey scans def parseSurveyScans(expedition, logfile=None): # yearFileList = listdir(expedition.year) - yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year) - yearFileList=os.listdir(yearPath) - print yearFileList - for surveyFolder in yearFileList: - try: - surveyNumber=re.match(r'\d\d\d\d#0*(\d+)',surveyFolder).groups() -# scanList = listdir(expedition.year, surveyFolder) - scanList=os.listdir(os.path.join(yearPath,surveyFolder)) - except AttributeError: - print surveyFolder + " ignored\r", - continue - - for scan in scanList: + try: + yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year) + yearFileList=os.listdir(yearPath) + print(yearFileList) + for surveyFolder in yearFileList: try: - scanChopped=re.match(r'(?i).*(notes|elev|plan|elevation|extend)(\d*)\.(png|jpg|jpeg)',scan).groups() - scanType,scanNumber,scanFormat=scanChopped + surveyNumber=re.match(r'\d\d\d\d#0*(\d+)',surveyFolder).groups() + #scanList = listdir(expedition.year, surveyFolder) + scanList=os.listdir(os.path.join(yearPath,surveyFolder)) except AttributeError: - print scan + " ignored\r", + print(surveyFolder + " ignored\r",) continue - if scanType == 'elev' or scanType == 'extend': - scanType = 'elevation' - if scanNumber=='': - scanNumber=1 + for scan in scanList: + try: + scanChopped=re.match(r'(?i).*(notes|elev|plan|elevation|extend)(\d*)\.(png|jpg|jpeg)',scan).groups() + scanType,scanNumber,scanFormat=scanChopped + except AttributeError: + print(scan + " ignored\r",) + continue + if scanType == 'elev' or scanType == 'extend': + scanType = 'elevation' - if type(surveyNumber)==types.TupleType: - surveyNumber=surveyNumber[0] - try: - placeholder=get_or_create_placeholder(year=int(expedition.year)) - 
survey=Survey.objects.get_or_create(wallet_number=surveyNumber, expedition=expedition, defaults={'logbook_entry':placeholder})[0] - except Survey.MultipleObjectsReturned: - survey=Survey.objects.filter(wallet_number=surveyNumber, expedition=expedition)[0] - file_=os.path.join(yearPath, surveyFolder, scan) - scanObj = ScannedImage( - file=file_, - contents=scanType, - number_in_wallet=scanNumber, - survey=survey, - new_since_parsing=False, - ) - print "Added scanned image at " + str(scanObj) - #if scanFormat=="png": - #if isInterlacedPNG(os.path.join(settings.SURVEY_SCANS, "surveyscans", file_)): - # print file_+ " is an interlaced PNG. No can do." - #continue - scanObj.save() + if scanNumber=='': + scanNumber=1 + + if type(surveyNumber)==types.TupleType: + surveyNumber=surveyNumber[0] + try: + placeholder=get_or_create_placeholder(year=int(expedition.year)) + survey=Survey.objects.get_or_create(wallet_number=surveyNumber, expedition=expedition, defaults={'logbook_entry':placeholder})[0] + except Survey.MultipleObjectsReturned: + survey=Survey.objects.filter(wallet_number=surveyNumber, expedition=expedition)[0] + file_=os.path.join(yearPath, surveyFolder, scan) + scanObj = ScannedImage( + file=file_, + contents=scanType, + number_in_wallet=scanNumber, + survey=survey, + new_since_parsing=False, + ) + print("Added scanned image at " + str(scanObj)) + #if scanFormat=="png": + #if isInterlacedPNG(os.path.join(settings.SURVEY_SCANS, "surveyscans", file_)): + # print file_+ " is an interlaced PNG. No can do." 
+ #continue + scanObj.save() + except (IOError, OSError): + yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year) + print("No folder found for " + expedition.year + " at:- " + yearPath) # dead def parseSurveys(logfile=None): - readSurveysFromCSV() + try: + readSurveysFromCSV() + except (IOError, OSError): + print("Survey CSV not found..") + pass + for expedition in Expedition.objects.filter(year__gte=2000): #expos since 2000, because paths and filenames were nonstandard before then parseSurveyScans(expedition) # dead def isInterlacedPNG(filePath): #We need to check for interlaced PNGs because the thumbnail engine can't handle them (uses PIL) file=Image.open(filePath) - print filePath + print(filePath) if 'interlace' in file.info: return file.info['interlace'] else: @@ -181,7 +190,7 @@ def LoadListScansFile(survexscansfolder): for (fyf, ffyf, fisdiryf) in gld: assert not fisdiryf, ffyf - if re.search("\.(?:png|jpg|jpeg)(?i)$", fyf): + if re.search(r"\.(?:png|jpg|jpeg)(?i)$", fyf): survexscansingle = SurvexScanSingle(ffile=ffyf, name=fyf, survexscansfolder=survexscansfolder) survexscansingle.save() @@ -190,7 +199,7 @@ def LoadListScansFile(survexscansfolder): # and builds up the models we can access later def LoadListScans(): - print 'Loading Survey Scans...' 
+ print('Loading Survey Scans...') SurvexScanSingle.objects.all().delete() SurvexScansFolder.objects.all().delete() @@ -208,7 +217,7 @@ def LoadListScans(): continue # do the year folders - if re.match("\d\d\d\d$", f): + if re.match(r"\d\d\d\d$", f): for fy, ffy, fisdiry in GetListDir(ff): if fisdiry: assert fisdiry, ffy @@ -225,7 +234,7 @@ def LoadListScans(): def FindTunnelScan(tunnelfile, path): scansfolder, scansfile = None, None - mscansdir = re.search("(\d\d\d\d#\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg))$", path) + mscansdir = re.search(r"(\d\d\d\d#\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg))$", path) if mscansdir: scansfolderl = SurvexScansFolder.objects.filter(walletname=mscansdir.group(1)) if len(scansfolderl): @@ -242,9 +251,9 @@ def FindTunnelScan(tunnelfile, path): if scansfile: tunnelfile.survexscans.add(scansfile) - elif path and not re.search("\.(?:png|jpg)$(?i)", path): + elif path and not re.search(r"\.(?:png|jpg)$(?i)", path): name = os.path.split(path)[1] - print "ttt", tunnelfile.tunnelpath, path, name + print("ttt", tunnelfile.tunnelpath, path, name) rtunnelfilel = TunnelFile.objects.filter(tunnelname=name) if len(rtunnelfilel): assert len(rtunnelfilel) == 1, ("two paths with name of", path, "need more discrimination coded")