From f16b4e3f47a5a7e3103d9e6278462b1e86e67b30 Mon Sep 17 00:00:00 2001
From: Sam Wenham <sam@wenhams.co.uk>
Date: Sun, 24 Feb 2019 14:29:14 +0000
Subject: [PATCH] Make the surveys importer not explode

---
 databaseReset.py                  |  11 +--
 docker/requirements.txt.dj-1.8.19 |   2 +-
 parsers/surveys.py                | 111 ++++++++++++++++--------------
 3 files changed, 68 insertions(+), 56 deletions(-)

diff --git a/databaseReset.py b/databaseReset.py
index edc7db9..f17d3e5 100644
--- a/databaseReset.py
+++ b/databaseReset.py
@@ -22,7 +22,7 @@ def reload_db():
             os.remove(databasename)
         except OSError:
             pass
-    else: 
+    else:
         cursor = connection.cursor()
         cursor.execute("DROP DATABASE %s" % databasename)
         cursor.execute("CREATE DATABASE %s" % databasename)
@@ -115,7 +115,7 @@ def import_auto_logbooks():
                                  "autologbook")       
         for root, dirs, filenames in os.walk(directory):
             for filename in filenames:
-                print os.path.join(root, filename)
+                print(os.path.join(root, filename))
                 parsers.logbooks.parseAutoLogBookEntry(os.path.join(root, filename))
 
 #Temporary function until definative source of data transfered.
@@ -138,7 +138,7 @@ def dumplogbooks():
             filename = os.path.join(directory, 
                                     dateStr + "." + slugify(lbe.title)[:50] + ".html")
             if lbe.cave:
-                print lbe.cave.reference()
+                print(lbe.cave.reference())
                 trip = {"title": lbe.title, "html":lbe.text, "cave": lbe.cave.reference(), "caveOrLocation": "cave"}
             else:
                 trip = {"title": lbe.title, "html":lbe.text, "location":lbe.place, "caveOrLocation": "location"}
@@ -180,6 +180,7 @@ def usage():
              scans    - read in the scanned surveynotes
              survex   - read in the survex files
              survexpos
+             surveys
              tunnel   - read in the Tunnel files
              writeCaves
              """)
@@ -212,7 +213,7 @@ if __name__ == "__main__":
         try:
             import_tunnelfiles()
         except:
-            print "Tunnel files parser broken."
+            print("Tunnel files parser broken.")
         import_surveys()
         import_descriptions()
         parse_descriptions()
@@ -232,6 +233,8 @@ if __name__ == "__main__":
         dumplogbooks()
     elif "writeCaves" in sys.argv:
         writeCaves()
+    elif "surveys" in sys.argv:
+        import_surveys()
     elif "help" in sys.argv:
         usage()
     else:
diff --git a/docker/requirements.txt.dj-1.8.19 b/docker/requirements.txt.dj-1.8.19
index 42c3640..6942998 100644
--- a/docker/requirements.txt.dj-1.8.19
+++ b/docker/requirements.txt.dj-1.8.19
@@ -1,7 +1,7 @@
 Django==1.8.19
 django-registration==2.1.2
 mysql
-imagekit
+django-imagekit
 Image
 django-tinymce==2.7.0
 smartencoding
diff --git a/parsers/surveys.py b/parsers/surveys.py
index 2c6f190..02b06f5 100644
--- a/parsers/surveys.py
+++ b/parsers/surveys.py
@@ -39,7 +39,7 @@ def readSurveysFromCSV():
 
     # test if the expeditions have been added yet
     if Expedition.objects.count()==0:
-        print "There are no expeditions in the database. Please run the logbook parser."
+        print("There are no expeditions in the database. Please run the logbook parser.")
         sys.exit()
 
     
@@ -56,7 +56,7 @@ def readSurveysFromCSV():
     for survey in surveyreader:
         #I hate this, but some surveys have a letter eg 2000#34a. The next line deals with that.
         walletNumberLetter = re.match(r'(?P<number>\d*)(?P<letter>[a-zA-Z]*)',survey[header['Survey Number']]) 
-    #    print walletNumberLetter.groups()
+    #    print(walletNumberLetter.groups())
         year=survey[header['Year']]
 
         
@@ -89,63 +89,72 @@ def listdir(*directories):
 # add survey scans
 def parseSurveyScans(expedition, logfile=None):
 #    yearFileList = listdir(expedition.year)
-    yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
-    yearFileList=os.listdir(yearPath)
-    print yearFileList
-    for surveyFolder in yearFileList:
-        try:
-            surveyNumber=re.match(r'\d\d\d\d#0*(\d+)',surveyFolder).groups()
-#            scanList = listdir(expedition.year, surveyFolder)
-            scanList=os.listdir(os.path.join(yearPath,surveyFolder))
-        except AttributeError:
-            print surveyFolder + " ignored\r",
-            continue
-
-        for scan in scanList:
+    try:
+        yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
+        yearFileList=os.listdir(yearPath)
+        print(yearFileList)
+        for surveyFolder in yearFileList:
             try:
-                scanChopped=re.match(r'(?i).*(notes|elev|plan|elevation|extend)(\d*)\.(png|jpg|jpeg)',scan).groups()
-                scanType,scanNumber,scanFormat=scanChopped
+                surveyNumber=re.match(r'\d\d\d\d#0*(\d+)',surveyFolder).groups()
+                #scanList = listdir(expedition.year, surveyFolder)
+                scanList=os.listdir(os.path.join(yearPath,surveyFolder))
             except AttributeError:
-                print scan + " ignored\r",
+                print(surveyFolder + " ignored\r",)
                 continue
-            if scanType == 'elev' or scanType == 'extend':
-                scanType = 'elevation'
 
-            if scanNumber=='':
-                scanNumber=1
+            for scan in scanList:
+                try:
+                    scanChopped=re.match(r'(?i).*(notes|elev|plan|elevation|extend)(\d*)\.(png|jpg|jpeg)',scan).groups()
+                    scanType,scanNumber,scanFormat=scanChopped
+                except AttributeError:
+                    print(scan + " ignored\r",)
+                    continue
+                if scanType == 'elev' or scanType == 'extend':
+                    scanType = 'elevation'
 
-            if type(surveyNumber)==types.TupleType:
-                surveyNumber=surveyNumber[0]
-            try:
-                placeholder=get_or_create_placeholder(year=int(expedition.year))
-                survey=Survey.objects.get_or_create(wallet_number=surveyNumber, expedition=expedition, defaults={'logbook_entry':placeholder})[0]
-            except Survey.MultipleObjectsReturned:
-                survey=Survey.objects.filter(wallet_number=surveyNumber, expedition=expedition)[0]
-            file_=os.path.join(yearPath, surveyFolder, scan)
-            scanObj = ScannedImage(
-                file=file_,
-                contents=scanType,
-                number_in_wallet=scanNumber,
-                survey=survey,
-                new_since_parsing=False,
-                )
-            print "Added scanned image at " + str(scanObj)
-            #if scanFormat=="png":
-                #if isInterlacedPNG(os.path.join(settings.SURVEY_SCANS, "surveyscans", file_)):
-                #    print file_+ " is an interlaced PNG. No can do."
-                #continue
-            scanObj.save()
+                if scanNumber=='':
+                    scanNumber=1
+
+                if type(surveyNumber)==types.TupleType:
+                    surveyNumber=surveyNumber[0]
+                try:
+                    placeholder=get_or_create_placeholder(year=int(expedition.year))
+                    survey=Survey.objects.get_or_create(wallet_number=surveyNumber, expedition=expedition, defaults={'logbook_entry':placeholder})[0]
+                except Survey.MultipleObjectsReturned:
+                    survey=Survey.objects.filter(wallet_number=surveyNumber, expedition=expedition)[0]
+                file_=os.path.join(yearPath, surveyFolder, scan)
+                scanObj = ScannedImage(
+                    file=file_,
+                    contents=scanType,
+                    number_in_wallet=scanNumber,
+                    survey=survey,
+                    new_since_parsing=False,
+                    )
+                print("Added scanned image at " + str(scanObj))
+                #if scanFormat=="png":
+                    #if isInterlacedPNG(os.path.join(settings.SURVEY_SCANS, "surveyscans", file_)):
+                    #    print file_+ " is an interlaced PNG. No can do."
+                    #continue
+                scanObj.save()
+    except (IOError, OSError):
+        yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
+        print("No folder found for " + expedition.year + " at:- " + yearPath)
 
 # dead
 def parseSurveys(logfile=None):
-    readSurveysFromCSV()
+    try:
+        readSurveysFromCSV()
+    except (IOError, OSError):
+        print("Survey CSV not found..")
+        pass
+
     for expedition in Expedition.objects.filter(year__gte=2000):   #expos since 2000, because paths and filenames were nonstandard before then
         parseSurveyScans(expedition)
 
 # dead
 def isInterlacedPNG(filePath): #We need to check for interlaced PNGs because the thumbnail engine can't handle them (uses PIL)
     file=Image.open(filePath)
-    print filePath
+    print(filePath)
     if 'interlace' in file.info:
         return file.info['interlace']
     else:
@@ -181,7 +190,7 @@ def LoadListScansFile(survexscansfolder):
     
     for (fyf, ffyf, fisdiryf) in gld:
         assert not fisdiryf, ffyf
-        if re.search("\.(?:png|jpg|jpeg)(?i)$", fyf):
+        if re.search(r"\.(?:png|jpg|jpeg)(?i)$", fyf):
             survexscansingle = SurvexScanSingle(ffile=ffyf, name=fyf, survexscansfolder=survexscansfolder)
             survexscansingle.save()
 
@@ -190,7 +199,7 @@ def LoadListScansFile(survexscansfolder):
 # and builds up the models we can access later
 def LoadListScans():
 
-    print 'Loading Survey Scans...'
+    print('Loading Survey Scans...')
 
     SurvexScanSingle.objects.all().delete()
     SurvexScansFolder.objects.all().delete()
@@ -208,7 +217,7 @@ def LoadListScans():
             continue
         
         # do the year folders
-        if re.match("\d\d\d\d$", f):
+        if re.match(r"\d\d\d\d$", f):
             for fy, ffy, fisdiry in GetListDir(ff):
                 if fisdiry:
                     assert fisdiry, ffy
@@ -225,7 +234,7 @@ def LoadListScans():
 
 def FindTunnelScan(tunnelfile, path):
     scansfolder, scansfile = None, None
-    mscansdir = re.search("(\d\d\d\d#\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg))$", path)
+    mscansdir = re.search(r"(\d\d\d\d#\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg))$", path)
     if mscansdir:
         scansfolderl = SurvexScansFolder.objects.filter(walletname=mscansdir.group(1))
         if len(scansfolderl):
@@ -242,9 +251,9 @@ def FindTunnelScan(tunnelfile, path):
         if scansfile:
             tunnelfile.survexscans.add(scansfile)
     
-    elif path and not re.search("\.(?:png|jpg)$(?i)", path):
+    elif path and not re.search(r"\.(?:png|jpg)$(?i)", path):
         name = os.path.split(path)[1]
-        print "ttt", tunnelfile.tunnelpath, path, name
+        print("ttt", tunnelfile.tunnelpath, path, name)
         rtunnelfilel = TunnelFile.objects.filter(tunnelname=name)
         if len(rtunnelfilel):
             assert len(rtunnelfilel) == 1, ("two paths with name of", path, "need more discrimination coded")