Make the surveys importer not explode

2026-02-08 11:28:23 +00:00 · 2019-02-24 14:29:14 +00:00
parent 4ad5b68433
commit f16b4e3f47
3 changed files with 68 additions and 56 deletions
--- a/databaseReset.py
+++ b/databaseReset.py
@@ -22,7 +22,7 @@ def reload_db():
            os.remove(databasename)
        except OSError:
            pass
-    else: 
+    else:
        cursor = connection.cursor()
        cursor.execute("DROP DATABASE %s" % databasename)
        cursor.execute("CREATE DATABASE %s" % databasename)
@@ -115,7 +115,7 @@ def import_auto_logbooks():
                                 "autologbook")       
        for root, dirs, filenames in os.walk(directory):
            for filename in filenames:
-                print os.path.join(root, filename)
+                print(os.path.join(root, filename))
                parsers.logbooks.parseAutoLogBookEntry(os.path.join(root, filename))
 #Temporary function until definative source of data transfered.
@@ -138,7 +138,7 @@ def dumplogbooks():
            filename = os.path.join(directory, 
                                    dateStr + "." + slugify(lbe.title)[:50] + ".html")
            if lbe.cave:
-                print lbe.cave.reference()
+                print(lbe.cave.reference())
                trip = {"title": lbe.title, "html":lbe.text, "cave": lbe.cave.reference(), "caveOrLocation": "cave"}
            else:
                trip = {"title": lbe.title, "html":lbe.text, "location":lbe.place, "caveOrLocation": "location"}
@@ -180,6 +180,7 @@ def usage():
             scans    - read in the scanned surveynotes
             survex   - read in the survex files
             survexpos
             surveys
             tunnel   - read in the Tunnel files
             writeCaves
             """)
@@ -212,7 +213,7 @@ if __name__ == "__main__":
        try:
            import_tunnelfiles()
        except:
-            print "Tunnel files parser broken."
+            print("Tunnel files parser broken.")
        import_surveys()
        import_descriptions()
        parse_descriptions()
@@ -232,6 +233,8 @@ if __name__ == "__main__":
        dumplogbooks()
    elif "writeCaves" in sys.argv:
        writeCaves()
    elif "surveys" in sys.argv:
        import_surveys()
    elif "help" in sys.argv:
        usage()
    else:
--- a/docker/requirements.txt.dj-1.8.19
+++ b/docker/requirements.txt.dj-1.8.19
@@ -1,7 +1,7 @@
 Django==1.8.19
 django-registration==2.1.2
 mysql
-imagekit
+django-imagekit
 Image
 django-tinymce==2.7.0
 smartencoding
--- a/parsers/surveys.py
+++ b/parsers/surveys.py
@@ -39,7 +39,7 @@ def readSurveysFromCSV():
    # test if the expeditions have been added yet
    if Expedition.objects.count()==0:
-        print "There are no expeditions in the database. Please run the logbook parser."
+        print("There are no expeditions in the database. Please run the logbook parser.")
        sys.exit()
@@ -56,7 +56,7 @@ def readSurveysFromCSV():
    for survey in surveyreader:
        #I hate this, but some surveys have a letter eg 2000#34a. The next line deals with that.
        walletNumberLetter = re.match(r'(?P<number>\d*)(?P<letter>[a-zA-Z]*)',survey[header['Survey Number']]) 
-    #    print walletNumberLetter.groups()
+    #    print(walletNumberLetter.groups())
        year=survey[header['Year']]
@@ -89,63 +89,72 @@ def listdir(*directories):
 # add survey scans
 def parseSurveyScans(expedition, logfile=None):
 #    yearFileList = listdir(expedition.year)
-    yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
+    try:
-    yearFileList=os.listdir(yearPath)
+        yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
-    print yearFileList
+        yearFileList=os.listdir(yearPath)
-    for surveyFolder in yearFileList:
+        print(yearFileList)
-        try:
+        for surveyFolder in yearFileList:
            surveyNumber=re.match(r'\d\d\d\d#0*(\d+)',surveyFolder).groups()
 #            scanList = listdir(expedition.year, surveyFolder)
            scanList=os.listdir(os.path.join(yearPath,surveyFolder))
        except AttributeError:
            print surveyFolder + " ignored\r",
            continue
        for scan in scanList:
            try:
-                scanChopped=re.match(r'(?i).*(notes|elev|plan|elevation|extend)(\d*)\.(png|jpg|jpeg)',scan).groups()
+                surveyNumber=re.match(r'\d\d\d\d#0*(\d+)',surveyFolder).groups()
-                scanType,scanNumber,scanFormat=scanChopped
+                #scanList = listdir(expedition.year, surveyFolder)
                scanList=os.listdir(os.path.join(yearPath,surveyFolder))
            except AttributeError:
-                print scan + " ignored\r",
+                print(surveyFolder + " ignored\r",)
                continue
            if scanType == 'elev' or scanType == 'extend':
                scanType = 'elevation'
-            if scanNumber=='':
+            for scan in scanList:
-                scanNumber=1
+                try:
                    scanChopped=re.match(r'(?i).*(notes|elev|plan|elevation|extend)(\d*)\.(png|jpg|jpeg)',scan).groups()
                    scanType,scanNumber,scanFormat=scanChopped
                except AttributeError:
                    print(scan + " ignored\r",)
                    continue
                if scanType == 'elev' or scanType == 'extend':
                    scanType = 'elevation'
-            if type(surveyNumber)==types.TupleType:
+                if scanNumber=='':
-                surveyNumber=surveyNumber[0]
+                    scanNumber=1
-            try:
+
-                placeholder=get_or_create_placeholder(year=int(expedition.year))
+                if type(surveyNumber)==types.TupleType:
-                survey=Survey.objects.get_or_create(wallet_number=surveyNumber, expedition=expedition, defaults={'logbook_entry':placeholder})[0]
+                    surveyNumber=surveyNumber[0]
-            except Survey.MultipleObjectsReturned:
+                try:
-                survey=Survey.objects.filter(wallet_number=surveyNumber, expedition=expedition)[0]
+                    placeholder=get_or_create_placeholder(year=int(expedition.year))
-            file_=os.path.join(yearPath, surveyFolder, scan)
+                    survey=Survey.objects.get_or_create(wallet_number=surveyNumber, expedition=expedition, defaults={'logbook_entry':placeholder})[0]
-            scanObj = ScannedImage(
+                except Survey.MultipleObjectsReturned:
-                file=file_,
+                    survey=Survey.objects.filter(wallet_number=surveyNumber, expedition=expedition)[0]
-                contents=scanType,
+                file_=os.path.join(yearPath, surveyFolder, scan)
-                number_in_wallet=scanNumber,
+                scanObj = ScannedImage(
-                survey=survey,
+                    file=file_,
-                new_since_parsing=False,
+                    contents=scanType,
-                )
+                    number_in_wallet=scanNumber,
-            print "Added scanned image at " + str(scanObj)
+                    survey=survey,
-            #if scanFormat=="png":
+                    new_since_parsing=False,
-                #if isInterlacedPNG(os.path.join(settings.SURVEY_SCANS, "surveyscans", file_)):
+                    )
-                #    print file_+ " is an interlaced PNG. No can do."
+                print("Added scanned image at " + str(scanObj))
-                #continue
+                #if scanFormat=="png":
-            scanObj.save()
+                    #if isInterlacedPNG(os.path.join(settings.SURVEY_SCANS, "surveyscans", file_)):
                    #    print file_+ " is an interlaced PNG. No can do."
                    #continue
                scanObj.save()
    except (IOError, OSError):
        yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
        print("No folder found for " + expedition.year + " at:- " + yearPath)
 # dead
 def parseSurveys(logfile=None):
-    readSurveysFromCSV()
+    try:
        readSurveysFromCSV()
    except (IOError, OSError):
        print("Survey CSV not found..")
        pass
    for expedition in Expedition.objects.filter(year__gte=2000):   #expos since 2000, because paths and filenames were nonstandard before then
        parseSurveyScans(expedition)
 # dead
 def isInterlacedPNG(filePath): #We need to check for interlaced PNGs because the thumbnail engine can't handle them (uses PIL)
    file=Image.open(filePath)
-    print filePath
+    print(filePath)
    if 'interlace' in file.info:
        return file.info['interlace']
    else:
@@ -181,7 +190,7 @@ def LoadListScansFile(survexscansfolder):
    for (fyf, ffyf, fisdiryf) in gld:
        assert not fisdiryf, ffyf
-        if re.search("\.(?:png|jpg|jpeg)(?i)$", fyf):
+        if re.search(r"\.(?:png|jpg|jpeg)(?i)$", fyf):
            survexscansingle = SurvexScanSingle(ffile=ffyf, name=fyf, survexscansfolder=survexscansfolder)
            survexscansingle.save()
@@ -190,7 +199,7 @@ def LoadListScansFile(survexscansfolder):
 # and builds up the models we can access later
 def LoadListScans():
-    print 'Loading Survey Scans...'
+    print('Loading Survey Scans...')
    SurvexScanSingle.objects.all().delete()
    SurvexScansFolder.objects.all().delete()
@@ -208,7 +217,7 @@ def LoadListScans():
            continue
        # do the year folders
-        if re.match("\d\d\d\d$", f):
+        if re.match(r"\d\d\d\d$", f):
            for fy, ffy, fisdiry in GetListDir(ff):
                if fisdiry:
                    assert fisdiry, ffy
@@ -225,7 +234,7 @@ def LoadListScans():
 def FindTunnelScan(tunnelfile, path):
    scansfolder, scansfile = None, None
-    mscansdir = re.search("(\d\d\d\d#\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg))$", path)
+    mscansdir = re.search(r"(\d\d\d\d#\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg))$", path)
    if mscansdir:
        scansfolderl = SurvexScansFolder.objects.filter(walletname=mscansdir.group(1))
        if len(scansfolderl):
@@ -242,9 +251,9 @@ def FindTunnelScan(tunnelfile, path):
        if scansfile:
            tunnelfile.survexscans.add(scansfile)
-    elif path and not re.search("\.(?:png|jpg)$(?i)", path):
+    elif path and not re.search(r"\.(?:png|jpg)$(?i)", path):
        name = os.path.split(path)[1]
-        print "ttt", tunnelfile.tunnelpath, path, name
+        print("ttt", tunnelfile.tunnelpath, path, name)
        rtunnelfilel = TunnelFile.objects.filter(tunnelname=name)
        if len(rtunnelfilel):
            assert len(rtunnelfilel) == 1, ("two paths with name of", path, "need more discrimination coded")