Merge lots of troggle fixes

2019-04-02 00:57:54 +01:00
parent de7d68b1eb bb8dbb381f
commit c4301cf6df
24 changed files with 239 additions and 255 deletions
--- a/parsers/QMs.py
+++ b/parsers/QMs.py
@@ -17,19 +17,19 @@ def parseCaveQMs(cave,inputFile):
        try:
            steinBr=Cave.objects.get(official_name="Steinbr&uuml;ckenh&ouml;hle")
        except Cave.DoesNotExist:
-            print "Steinbruckenhoehle is not in the database. Please run parsers.cavetab first."
+            print("Steinbruckenhoehle is not in the database. Please run parsers.cavetab first.")
            return
    elif cave=='hauch':
        try:
            hauchHl=Cave.objects.get(official_name="Hauchh&ouml;hle")
        except Cave.DoesNotExist:
-            print "Hauchhoele is not in the database. Please run parsers.cavetab first."
+            print("Hauchhoele is not in the database. Please run parsers.cavetab first.")
            return
    elif cave =='kh':
        try:
            kh=Cave.objects.get(official_name="Kaninchenh&ouml;hle")
        except Cave.DoesNotExist:
-            print "KH is not in the database. Please run parsers.cavetab first."
+            print("KH is not in the database. Please run parsers.cavetab first.")
        parse_KH_QMs(kh, inputFile=inputFile) 
        return

@@ -48,7 +48,7 @@ def parseCaveQMs(cave,inputFile):
            elif cave=='hauch':
                placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, title="placeholder for QMs in 234", text="QMs temporarily attached to this should be re-attached to their actual trips", defaults={"date": date(year, 1, 1),"cave":hauchHl})            
            if hadToCreate:
-                print cave+" placeholder logbook entry for " + str(year) + " added to database"
+                print(cave + " placeholder logbook entry for " + str(year) + " added to database")
            QMnum=re.match(r".*?-\d*?-X?(?P<numb>\d*)",line[0]).group("numb")
            newQM = QM()
            newQM.found_by=placeholder
@@ -71,19 +71,18 @@ def parseCaveQMs(cave,inputFile):
                if preexistingQM.new_since_parsing==False:  #if the pre-existing QM has not been modified, overwrite it
                    preexistingQM.delete()
                    newQM.save()
-                    print "overwriting " + str(preexistingQM) +"\r",
-                
+                    print("overwriting " + str(preexistingQM) +"\r")
                else:  # otherwise, print that it was ignored
-                    print "preserving "+ str(preexistingQM) + ", which was edited in admin \r",
+                    print("preserving " + str(preexistingQM) + ", which was edited in admin \r")
                    
            except QM.DoesNotExist:         #if there is no pre-existing QM, save the new one
                newQM.save() 
-                print "QM "+str(newQM) + ' added to database\r',
+                print("QM "+str(newQM) + ' added to database\r')
                
        except KeyError: #check on this one
            continue
        except IndexError:
-            print "Index error in " + str(line)
+            print("Index error in " + str(line))
            continue

 def parse_KH_QMs(kh, inputFile):
@@ -104,7 +103,7 @@ def parse_KH_QMs(kh, inputFile):
                }
            nonLookupArgs={
                'grade':res['grade'],
-                'nearest_station':res['nearest_station'],
+                'nearest_station_name':res['nearest_station'],
                'location_description':res['description']
                }
 
@@ -115,3 +114,4 @@ parseCaveQMs(cave='stein',inputFile=r"1623/204/qm.csv")
 parseCaveQMs(cave='hauch',inputFile=r"1623/234/qm.csv")
 parseCaveQMs(cave='kh', inputFile="1623/161/qmtodo.htm")
 #parseCaveQMs(cave='balkonhoehle',inputFile=r"1623/264/qm.csv")
+
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -45,7 +45,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground):
        author = res[-1][0]
    return res, author

-def GetTripCave(place):                     #need to be fuzzier about matching here. Already a very slow function...
+def GetTripCave(place):  #need to be fuzzier about matching here. Already a very slow function...
 #    print "Getting cave for " , place
    try:
        katastNumRes=[]
@@ -74,23 +74,23 @@ def GetTripCave(place):                     #need to be fuzzier about matching h


 noncaveplaces = [ "Journey", "Loser Plateau" ]
-def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground):
+def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki"):
    """ saves a logbook entry and related persontrips """
    trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground)
    if not author:
-        print("   - skipping logentry" + title + " no author for entry")
+        print("   - Skipping logentry: " + title + " no author for entry")
        return
-    
-#    tripCave = GetTripCave(place)
-    #
+
+    #tripCave = GetTripCave(place)
+
    lplace = place.lower()
    if lplace not in noncaveplaces:
        cave=GetCaveLookup().get(lplace)

    #Check for an existing copy of the current entry, and save
    expeditionday = expedition.get_expedition_day(date)
-    lookupAttribs={'date':date, 'title':title} 
-    nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave':cave, 'slug':slugify(title)[:50]}
+    lookupAttribs={'date':date, 'title':title}
+    nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave':cave, 'slug':slugify(title)[:50], 'entry_type':entry_type}
    lbo, created=save_carefully(models.LogbookEntry, lookupAttribs, nonLookupAttribs)
    
    for tripperson, time_underground in trippersons:
@@ -115,7 +115,7 @@ def ParseDate(tripdate, year):
        assert False, tripdate
    return datetime.date(year, month, day)

-# 2007, 2008, 2006
+# parser for 2006, 2008 - 2010
 def Parselogwikitxt(year, expedition, txt):
    trippara = re.findall(r"===(.*?)===([\s\S]*?)(?====)", txt)
    for triphead, triptext in trippara:
@@ -140,9 +140,9 @@ def Parselogwikitxt(year, expedition, txt):
        #print "\n", tripcave, "---   ppp", trippeople, len(triptext)
        EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)

-# 2002, 2004, 2005
+# parser for 2002, 2004, 2005, 2007, 2011 - 2018
 def Parseloghtmltxt(year, expedition, txt):
-    print(" - Using log html parser")
+    #print(" - Starting log html parser")
    tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
    logbook_entry_count = 0
    for trippara in tripparas:
@@ -163,7 +163,6 @@ def Parseloghtmltxt(year, expedition, txt):
                print("can't parse: ", trippara)  # this is 2007 which needs editing
            #assert s, trippara
            continue
-
        tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
        ldate = ParseDate(tripdate.strip(), year)
        #assert tripid[:-1] == "t" + tripdate, (tripid, tripdate)
@@ -174,16 +173,18 @@ def Parseloghtmltxt(year, expedition, txt):
            tripcave = triptitles[0]
        else:
            tripcave = "UNKNOWN"
-        #print "\n", tripcave, "---   ppp", trippeople, len(triptext)
+        #print("\n", tripcave, "---   ppp", trippeople, len(triptext))
        ltriptext = re.sub(r"</p>", "", triptext)
        ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
        ltriptext = re.sub(r"<p>", "\n\n", ltriptext).strip()
-        EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
+        EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext,
+                          trippeople=trippeople, expedition=expedition, logtime_underground=0,
+                          entry_type="html")
    if logbook_entry_count == 0:
        print(" - No trip entrys found in logbook, check the syntax matches htmltxt format")


-# main parser for pre-2001.  simpler because the data has been hacked so much to fit it
+# main parser for 1991 - 2001.  simpler because the data has been hacked so much to fit it
 def Parseloghtml01(year, expedition, txt):
    tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
    for trippara in tripparas:
@@ -227,9 +228,11 @@ def Parseloghtml01(year, expedition, txt):

        #print ldate, trippeople.strip()
            # could includ the tripid (url link for cross referencing)
-        EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
-
+        EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext,
+                          trippeople=trippeople, expedition=expedition, logtime_underground=0,
+                          entry_type="html")

+# parser for 2003
 def Parseloghtml03(year, expedition, txt):
    tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
    for trippara in tripparas:
@@ -256,7 +259,9 @@ def Parseloghtml03(year, expedition, txt):
        ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
        ltriptext = re.sub(r"<p>", "\n\n", ltriptext).strip()
        ltriptext = re.sub(r"[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext)
-        EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
+        EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle,
+                          text = ltriptext, trippeople=trippeople, expedition=expedition,
+                          logtime_underground=0, entry_type="html")


 def SetDatesFromLogbookEntries(expedition):
@@ -281,8 +286,7 @@ def SetDatesFromLogbookEntries(expedition):
 def LoadLogbookForExpedition(expedition):
    """ Parses all logbook entries for one expedition """
        
-    expowebbase = os.path.join(settings.EXPOWEB, "years")  
-    #year = str(expedition.year)
+    expowebbase = os.path.join(settings.EXPOWEB, "years")
    yearlinks = settings.LOGBOOK_PARSER_SETTINGS

    logbook_parseable = False
@@ -294,6 +298,7 @@ def LoadLogbookForExpedition(expedition):
        file_in.close()
        parsefunc = year_settings[1]
        logbook_parseable = True
+        print(" - Parsing logbook: " + year_settings[0] + "\n - Using parser: " + year_settings[1])
    else:
        try:
            file_in = open(os.path.join(expowebbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE))
@@ -304,7 +309,7 @@ def LoadLogbookForExpedition(expedition):
            parsefunc = settings.DEFAULT_LOGBOOK_PARSER
        except (IOError):
            logbook_parseable = False
-            print("Couldn't open default logbook file and nothing set for expo " + expedition.year)
+            print("Couldn't open default logbook file and nothing in settings for expo " + expedition.year)

    if logbook_parseable:
        parser = globals()[parsefunc]
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -59,22 +59,19 @@ def LoadPersonsExpos():
        
        save_carefully(models.Expedition, lookupAttribs, nonLookupAttribs)

-    
    # make persons
    print("Loading personexpeditions")
-    #expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
-    #expomissing = set(expoers2008)

    for personline in personreader:
        name = personline[header["Name"]]
-        name = re.sub("<.*?>", "", name)
-        mname = re.match("(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name)
+        name = re.sub(r"<.*?>", "", name)
+        mname = re.match(r"(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name)
        nickname = mname.group(3) or ""
-	
+
        lookupAttribs={'first_name':mname.group(1), 'last_name':(mname.group(2) or "")}
        nonLookupAttribs={'is_vfho':personline[header["VfHO member"]],}
        person, created = save_carefully(models.Person, lookupAttribs, nonLookupAttribs)
-	
+
        parseMugShotAndBlurb(personline=personline, header=header, person=person)
    
        # make person expedition from table
@@ -88,6 +85,8 @@ def LoadPersonsExpos():

    # this fills in those people for whom 2008 was their first expo
    #print "Loading personexpeditions 2008"
+    #expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
+    #expomissing = set(expoers2008)
    #for name in expomissing:
        # firstname, lastname = name.split()
        # is_guest = name in ["Eeva Makiranta", "Keith Curtis"]
@@ -103,18 +102,6 @@ def LoadPersonsExpos():
        # personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname="", is_guest=is_guest)
        # personexpedition.save()

-    #Notability is now a method of person. Makes no sense to store it in the database; it would need to be recalculated every time something changes. - AC 16 Feb 09
-    # could rank according to surveying as well
-    #print "Setting person notability"
-    #for person in models.Person.objects.all():
-        #person.notability = 0.0
-        #for personexpedition in person.personexpedition_set.all():
-            #if not personexpedition.is_guest:
-                #person.notability += 1.0 / (2012 - int(personexpedition.expedition.year))
-        #person.bisnotable = person.notability > 0.3 # I don't know how to filter by this
-        #person.save()
-        
-        
 # used in other referencing parser functions
 # expedition name lookup cached for speed (it's a very big list)
 Gpersonexpeditionnamelookup = { }
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -92,13 +92,13 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
    teammembers = [ ]
 
 # uncomment to print out all files during parsing 
-    print("Reading file:", survexblock.survexfile.path)
+    print("Reading file: " + survexblock.survexfile.path)
    while True:
        svxline = fin.readline().decode("latin1")
        if not svxline:
            return
        textlines.append(svxline)
-        
+
        # break the line at the comment
        sline, comment = re.match(r"([^;]*?)\s*(?:;\s*(.*))?\n?$", svxline.strip()).groups()
        
--- a/parsers/surveys.py
+++ b/parsers/surveys.py
@@ -99,7 +99,7 @@ def parseSurveyScans(expedition, logfile=None):
                #scanList = listdir(expedition.year, surveyFolder)
                scanList=os.listdir(os.path.join(yearPath,surveyFolder))
            except AttributeError:
-                print(surveyFolder + " ignored\r",)
+                print("Folder: " + surveyFolder + " ignored\r")
                continue

            for scan in scanList:
@@ -107,7 +107,7 @@ def parseSurveyScans(expedition, logfile=None):
                    scanChopped=re.match(r'(?i).*(notes|elev|plan|elevation|extend)(\d*)\.(png|jpg|jpeg)',scan).groups()
                    scanType,scanNumber,scanFormat=scanChopped
                except AttributeError:
-                    print(scan + " ignored\r",)
+                    print("File: " + scan + " ignored\r")
                    continue
                if scanType == 'elev' or scanType == 'extend':
                    scanType = 'elevation'