2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-21 23:01:52 +00:00

cache tidy and move settings into parser

This commit is contained in:
Philip Sargent 2022-11-21 16:41:52 +00:00
parent bcb61f9cd9
commit a795707552
3 changed files with 61 additions and 106 deletions

View File

@ -585,7 +585,7 @@ class QM(TroggleModel):
class PersonTrip(TroggleModel):
"""Single Person going on a trip, which may or may not be written up.
It accounts for different T/U for people in same logbook entry.
It could account for different T/U for people in same logbook entry.
"""
personexpedition = models.ForeignKey("PersonExpedition",null=True,on_delete=models.CASCADE)
time_underground = models.FloatField(help_text="In decimal hours")

View File

@ -215,6 +215,7 @@ def logbookentry(request, date, slug):
this_logbookentry=this_logbookentry[0]
# This is the only page that uses persontrip_next and persontrip_prev
# and it is calculated on the fly in the model
return render(request, 'logbookentry.html',
{'logbookentry': this_logbookentry, 'trips': trips, 'svxothers': svxothers, 'wallets': wallets})
else:

View File

@ -45,12 +45,43 @@ todo='''
- this is a slow and uncertain function: cave = getCaveByReference(caveRef)
'''
# Module-level mutable state shared by the logbook parsers in this file.
logentries = [] # the entire logbook for one year is a single object: a list of entries
# Trip "places" that are not caves; used to decide whether a logbook entry
# should be linked to a Cave object.
noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau',
'base camp', 'basecamp', 'top camp', 'topcamp' ]
# NOTE(review): TROG appears to be a project-global cache/registry imported
# elsewhere in this file — confirm its structure before relying on it here.
logdataissues = TROG['issues']['logdataissues']
trips ={}
# Maximum length (characters) accepted for a logbook entry title before it
# is treated as a parsing error.
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
# Parser function name and filename used for any year not listed in
# LOGBOOK_PARSER_SETTINGS below.
DEFAULT_LOGBOOK_PARSER = "Parseloghtmltxt"
DEFAULT_LOGBOOK_FILE = "logbook.html"
# All years since 2010 use the default value for Logbook parser
# but several don't work, and are skipped by the parsing code, e.g. 1983
# Maps year -> (logbook filename, parser function name).
LOGBOOK_PARSER_SETTINGS = {
    "2010": ("logbook.html", "Parseloghtmltxt"),
    "2009": ("2009logbook.txt", "Parselogwikitxt"),
    "2008": ("2008logbook.txt", "Parselogwikitxt"),
    "2007": ("logbook.html", "Parseloghtmltxt"),
    # "2006": ("logbook/logbook_06.txt", "Parselogwikitxt"),
    "2006": ("logbook.html", "Parseloghtmltxt"),  # duplicate key removed; this was the effective value
    "2005": ("logbook.html", "Parseloghtmltxt"),
    "2004": ("logbook.html", "Parseloghtmltxt"),
    "2003": ("logbook.html", "Parseloghtml03"),
    "2002": ("logbook.html", "Parseloghtmltxt"),
    "2001": ("log.htm", "Parseloghtml01"),
    "2000": ("log.htm", "Parseloghtml01"),
    "1999": ("log.htm", "Parseloghtml01"),
    "1998": ("log.htm", "Parseloghtml01"),
    "1997": ("log.htm", "Parseloghtml01"),
    "1996": ("log.htm", "Parseloghtml01"),
    "1995": ("log.htm", "Parseloghtml01"),
    "1994": ("log.htm", "Parseloghtml01"),
    "1993": ("log.htm", "Parseloghtml01"),
    "1992": ("log.htm", "Parseloghtml01"),
    "1991": ("log.htm", "Parseloghtml01"),
    "1990": ("log.htm", "Parseloghtml01"),
    "1989": ("log.htm", "Parseloghtml01"), #crashes MySQL
    "1988": ("log.htm", "Parseloghtml01"), #crashes MySQL
    "1987": ("log.htm", "Parseloghtml01"), #crashes MySQL
    "1985": ("log.htm", "Parseloghtml01"),
    "1984": ("log.htm", "Parseloghtml01"),
    "1983": ("log.htm", "Parseloghtml01"),
    "1982": ("log.htm", "Parseloghtml01"),
}
entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
@ -60,6 +91,11 @@ entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015":
"1985": 24,"1984": 32,"1983": 52,"1982": 42,}
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing.
logentries = [] # the entire logbook for one year is a single object: a list of entries
noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau',
'base camp', 'basecamp', 'top camp', 'topcamp' ]
logdataissues = TROG['issues']['logdataissues']
trips ={}
#
# the logbook loading section
@ -169,7 +205,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
# this creates the PersonTrip instance.
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) # PersonTrip also saved in SetDatesFromLogbookEntries
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs)
def ParseDate(tripdate, year):
""" Interprets dates in the expo logbooks and returns a correct datetime.date object """
@ -465,34 +501,6 @@ def Parseloghtml03(year, expedition, txt):
trippeople, expedition, tu, "html03", tid)
logentries.append(entrytuple)
def SetDatesFromLogbookEntries(expedition):
    """Sets the next and previous entry for a persontrip by setting
    persontrip_prev
    persontrip_next
    for each persontrip instance.

    This is ONLY needed when a logbook entry is displayed. So could be called lazily
    only when one of these entries is requested.

    It does NOT do what an older docstring claimed:
    it does not set the date_from / date_to fields of the expedition.
    """
    # Probably a faster way to do this. This uses a lot of db queries, but we have all this
    # in memory..
    for personexpedition in expedition.personexpedition_set.all():
        # Order by logbook date so prev/next links follow chronological order.
        persontrips = personexpedition.persontrip_set.order_by('logbook_entry__date')
        # sequencing is difficult to do
        lprevpersontrip = None
        for persontrip in persontrips:
            persontrip.persontrip_prev = lprevpersontrip
            if lprevpersontrip:
                # Back-patch the previous trip's forward link now that we know its successor.
                lprevpersontrip.persontrip_next = persontrip
                lprevpersontrip.save()
            persontrip.persontrip_next = None  # last trip keeps None unless back-patched
            lprevpersontrip = persontrip
        #persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
def LoadLogbookForExpedition(expedition):
""" Parses all logbook entries for one expedition
@ -505,7 +513,7 @@ def LoadLogbookForExpedition(expedition):
logbook_parseable = False
logbook_cached = False
yearlinks = settings.LOGBOOK_PARSER_SETTINGS
yearlinks = LOGBOOK_PARSER_SETTINGS
expologbase = os.path.join(settings.EXPOWEB, "years")
logentries=[]
@ -555,85 +563,33 @@ def LoadLogbookForExpedition(expedition):
expedition.logbookfile = yearlinks[year][0]
parsefunc = yearlinks[year][1]
else:
logbookpath = Path(expologbase) / year / settings.DEFAULT_LOGBOOK_FILE
expedition.logbookfile = settings.DEFAULT_LOGBOOK_FILE
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
logbookpath = Path(expologbase) / year / DEFAULT_LOGBOOK_FILE
expedition.logbookfile = DEFAULT_LOGBOOK_FILE
parsefunc = DEFAULT_LOGBOOK_PARSER
cache_filename = Path(str(logbookpath) + ".cache")
if not cache_filename.is_file():
print(" - Cache file does not exist \"" + str(cache_filename) +"\"")
expedition.save()
logbook_cached = False
if False: # enable cache system
now = time.time()
bad_cache = False # temporarily disable reading the cache - buggy
try:
cache_t = os.path.getmtime(cache_filename)
if os.path.getmtime(logbookpath) - cache_t > 2: # at least 2 secs later
print(" - ! Cache is older than the logbook file")
bad_cache= True
if now - cache_t > 30*24*60*60:
print(" - ! Cache is > 30 days old")
bad_cache= True
if bad_cache:
print(" - so cache is either stale or more than 30 days old. Deleting it.")
os.remove(cache_filename)
logentries=[]
print(" - Deleted stale or corrupt cache file")
raise
try:
# print(" - Reading cache: " + str(cache_filename), end='')
with open(cache_filename, "rb") as f:
year, lbsize, n, logentries = pickle.load(f)
if validcache(year, n, lbsize):
print(f" -- {year} : Loaded {len(logentries)} log entries")
logbook_cached = True
else:
print(" !- {year} : Cache failed validity checks")
raise
except:
print(" ! Failed to load corrupt cache (or I was told to ignore it). Deleting it.")
os.remove(cache_filename)
logentries=[]
raise
except :
print(" - Cache old or de-pickle failure \"" + str(cache_filename) +"\"")
try:
file_in = open(logbookpath,'rb')
txt = file_in.read().decode("utf-8")
file_in.close()
logbook_parseable = True
except (IOError):
logbook_parseable = False
print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
else:
try:
file_in = open(logbookpath,'rb')
txt = file_in.read().decode("utf-8")
file_in.close()
logbook_parseable = True
except (IOError):
logbook_parseable = False
print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
except:
logbook_parseable = False
print(" ! Very Bad Error opening " + logbookpath)
try:
file_in = open(logbookpath,'rb')
txt = file_in.read().decode("utf-8")
file_in.close()
logbook_parseable = True
except (IOError):
logbook_parseable = False
print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
except:
logbook_parseable = False
print(" ! Very Bad Error opening " + logbookpath)
if logbook_parseable:
parser = globals()[parsefunc]
print(f' - {year} parsing with {parsefunc}')
parser(year, expedition, txt) # this launches the right parser for this year
# SetDatesFromLogbookEntries(expedition)
# if len(logentries) >0:
# print(" - Cacheing " , len(logentries), " log entries")
# lbsize = logbookpath.stat().st_size
# with open(cache_filename, "wb") as fc: # we much check that permission are g+w ! or expo can't delete the cache
# logbk=(expedition,lbsize,len(logentries),logentries)
# pickle.dump(logbk, fc, protocol=4)
# else:
# print(" ! NO TRIP entries found in logbook, check the syntax.")
i=0
for entrytuple in logentries:
# date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1 = entrytuple
@ -644,7 +600,6 @@ def LoadLogbookForExpedition(expedition):
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
tripid1)
i +=1
# SetDatesFromLogbookEntries(expedition)
if len(logentries) == expect:
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
@ -656,7 +611,6 @@ def LoadLogbookForExpedition(expedition):
def LoadLogbooks():
""" This is the master function for parsing all logbooks into the Troggle database.
Parser settings appropriate for each year are set in settings.py LOGBOOK_PARSER_SETTINGS.
This should be rewritten to use coroutines to load all logbooks from disc in parallel,
but must be serialised to write to database as sqlite is single-user.
"""