cache tidy and move settings into parser

This commit is contained in:
Philip Sargent 2022-11-21 16:41:52 +00:00
parent bcb61f9cd9
commit a795707552
3 changed files with 61 additions and 106 deletions

View File

@ -585,7 +585,7 @@ class QM(TroggleModel):
class PersonTrip(TroggleModel): class PersonTrip(TroggleModel):
"""Single Person going on a trip, which may or may not be written up. """Single Person going on a trip, which may or may not be written up.
It accounts for different T/U for people in same logbook entry. It could account for different T/U for people in same logbook entry.
""" """
personexpedition = models.ForeignKey("PersonExpedition",null=True,on_delete=models.CASCADE) personexpedition = models.ForeignKey("PersonExpedition",null=True,on_delete=models.CASCADE)
time_underground = models.FloatField(help_text="In decimal hours") time_underground = models.FloatField(help_text="In decimal hours")

View File

@ -215,6 +215,7 @@ def logbookentry(request, date, slug):
this_logbookentry=this_logbookentry[0] this_logbookentry=this_logbookentry[0]
# This is the only page that uses persontrip_next and persontrip_prev # This is the only page that uses persontrip_next and persontrip_prev
# and it is calculated on the fly in the model
return render(request, 'logbookentry.html', return render(request, 'logbookentry.html',
{'logbookentry': this_logbookentry, 'trips': trips, 'svxothers': svxothers, 'wallets': wallets}) {'logbookentry': this_logbookentry, 'trips': trips, 'svxothers': svxothers, 'wallets': wallets})
else: else:

View File

@ -45,12 +45,43 @@ todo='''
- this is a slow and uncertain function: cave = getCaveByReference(caveRef) - this is a slow and uncertain function: cave = getCaveByReference(caveRef)
''' '''
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
logentries = [] # the entire logbook for one year is a single object: a list of entries DEFAULT_LOGBOOK_PARSER = "Parseloghtmltxt"
noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau', DEFAULT_LOGBOOK_FILE = "logbook.html"
'base camp', 'basecamp', 'top camp', 'topcamp' ] # All years since 2010 use the default value for Logbook parser
logdataissues = TROG['issues']['logdataissues'] # but several don't work, and are skipped by the parsing code, e.g. 1983
trips ={} LOGBOOK_PARSER_SETTINGS = {
"2010": ("logbook.html", "Parseloghtmltxt"),
"2009": ("2009logbook.txt", "Parselogwikitxt"),
"2008": ("2008logbook.txt", "Parselogwikitxt"),
"2007": ("logbook.html", "Parseloghtmltxt"),
"2006": ("logbook.html", "Parseloghtmltxt"),
# "2006": ("logbook/logbook_06.txt", "Parselogwikitxt"),
"2006": ("logbook.html", "Parseloghtmltxt"),
"2005": ("logbook.html", "Parseloghtmltxt"),
"2004": ("logbook.html", "Parseloghtmltxt"),
"2003": ("logbook.html", "Parseloghtml03"),
"2002": ("logbook.html", "Parseloghtmltxt"),
"2001": ("log.htm", "Parseloghtml01"),
"2000": ("log.htm", "Parseloghtml01"),
"1999": ("log.htm", "Parseloghtml01"),
"1998": ("log.htm", "Parseloghtml01"),
"1997": ("log.htm", "Parseloghtml01"),
"1996": ("log.htm", "Parseloghtml01"),
"1995": ("log.htm", "Parseloghtml01"),
"1994": ("log.htm", "Parseloghtml01"),
"1993": ("log.htm", "Parseloghtml01"),
"1992": ("log.htm", "Parseloghtml01"),
"1991": ("log.htm", "Parseloghtml01"),
"1990": ("log.htm", "Parseloghtml01"),
"1989": ("log.htm", "Parseloghtml01"), #crashes MySQL
"1988": ("log.htm", "Parseloghtml01"), #crashes MySQL
"1987": ("log.htm", "Parseloghtml01"), #crashes MySQL
"1985": ("log.htm", "Parseloghtml01"),
"1984": ("log.htm", "Parseloghtml01"),
"1983": ("log.htm", "Parseloghtml01"),
"1982": ("log.htm", "Parseloghtml01"),
}
entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79, entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52, "2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
@ -60,6 +91,11 @@ entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015":
"1985": 24,"1984": 32,"1983": 52,"1982": 42,} "1985": 24,"1984": 32,"1983": 52,"1982": 42,}
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no fully working parser, or need hand-editing. # Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no fully working parser, or need hand-editing.
logentries = [] # the entire logbook for one year is a single object: a list of entries
noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau',
'base camp', 'basecamp', 'top camp', 'topcamp' ]
logdataissues = TROG['issues']['logdataissues']
trips ={}
# #
# the logbook loading section # the logbook loading section
@ -169,7 +205,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo} lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)} nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
# this creates the PersonTrip instance. # this creates the PersonTrip instance.
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) # PersonTrip also saved in SetDatesFromLogbookEntries save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs)
def ParseDate(tripdate, year): def ParseDate(tripdate, year):
""" Interprets dates in the expo logbooks and returns a correct datetime.date object """ """ Interprets dates in the expo logbooks and returns a correct datetime.date object """
@ -465,34 +501,6 @@ def Parseloghtml03(year, expedition, txt):
trippeople, expedition, tu, "html03", tid) trippeople, expedition, tu, "html03", tid)
logentries.append(entrytuple) logentries.append(entrytuple)
def SetDatesFromLogbookEntries(expedition):
"""Link each person's trips on one expedition into a prev/next chain.
For every personexpedition belonging to `expedition`, the persontrips are
fetched ordered by 'logbook_entry__date' and each instance is given
persontrip_prev
persontrip_next
pointing at its neighbours in that ordering.
This is ONLY needed when a logbook entry is displayed. So could be called lazily
only when one of these entries is requested.
Despite the function name, it does NOT do what an earlier docstring claimed:
it does not set the date_from and date_to field for an expedition based on persontrips,
nor the expedition date_from and date_to based on the personexpeditions.
NOTE(review): the only save() call is on the *previous* trip, so the last trip
seen for each person does not appear to be saved here - confirm.
"""
# Probably a faster way to do this. This uses a lot of db queries, but we have all this
# in memory..
for personexpedition in expedition.personexpedition_set.all():
persontrips = personexpedition.persontrip_set.order_by('logbook_entry__date')
# sequencing is difficult to do
# lprevpersontrip tracks the trip handled on the previous pass; None for the first trip
lprevpersontrip = None
for persontrip in persontrips:
persontrip.persontrip_prev = lprevpersontrip
if lprevpersontrip:
# now that its successor is known, complete the previous trip's forward link and save it
lprevpersontrip.persontrip_next = persontrip
lprevpersontrip.save()
# no successor known yet; overwritten if a later trip follows
persontrip.persontrip_next = None
lprevpersontrip = persontrip
#persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
def LoadLogbookForExpedition(expedition): def LoadLogbookForExpedition(expedition):
""" Parses all logbook entries for one expedition """ Parses all logbook entries for one expedition
@ -505,7 +513,7 @@ def LoadLogbookForExpedition(expedition):
logbook_parseable = False logbook_parseable = False
logbook_cached = False logbook_cached = False
yearlinks = settings.LOGBOOK_PARSER_SETTINGS yearlinks = LOGBOOK_PARSER_SETTINGS
expologbase = os.path.join(settings.EXPOWEB, "years") expologbase = os.path.join(settings.EXPOWEB, "years")
logentries=[] logentries=[]
@ -555,85 +563,33 @@ def LoadLogbookForExpedition(expedition):
expedition.logbookfile = yearlinks[year][0] expedition.logbookfile = yearlinks[year][0]
parsefunc = yearlinks[year][1] parsefunc = yearlinks[year][1]
else: else:
logbookpath = Path(expologbase) / year / settings.DEFAULT_LOGBOOK_FILE logbookpath = Path(expologbase) / year / DEFAULT_LOGBOOK_FILE
expedition.logbookfile = settings.DEFAULT_LOGBOOK_FILE expedition.logbookfile = DEFAULT_LOGBOOK_FILE
parsefunc = settings.DEFAULT_LOGBOOK_PARSER parsefunc = DEFAULT_LOGBOOK_PARSER
cache_filename = Path(str(logbookpath) + ".cache") cache_filename = Path(str(logbookpath) + ".cache")
if not cache_filename.is_file(): if not cache_filename.is_file():
print(" - Cache file does not exist \"" + str(cache_filename) +"\"") print(" - Cache file does not exist \"" + str(cache_filename) +"\"")
expedition.save() expedition.save()
logbook_cached = False logbook_cached = False
if False: # enable cache system
now = time.time() try:
bad_cache = False # temporarily disable reading the cache - buggy file_in = open(logbookpath,'rb')
try: txt = file_in.read().decode("utf-8")
cache_t = os.path.getmtime(cache_filename) file_in.close()
if os.path.getmtime(logbookpath) - cache_t > 2: # at least 2 secs later logbook_parseable = True
print(" - ! Cache is older than the logbook file") except (IOError):
bad_cache= True logbook_parseable = False
if now - cache_t > 30*24*60*60: print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
print(" - ! Cache is > 30 days old") except:
bad_cache= True logbook_parseable = False
if bad_cache: print(" ! Very Bad Error opening " + logbookpath)
print(" - so cache is either stale or more than 30 days old. Deleting it.")
os.remove(cache_filename)
logentries=[]
print(" - Deleted stale or corrupt cache file")
raise
try:
# print(" - Reading cache: " + str(cache_filename), end='')
with open(cache_filename, "rb") as f:
year, lbsize, n, logentries = pickle.load(f)
if validcache(year, n, lbsize):
print(f" -- {year} : Loaded {len(logentries)} log entries")
logbook_cached = True
else:
print(" !- {year} : Cache failed validity checks")
raise
except:
print(" ! Failed to load corrupt cache (or I was told to ignore it). Deleting it.")
os.remove(cache_filename)
logentries=[]
raise
except :
print(" - Cache old or de-pickle failure \"" + str(cache_filename) +"\"")
try:
file_in = open(logbookpath,'rb')
txt = file_in.read().decode("utf-8")
file_in.close()
logbook_parseable = True
except (IOError):
logbook_parseable = False
print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
else:
try:
file_in = open(logbookpath,'rb')
txt = file_in.read().decode("utf-8")
file_in.close()
logbook_parseable = True
except (IOError):
logbook_parseable = False
print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
except:
logbook_parseable = False
print(" ! Very Bad Error opening " + logbookpath)
if logbook_parseable: if logbook_parseable:
parser = globals()[parsefunc] parser = globals()[parsefunc]
print(f' - {year} parsing with {parsefunc}') print(f' - {year} parsing with {parsefunc}')
parser(year, expedition, txt) # this launches the right parser for this year parser(year, expedition, txt) # this launches the right parser for this year
# SetDatesFromLogbookEntries(expedition)
# if len(logentries) >0:
# print(" - Cacheing " , len(logentries), " log entries")
# lbsize = logbookpath.stat().st_size
# with open(cache_filename, "wb") as fc: # we must check that permissions are g+w ! or expo can't delete the cache
# logbk=(expedition,lbsize,len(logentries),logentries)
# pickle.dump(logbk, fc, protocol=4)
# else:
# print(" ! NO TRIP entries found in logbook, check the syntax.")
i=0 i=0
for entrytuple in logentries: for entrytuple in logentries:
# date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1 = entrytuple # date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1 = entrytuple
@ -644,7 +600,6 @@ def LoadLogbookForExpedition(expedition):
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0, EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
tripid1) tripid1)
i +=1 i +=1
# SetDatesFromLogbookEntries(expedition)
if len(logentries) == expect: if len(logentries) == expect:
# print(f"OK {year} {len(logentries):5d} is {expect}\n") # print(f"OK {year} {len(logentries):5d} is {expect}\n")
@ -656,7 +611,6 @@ def LoadLogbookForExpedition(expedition):
def LoadLogbooks(): def LoadLogbooks():
""" This is the master function for parsing all logbooks into the Troggle database. """ This is the master function for parsing all logbooks into the Troggle database.
Parser settings appropriate for each year are set in settings.py LOGBOOK_PARSER_SETTINGS.
This should be rewritten to use coroutines to load all logbooks from disc in parallel, This should be rewritten to use coroutines to load all logbooks from disc in parallel,
but must be serialised to write to database as sqlite is single-user. but must be serialised to write to database as sqlite is single-user.
""" """