restored logbook caching

This commit is contained in:
Philip Sargent 2022-03-23 22:55:59 +00:00
parent 4c7deadb9a
commit 9ccf5912d4

@ -109,6 +109,9 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki", tid=None):
""" saves a logbook entry and related persontrips
Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday !
troggle.log shows that we are creating lots of duplicates, which is no problem with SQL as they just overwrite,
but we are saving the same thing too many times. Also seen in the ObjStore mimic.
"""
try:
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid)
@ -153,12 +156,14 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
slug = str(randint(1000,10000)) + "_" + slugify(title)[:10].replace('-','_')
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug, 'entry_type':entry_type}
# This creates the lbo instance of LogbookEntry
lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
for tripperson, time_underground in trippersons:
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
# this creates the PersonTrip instance.
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) # PersonTrip also saved in SetDatesFromLogbookEntries
def ParseDate(tripdate, year):
@ -243,19 +248,25 @@ def Parselogwikitxt(year, expedition, txt):
def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu, formattype, tripid1, seq, tid=None):
'''Called once for each logbook entry as the logbook is parsed
'''
# This will need additional functions to replicate the persontrip calculation and storage. For the
# moment we leave all that to be done in the django db
global trips # should be a singleton TROG eventually
global logdataissues
if tid in trips:
tyear, tdate, *trest = trips[tid]
msg = f" ! DUPLICATE on {tdate} id: '{tid}'"
msg = f" ! DUPLICATE tid: '{tid}' on date:{tdate} "
print(msg)
DataIssue.objects.create(parser='logbooks', message=msg)
tid = set_trip_id(str(date),seq)
#print(" - De-dup ",seq, tid)
logdataissues[tid]=msg
if not tid:
tid = set_trip_id(str(date),seq)
trips[tid] = (year, date, tripcave, triptitle, text, trippeople, tu, formattype)
## copy a lot of checking functionality here from EnterLogIntoDbase()
@ -267,7 +278,9 @@ def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu,
# message = " ! - Skipping logentry: %s - no author for entry in year '%s'" % (title, expedition.year)
# DataIssue.objects.create(parser='logbooks', message=message)
# logdataissues[tid+"author"]=message
pass
return
# 2002, 2004, 2005, 2007, 2010 - now
# 2006 wiki text is incomplete, but the html is all there. So using this parser now.
@ -280,6 +293,7 @@ def Parseloghtmltxt(year, expedition, txt):
for trippara in tripparas:
logbook_entry_count += 1
tid = set_trip_id(year,logbook_entry_count)
print(f' - new tid:{tid} lbe count: {logbook_entry_count}')
s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
@ -517,7 +531,7 @@ def SetDatesFromLogbookEntries(expedition):
lprevpersontrip.save()
persontrip.persontrip_next = None
lprevpersontrip = persontrip
persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
#persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
def LoadLogbookForExpedition(expedition, expect):
@ -579,13 +593,17 @@ def LoadLogbookForExpedition(expedition, expect):
print(" - Cache file does not exist \"" + str(cache_filename) +"\"")
expedition.save()
logbook_cached = False
if True: # enable cache system
now = time.time()
bad_cache = True # emporarily disable reading the cache - buggy
bad_cache = False # temporarily disable reading the cache - buggy
try:
cache_t = os.path.getmtime(cache_filename)
if os.path.getmtime(logbookfile) - cache_t > 2: # at least 2 secs later
print(" - ! Cache is older than the logbook file")
bad_cache= True
if now - cache_t > 30*24*60*60:
print(" - ! Cache is > 30 days old")
bad_cache= True
if bad_cache:
print(" - ! Cache is either stale or more than 30 days old. Deleting it.")
@ -593,7 +611,7 @@ def LoadLogbookForExpedition(expedition, expect):
logentries=[]
print(" ! Removed stale or corrupt cache file")
raise
print(" - Reading cache: " + str(cache_filename), end='')
# print(" - Reading cache: " + str(cache_filename), end='')
try:
with open(cache_filename, "rb") as f:
year,n,logentries = pickle.load(f)
@ -615,15 +633,14 @@ def LoadLogbookForExpedition(expedition, expect):
txt = file_in.read().decode("latin1")
file_in.close()
logbook_parseable = True
print((" - Using: " + parsefunc + " to parse " + logbookfile))
except (IOError):
logbook_parseable = False
print((" ! Couldn't open logbook " + logbookfile))
if logbook_parseable:
parser = globals()[parsefunc]
parser(expedition.year, expedition, txt) # this launches the parser
print(f' - Using parser {parsefunc}')
parser(expedition.year, expedition, txt) # this launches the right parser for this year
SetDatesFromLogbookEntries(expedition)
if len(logentries) >0:
@ -634,11 +651,9 @@ def LoadLogbookForExpedition(expedition, expect):
else:
print(" ! NO TRIP entries found in logbook, check the syntax.")
if logbook_cached: # working on this bit...
i=0
for entrytuple in range(len(logentries)):
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = logentries[i]
#print(" - entry tuple " , i, " tid", tripid1)
for entrytuple in logentries:
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = entrytuple
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
entry_type, tripid1)
EnterLogIntoObjStore(expedition.year, date, tripcave, triptitle, text, trippeople, logtime_underground,
@ -672,7 +687,7 @@ def LoadLogbooks():
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
"1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
"1985": 22,"1984": 32,"1983": 52,"1982": 42,}
"1985": 24,"1984": 32,"1983": 52,"1982": 42,}
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing.
try:
os.remove("loadlogbk.log")