restored logbook caching

This commit is contained in:
Philip Sargent 2022-03-23 22:55:59 +00:00
parent 4c7deadb9a
commit 9ccf5912d4

View File

@ -109,6 +109,9 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki", tid=None):
""" saves a logbook entry and related persontrips
Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday !
troggle.log shows that we are creating lots of duplicates, which is no problem with SQL as they just overwrite,
but we are saving the same thing too many times. Also seen in the ObjStore mimic.
"""
try:
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid)
@ -153,12 +156,14 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
slug = str(randint(1000,10000)) + "_" + slugify(title)[:10].replace('-','_')
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug, 'entry_type':entry_type}
# This creates the lbo instance of LogbookEntry
lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
for tripperson, time_underground in trippersons:
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
# this creates the PersonTrip instance.
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) # PersonTrip also saved in SetDatesFromLogbookEntries
def ParseDate(tripdate, year):
@ -243,19 +248,25 @@ def Parselogwikitxt(year, expedition, txt):
def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu, formattype, tripid1, seq, tid=None):
'''Called once for each logbook entry as the logbook is parsed
'''
# This will need additional functions to replicate the persontrip calculation and storage. For the
# moment we leave all that to be done in the django db
global trips # should be a singleton TROG eventually
global logdataissues
if tid in trips:
tyear, tdate, *trest = trips[tid]
msg = f" ! DUPLICATE on {tdate} id: '{tid}'"
msg = f" ! DUPLICATE tid: '{tid}' on date:{tdate} "
print(msg)
DataIssue.objects.create(parser='logbooks', message=msg)
tid = set_trip_id(str(date),seq)
#print(" - De-dup ",seq, tid)
logdataissues[tid]=msg
if not tid:
tid = set_trip_id(str(date),seq)
trips[tid] = (year, date, tripcave, triptitle, text, trippeople, tu, formattype)
## copy a lot of checking functionality here from EnterLogIntoDbase()
@ -267,7 +278,9 @@ def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu,
# message = " ! - Skipping logentry: %s - no author for entry in year '%s'" % (title, expedition.year)
# DataIssue.objects.create(parser='logbooks', message=message)
# logdataissues[tid+"author"]=message
pass
return
# 2002, 2004, 2005, 2007, 2010 - now
# 2006 wiki text is incomplete, but the html all there. So using this parser now.
@ -280,6 +293,7 @@ def Parseloghtmltxt(year, expedition, txt):
for trippara in tripparas:
logbook_entry_count += 1
tid = set_trip_id(year,logbook_entry_count)
print(f' - new tid:{tid} lbe count: {logbook_entry_count}')
s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
@ -517,7 +531,7 @@ def SetDatesFromLogbookEntries(expedition):
lprevpersontrip.save()
persontrip.persontrip_next = None
lprevpersontrip = persontrip
persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
#persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
def LoadLogbookForExpedition(expedition, expect):
@ -579,51 +593,54 @@ def LoadLogbookForExpedition(expedition, expect):
print(" - Cache file does not exist \"" + str(cache_filename) +"\"")
expedition.save()
now = time.time()
bad_cache = True # emporarily disable reading the cache - buggy
try:
cache_t = os.path.getmtime(cache_filename)
if os.path.getmtime(logbookfile) - cache_t > 2: # at least 2 secs later
bad_cache= True
if now - cache_t > 30*24*60*60:
bad_cache= True
if bad_cache:
print(" - ! Cache is either stale or more than 30 days old. Deleting it.")
os.remove(cache_filename)
logentries=[]
print(" ! Removed stale or corrupt cache file")
raise
print(" - Reading cache: " + str(cache_filename), end='')
logbook_cached = False
if True: # enable cache system
now = time.time()
bad_cache = False # temporarily disable reading the cache - buggy
try:
with open(cache_filename, "rb") as f:
year,n,logentries = pickle.load(f)
if validcache(year,n):
print(" -- Loaded ", len(logentries), " log entries")
logbook_cached = True
else:
print(" !- Told to expect ", expect, " but ", len(logentries), " found in cache")
cache_t = os.path.getmtime(cache_filename)
if os.path.getmtime(logbookfile) - cache_t > 2: # at least 2 secs later
print(" - ! Cache is older than the logbook file")
bad_cache= True
if now - cache_t > 30*24*60*60:
print(" - ! Cache is > 30 days old")
bad_cache= True
if bad_cache:
print(" - ! Cache is either stale or more than 30 days old. Deleting it.")
os.remove(cache_filename)
logentries=[]
print(" ! Removed stale or corrupt cache file")
raise
except:
print(" ! Failed to load corrupt cache. (Or I was told to ignore it). Deleting it.")
os.remove(cache_filename)
logentries=[]
raise
except :
print(" - Cache de-pickle failure \"" + str(cache_filename) +"\"")
try:
file_in = open(logbookfile,'rb')
txt = file_in.read().decode("latin1")
file_in.close()
logbook_parseable = True
print((" - Using: " + parsefunc + " to parse " + logbookfile))
except (IOError):
logbook_parseable = False
print((" ! Couldn't open logbook " + logbookfile))
# print(" - Reading cache: " + str(cache_filename), end='')
try:
with open(cache_filename, "rb") as f:
year,n,logentries = pickle.load(f)
if validcache(year,n):
print(" -- Loaded ", len(logentries), " log entries")
logbook_cached = True
else:
print(" !- Told to expect ", expect, " but ", len(logentries), " found in cache")
raise
except:
print(" ! Failed to load corrupt cache. (Or I was told to ignore it). Deleting it.")
os.remove(cache_filename)
logentries=[]
raise
except :
print(" - Cache de-pickle failure \"" + str(cache_filename) +"\"")
try:
file_in = open(logbookfile,'rb')
txt = file_in.read().decode("latin1")
file_in.close()
logbook_parseable = True
except (IOError):
logbook_parseable = False
print((" ! Couldn't open logbook " + logbookfile))
if logbook_parseable:
parser = globals()[parsefunc]
parser(expedition.year, expedition, txt) # this launches the parser
print(f' - Using parser {parsefunc}')
parser(expedition.year, expedition, txt) # this launches the right parser for this year
SetDatesFromLogbookEntries(expedition)
if len(logentries) >0:
@ -634,17 +651,15 @@ def LoadLogbookForExpedition(expedition, expect):
else:
print(" ! NO TRIP entries found in logbook, check the syntax.")
if logbook_cached: # working on this bit...
i=0
for entrytuple in range(len(logentries)):
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = logentries[i]
#print(" - entry tuple " , i, " tid", tripid1)
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
entry_type, tripid1)
EnterLogIntoObjStore(expedition.year, date, tripcave, triptitle, text, trippeople, logtime_underground,
entry_type, tripid1, i)
i +=1
SetDatesFromLogbookEntries(expedition)
i=0
for entrytuple in logentries:
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = entrytuple
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
entry_type, tripid1)
EnterLogIntoObjStore(expedition.year, date, tripcave, triptitle, text, trippeople, logtime_underground,
entry_type, tripid1, i)
i +=1
SetDatesFromLogbookEntries(expedition)
return len(logentries)
def LoadLogbooks():
@ -672,7 +687,7 @@ def LoadLogbooks():
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
"1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
"1985": 22,"1984": 32,"1983": 52,"1982": 42,}
"1985": 24,"1984": 32,"1983": 52,"1982": 42,}
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing.
try:
os.remove("loadlogbk.log")