restored logbook caching

This commit is contained in:
Philip Sargent 2022-03-23 22:55:59 +00:00
parent 4c7deadb9a
commit 9ccf5912d4

@ -109,6 +109,9 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki", tid=None): def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki", tid=None):
""" saves a logbook entry and related persontrips """ saves a logbook entry and related persontrips
Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday ! Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday !
troggle.log shows that we are creating lots of duplicates, which is no problem with SQL as they just overwrite,
but we are saving the same thing too many times. Also seen in the ObjStore mimic.
""" """
try: try:
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid) trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid)
@ -153,12 +156,14 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
slug = str(randint(1000,10000)) + "_" + slugify(title)[:10].replace('-','_') slug = str(randint(1000,10000)) + "_" + slugify(title)[:10].replace('-','_')
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug, 'entry_type':entry_type} nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug, 'entry_type':entry_type}
# This creates the lbo instance of LogbookEntry
lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs) lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
for tripperson, time_underground in trippersons: for tripperson, time_underground in trippersons:
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo} lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)} nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
# this creates the PersonTrip instance.
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) # PersonTrip also saved in SetDatesFromLogbookEntries save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) # PersonTrip also saved in SetDatesFromLogbookEntries
def ParseDate(tripdate, year): def ParseDate(tripdate, year):
@ -243,19 +248,25 @@ def Parselogwikitxt(year, expedition, txt):
def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu, formattype, tripid1, seq, tid=None): def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu, formattype, tripid1, seq, tid=None):
'''Called once for each logbook entry as the logbook is parsed
'''
# This will need additional functions to replicate the persontrip calculation and storage. For the # This will need additional functions to replicate the persontrip calculation and storage. For the
# moment we leave all that to be done in the django db # moment we leave all that to be done in the django db
global trips # should be a singleton TROG eventually global trips # should be a singleton TROG eventually
global logdataissues global logdataissues
if tid in trips: if tid in trips:
tyear, tdate, *trest = trips[tid] tyear, tdate, *trest = trips[tid]
msg = f" ! DUPLICATE on {tdate} id: '{tid}'" msg = f" ! DUPLICATE tid: '{tid}' on date:{tdate} "
print(msg) print(msg)
DataIssue.objects.create(parser='logbooks', message=msg) DataIssue.objects.create(parser='logbooks', message=msg)
tid = set_trip_id(str(date),seq) tid = set_trip_id(str(date),seq)
#print(" - De-dup ",seq, tid) #print(" - De-dup ",seq, tid)
logdataissues[tid]=msg logdataissues[tid]=msg
if not tid:
tid = set_trip_id(str(date),seq)
trips[tid] = (year, date, tripcave, triptitle, text, trippeople, tu, formattype) trips[tid] = (year, date, tripcave, triptitle, text, trippeople, tu, formattype)
## copy a lot of checking functionality here from EnterLogIntoDbase() ## copy a lot of checking functionality here from EnterLogIntoDbase()
@ -267,7 +278,9 @@ def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu,
# message = " ! - Skipping logentry: %s - no author for entry in year '%s'" % (title, expedition.year) # message = " ! - Skipping logentry: %s - no author for entry in year '%s'" % (title, expedition.year)
# DataIssue.objects.create(parser='logbooks', message=message) # DataIssue.objects.create(parser='logbooks', message=message)
# logdataissues[tid+"author"]=message # logdataissues[tid+"author"]=message
pass return
# 2002, 2004, 2005, 2007, 2010 - now # 2002, 2004, 2005, 2007, 2010 - now
# 2006 wiki text is incomplete, but the html all there. So using this parser now. # 2006 wiki text is incomplete, but the html all there. So using this parser now.
@ -280,6 +293,7 @@ def Parseloghtmltxt(year, expedition, txt):
for trippara in tripparas: for trippara in tripparas:
logbook_entry_count += 1 logbook_entry_count += 1
tid = set_trip_id(year,logbook_entry_count) tid = set_trip_id(year,logbook_entry_count)
print(f' - new tid:{tid} lbe count: {logbook_entry_count}')
s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)? \s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
@ -517,7 +531,7 @@ def SetDatesFromLogbookEntries(expedition):
lprevpersontrip.save() lprevpersontrip.save()
persontrip.persontrip_next = None persontrip.persontrip_next = None
lprevpersontrip = persontrip lprevpersontrip = persontrip
persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import. #persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
def LoadLogbookForExpedition(expedition, expect): def LoadLogbookForExpedition(expedition, expect):
@ -579,13 +593,17 @@ def LoadLogbookForExpedition(expedition, expect):
print(" - Cache file does not exist \"" + str(cache_filename) +"\"") print(" - Cache file does not exist \"" + str(cache_filename) +"\"")
expedition.save() expedition.save()
logbook_cached = False
if True: # enable cache system
now = time.time() now = time.time()
bad_cache = True # emporarily disable reading the cache - buggy bad_cache = False # temporarily disable reading the cache - buggy
try: try:
cache_t = os.path.getmtime(cache_filename) cache_t = os.path.getmtime(cache_filename)
if os.path.getmtime(logbookfile) - cache_t > 2: # at least 2 secs later if os.path.getmtime(logbookfile) - cache_t > 2: # at least 2 secs later
print(" - ! Cache is older than the logbook file")
bad_cache= True bad_cache= True
if now - cache_t > 30*24*60*60: if now - cache_t > 30*24*60*60:
print(" - ! Cache is > 30 days old")
bad_cache= True bad_cache= True
if bad_cache: if bad_cache:
print(" - ! Cache is either stale or more than 30 days old. Deleting it.") print(" - ! Cache is either stale or more than 30 days old. Deleting it.")
@ -593,7 +611,7 @@ def LoadLogbookForExpedition(expedition, expect):
logentries=[] logentries=[]
print(" ! Removed stale or corrupt cache file") print(" ! Removed stale or corrupt cache file")
raise raise
print(" - Reading cache: " + str(cache_filename), end='') # print(" - Reading cache: " + str(cache_filename), end='')
try: try:
with open(cache_filename, "rb") as f: with open(cache_filename, "rb") as f:
year,n,logentries = pickle.load(f) year,n,logentries = pickle.load(f)
@ -615,15 +633,14 @@ def LoadLogbookForExpedition(expedition, expect):
txt = file_in.read().decode("latin1") txt = file_in.read().decode("latin1")
file_in.close() file_in.close()
logbook_parseable = True logbook_parseable = True
print((" - Using: " + parsefunc + " to parse " + logbookfile))
except (IOError): except (IOError):
logbook_parseable = False logbook_parseable = False
print((" ! Couldn't open logbook " + logbookfile)) print((" ! Couldn't open logbook " + logbookfile))
if logbook_parseable: if logbook_parseable:
parser = globals()[parsefunc] parser = globals()[parsefunc]
print(f' - Using parser {parsefunc}')
parser(expedition.year, expedition, txt) # this launches the parser parser(expedition.year, expedition, txt) # this launches the right parser for this year
SetDatesFromLogbookEntries(expedition) SetDatesFromLogbookEntries(expedition)
if len(logentries) >0: if len(logentries) >0:
@ -634,11 +651,9 @@ def LoadLogbookForExpedition(expedition, expect):
else: else:
print(" ! NO TRIP entries found in logbook, check the syntax.") print(" ! NO TRIP entries found in logbook, check the syntax.")
if logbook_cached: # working on this bit...
i=0 i=0
for entrytuple in range(len(logentries)): for entrytuple in logentries:
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = logentries[i] date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = entrytuple
#print(" - entry tuple " , i, " tid", tripid1)
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0, EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
entry_type, tripid1) entry_type, tripid1)
EnterLogIntoObjStore(expedition.year, date, tripcave, triptitle, text, trippeople, logtime_underground, EnterLogIntoObjStore(expedition.year, date, tripcave, triptitle, text, trippeople, logtime_underground,
@ -672,7 +687,7 @@ def LoadLogbooks():
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31, "2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41, "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
"1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1, "1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
"1985": 22,"1984": 32,"1983": 52,"1982": 42,} "1985": 24,"1984": 32,"1983": 52,"1982": 42,}
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing. # Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing.
try: try:
os.remove("loadlogbk.log") os.remove("loadlogbk.log")