2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-14 21:47:12 +00:00

restored logbook caching

This commit is contained in:
Philip Sargent
2022-03-23 22:55:59 +00:00
parent 4c7deadb9a
commit 9ccf5912d4

View File

@@ -109,6 +109,9 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki", tid=None): def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki", tid=None):
""" saves a logbook entry and related persontrips """ saves a logbook entry and related persontrips
Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday ! Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday !
troggle.log shows that we are creating lots of duplicates, which is no problem with SQL as they just overwrite
but we are saving the same thing too many times. Also seen in the ObjStore mimic
""" """
try: try:
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid) trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid)
@@ -153,12 +156,14 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
slug = str(randint(1000,10000)) + "_" + slugify(title)[:10].replace('-','_') slug = str(randint(1000,10000)) + "_" + slugify(title)[:10].replace('-','_')
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug, 'entry_type':entry_type} nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug, 'entry_type':entry_type}
# This creates the lbo instance of LogbookEntry
lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs) lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
for tripperson, time_underground in trippersons: for tripperson, time_underground in trippersons:
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo} lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)} nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
# this creates the PersonTrip instance.
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) # PersonTrip also saved in SetDatesFromLogbookEntries save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) # PersonTrip also saved in SetDatesFromLogbookEntries
def ParseDate(tripdate, year): def ParseDate(tripdate, year):
@@ -243,19 +248,25 @@ def Parselogwikitxt(year, expedition, txt):
def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu, formattype, tripid1, seq, tid=None): def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu, formattype, tripid1, seq, tid=None):
'''Called once for each logbook entry as the logbook is parsed
'''
# This will need additional functions to replicate the persontrip calculation and storage. For the # This will need additional functions to replicate the persontrip calculation and storage. For the
# moment we leave all that to be done in the django db # moment we leave all that to be done in the django db
global trips # should be a singleton TROG eventually global trips # should be a singleton TROG eventually
global logdataissues global logdataissues
if tid in trips: if tid in trips:
tyear, tdate, *trest = trips[tid] tyear, tdate, *trest = trips[tid]
msg = f" ! DUPLICATE on {tdate} id: '{tid}'" msg = f" ! DUPLICATE tid: '{tid}' on date:{tdate} "
print(msg) print(msg)
DataIssue.objects.create(parser='logbooks', message=msg) DataIssue.objects.create(parser='logbooks', message=msg)
tid = set_trip_id(str(date),seq) tid = set_trip_id(str(date),seq)
#print(" - De-dup ",seq, tid) #print(" - De-dup ",seq, tid)
logdataissues[tid]=msg logdataissues[tid]=msg
if not tid:
tid = set_trip_id(str(date),seq)
trips[tid] = (year, date, tripcave, triptitle, text, trippeople, tu, formattype) trips[tid] = (year, date, tripcave, triptitle, text, trippeople, tu, formattype)
## copy a lot of checking functionality here from EnterLogIntoDbase() ## copy a lot of checking functionality here from EnterLogIntoDbase()
@@ -267,7 +278,9 @@ def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu,
# message = " ! - Skipping logentry: %s - no author for entry in year '%s'" % (title, expedition.year) # message = " ! - Skipping logentry: %s - no author for entry in year '%s'" % (title, expedition.year)
# DataIssue.objects.create(parser='logbooks', message=message) # DataIssue.objects.create(parser='logbooks', message=message)
# logdataissues[tid+"author"]=message # logdataissues[tid+"author"]=message
pass return
# 2002, 2004, 2005, 2007, 2010 - now # 2002, 2004, 2005, 2007, 2010 - now
# 2006 wiki text is incomplete, but the html all there. So using this parser now. # 2006 wiki text is incomplete, but the html all there. So using this parser now.
@@ -280,6 +293,7 @@ def Parseloghtmltxt(year, expedition, txt):
for trippara in tripparas: for trippara in tripparas:
logbook_entry_count += 1 logbook_entry_count += 1
tid = set_trip_id(year,logbook_entry_count) tid = set_trip_id(year,logbook_entry_count)
print(f' - new tid:{tid} lbe count: {logbook_entry_count}')
s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)? \s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
@@ -517,7 +531,7 @@ def SetDatesFromLogbookEntries(expedition):
lprevpersontrip.save() lprevpersontrip.save()
persontrip.persontrip_next = None persontrip.persontrip_next = None
lprevpersontrip = persontrip lprevpersontrip = persontrip
persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import. #persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
def LoadLogbookForExpedition(expedition, expect): def LoadLogbookForExpedition(expedition, expect):
@@ -579,51 +593,54 @@ def LoadLogbookForExpedition(expedition, expect):
print(" - Cache file does not exist \"" + str(cache_filename) +"\"") print(" - Cache file does not exist \"" + str(cache_filename) +"\"")
expedition.save() expedition.save()
now = time.time() logbook_cached = False
bad_cache = True # emporarily disable reading the cache - buggy if True: # enable cache system
try: now = time.time()
cache_t = os.path.getmtime(cache_filename) bad_cache = False # temporarily disable reading the cache - buggy
if os.path.getmtime(logbookfile) - cache_t > 2: # at least 2 secs later
bad_cache= True
if now - cache_t > 30*24*60*60:
bad_cache= True
if bad_cache:
print(" - ! Cache is either stale or more than 30 days old. Deleting it.")
os.remove(cache_filename)
logentries=[]
print(" ! Removed stale or corrupt cache file")
raise
print(" - Reading cache: " + str(cache_filename), end='')
try: try:
with open(cache_filename, "rb") as f: cache_t = os.path.getmtime(cache_filename)
year,n,logentries = pickle.load(f) if os.path.getmtime(logbookfile) - cache_t > 2: # at least 2 secs later
if validcache(year,n): print(" - ! Cache is older than the logbook file")
print(" -- Loaded ", len(logentries), " log entries") bad_cache= True
logbook_cached = True if now - cache_t > 30*24*60*60:
else: print(" - ! Cache is > 30 days old")
print(" !- Told to expect ", expect, " but ", len(logentries), " found in cache") bad_cache= True
if bad_cache:
print(" - ! Cache is either stale or more than 30 days old. Deleting it.")
os.remove(cache_filename)
logentries=[]
print(" ! Removed stale or corrupt cache file")
raise raise
except: # print(" - Reading cache: " + str(cache_filename), end='')
print(" ! Failed to load corrupt cache. (Or I was told to ignore it). Deleting it.") try:
os.remove(cache_filename) with open(cache_filename, "rb") as f:
logentries=[] year,n,logentries = pickle.load(f)
raise if validcache(year,n):
except : print(" -- Loaded ", len(logentries), " log entries")
print(" - Cache de-pickle failure \"" + str(cache_filename) +"\"") logbook_cached = True
try: else:
file_in = open(logbookfile,'rb') print(" !- Told to expect ", expect, " but ", len(logentries), " found in cache")
txt = file_in.read().decode("latin1") raise
file_in.close() except:
logbook_parseable = True print(" ! Failed to load corrupt cache. (Or I was told to ignore it). Deleting it.")
print((" - Using: " + parsefunc + " to parse " + logbookfile)) os.remove(cache_filename)
except (IOError): logentries=[]
logbook_parseable = False raise
print((" ! Couldn't open logbook " + logbookfile)) except :
print(" - Cache de-pickle failure \"" + str(cache_filename) +"\"")
try:
file_in = open(logbookfile,'rb')
txt = file_in.read().decode("latin1")
file_in.close()
logbook_parseable = True
except (IOError):
logbook_parseable = False
print((" ! Couldn't open logbook " + logbookfile))
if logbook_parseable: if logbook_parseable:
parser = globals()[parsefunc] parser = globals()[parsefunc]
print(f' - Using parser {parsefunc}')
parser(expedition.year, expedition, txt) # this launches the parser parser(expedition.year, expedition, txt) # this launches the right parser for this year
SetDatesFromLogbookEntries(expedition) SetDatesFromLogbookEntries(expedition)
if len(logentries) >0: if len(logentries) >0:
@@ -634,17 +651,15 @@ def LoadLogbookForExpedition(expedition, expect):
else: else:
print(" ! NO TRIP entries found in logbook, check the syntax.") print(" ! NO TRIP entries found in logbook, check the syntax.")
if logbook_cached: # working on this bit... i=0
i=0 for entrytuple in logentries:
for entrytuple in range(len(logentries)): date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = entrytuple
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = logentries[i] EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
#print(" - entry tuple " , i, " tid", tripid1) entry_type, tripid1)
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0, EnterLogIntoObjStore(expedition.year, date, tripcave, triptitle, text, trippeople, logtime_underground,
entry_type, tripid1) entry_type, tripid1, i)
EnterLogIntoObjStore(expedition.year, date, tripcave, triptitle, text, trippeople, logtime_underground, i +=1
entry_type, tripid1, i) SetDatesFromLogbookEntries(expedition)
i +=1
SetDatesFromLogbookEntries(expedition)
return len(logentries) return len(logentries)
def LoadLogbooks(): def LoadLogbooks():
@@ -672,7 +687,7 @@ def LoadLogbooks():
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31, "2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41, "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
"1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1, "1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
"1985": 22,"1984": 32,"1983": 52,"1982": 42,} "1985": 24,"1984": 32,"1983": 52,"1982": 42,}
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing. # Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing.
try: try:
os.remove("loadlogbk.log") os.remove("loadlogbk.log")