forked from expo/troggle
restored logbook caching
This commit is contained in:
parent
4c7deadb9a
commit
9ccf5912d4
@ -109,6 +109,9 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
||||
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki", tid=None):
|
||||
""" saves a logbook entry and related persontrips
|
||||
Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday !
|
||||
|
||||
troggle.log shows that we are creating lots of duplicates, which is no problem with SQL as they just overwrite
|
||||
but we are saving the same thing too many times. Also seen in the ObjStore mimic
|
||||
"""
|
||||
try:
|
||||
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid)
|
||||
@ -153,12 +156,14 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
||||
slug = str(randint(1000,10000)) + "_" + slugify(title)[:10].replace('-','_')
|
||||
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug, 'entry_type':entry_type}
|
||||
|
||||
# This creates the lbo instance of LogbookEntry
|
||||
lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
|
||||
|
||||
|
||||
for tripperson, time_underground in trippersons:
|
||||
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
|
||||
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
|
||||
# this creates the PersonTrip instance.
|
||||
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) # PersonTrip also saved in SetDatesFromLogbookEntries
|
||||
|
||||
def ParseDate(tripdate, year):
|
||||
@ -243,19 +248,25 @@ def Parselogwikitxt(year, expedition, txt):
|
||||
|
||||
|
||||
def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu, formattype, tripid1, seq, tid=None):
|
||||
'''Called once for each logbook entry as the logbook is parsed
|
||||
'''
|
||||
# This will need additional functions to replicate the persontrip calculation and storage. For the
|
||||
# moment we leave all that to be done in the django db
|
||||
global trips # should be a singleton TROG eventually
|
||||
global logdataissues
|
||||
|
||||
|
||||
if tid in trips:
|
||||
tyear, tdate, *trest = trips[tid]
|
||||
msg = f" ! DUPLICATE on {tdate} id: '{tid}'"
|
||||
msg = f" ! DUPLICATE tid: '{tid}' on date:{tdate} "
|
||||
print(msg)
|
||||
DataIssue.objects.create(parser='logbooks', message=msg)
|
||||
tid = set_trip_id(str(date),seq)
|
||||
#print(" - De-dup ",seq, tid)
|
||||
logdataissues[tid]=msg
|
||||
|
||||
if not tid:
|
||||
tid = set_trip_id(str(date),seq)
|
||||
trips[tid] = (year, date, tripcave, triptitle, text, trippeople, tu, formattype)
|
||||
|
||||
## copy a lot of checking functionality here from EnterLogIntoDbase()
|
||||
@ -267,7 +278,9 @@ def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu,
|
||||
# message = " ! - Skipping logentry: %s - no author for entry in year '%s'" % (title, expedition.year)
|
||||
# DataIssue.objects.create(parser='logbooks', message=message)
|
||||
# logdataissues[tid+"author"]=message
|
||||
pass
|
||||
return
|
||||
|
||||
|
||||
|
||||
# 2002, 2004, 2005, 2007, 2010 - now
|
||||
# 2006 wiki text is incomplete, but the html all there. So using this parser now.
|
||||
@ -280,6 +293,7 @@ def Parseloghtmltxt(year, expedition, txt):
|
||||
for trippara in tripparas:
|
||||
logbook_entry_count += 1
|
||||
tid = set_trip_id(year,logbook_entry_count)
|
||||
print(f' - new tid:{tid} lbe count: {logbook_entry_count}')
|
||||
|
||||
s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
||||
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
|
||||
@ -517,7 +531,7 @@ def SetDatesFromLogbookEntries(expedition):
|
||||
lprevpersontrip.save()
|
||||
persontrip.persontrip_next = None
|
||||
lprevpersontrip = persontrip
|
||||
persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
|
||||
#persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
|
||||
|
||||
|
||||
def LoadLogbookForExpedition(expedition, expect):
|
||||
@ -579,51 +593,54 @@ def LoadLogbookForExpedition(expedition, expect):
|
||||
print(" - Cache file does not exist \"" + str(cache_filename) +"\"")
|
||||
|
||||
expedition.save()
|
||||
now = time.time()
|
||||
bad_cache = True # temporarily disable reading the cache - buggy
|
||||
try:
|
||||
cache_t = os.path.getmtime(cache_filename)
|
||||
if os.path.getmtime(logbookfile) - cache_t > 2: # at least 2 secs later
|
||||
bad_cache= True
|
||||
if now - cache_t > 30*24*60*60:
|
||||
bad_cache= True
|
||||
if bad_cache:
|
||||
print(" - ! Cache is either stale or more than 30 days old. Deleting it.")
|
||||
os.remove(cache_filename)
|
||||
logentries=[]
|
||||
print(" ! Removed stale or corrupt cache file")
|
||||
raise
|
||||
print(" - Reading cache: " + str(cache_filename), end='')
|
||||
logbook_cached = False
|
||||
if True: # enable cache system
|
||||
now = time.time()
|
||||
bad_cache = False # temporarily disable reading the cache - buggy
|
||||
try:
|
||||
with open(cache_filename, "rb") as f:
|
||||
year,n,logentries = pickle.load(f)
|
||||
if validcache(year,n):
|
||||
print(" -- Loaded ", len(logentries), " log entries")
|
||||
logbook_cached = True
|
||||
else:
|
||||
print(" !- Told to expect ", expect, " but ", len(logentries), " found in cache")
|
||||
cache_t = os.path.getmtime(cache_filename)
|
||||
if os.path.getmtime(logbookfile) - cache_t > 2: # at least 2 secs later
|
||||
print(" - ! Cache is older than the logbook file")
|
||||
bad_cache= True
|
||||
if now - cache_t > 30*24*60*60:
|
||||
print(" - ! Cache is > 30 days old")
|
||||
bad_cache= True
|
||||
if bad_cache:
|
||||
print(" - ! Cache is either stale or more than 30 days old. Deleting it.")
|
||||
os.remove(cache_filename)
|
||||
logentries=[]
|
||||
print(" ! Removed stale or corrupt cache file")
|
||||
raise
|
||||
except:
|
||||
print(" ! Failed to load corrupt cache. (Or I was told to ignore it). Deleting it.")
|
||||
os.remove(cache_filename)
|
||||
logentries=[]
|
||||
raise
|
||||
except :
|
||||
print(" - Cache de-pickle failure \"" + str(cache_filename) +"\"")
|
||||
try:
|
||||
file_in = open(logbookfile,'rb')
|
||||
txt = file_in.read().decode("latin1")
|
||||
file_in.close()
|
||||
logbook_parseable = True
|
||||
print((" - Using: " + parsefunc + " to parse " + logbookfile))
|
||||
except (IOError):
|
||||
logbook_parseable = False
|
||||
print((" ! Couldn't open logbook " + logbookfile))
|
||||
# print(" - Reading cache: " + str(cache_filename), end='')
|
||||
try:
|
||||
with open(cache_filename, "rb") as f:
|
||||
year,n,logentries = pickle.load(f)
|
||||
if validcache(year,n):
|
||||
print(" -- Loaded ", len(logentries), " log entries")
|
||||
logbook_cached = True
|
||||
else:
|
||||
print(" !- Told to expect ", expect, " but ", len(logentries), " found in cache")
|
||||
raise
|
||||
except:
|
||||
print(" ! Failed to load corrupt cache. (Or I was told to ignore it). Deleting it.")
|
||||
os.remove(cache_filename)
|
||||
logentries=[]
|
||||
raise
|
||||
except :
|
||||
print(" - Cache de-pickle failure \"" + str(cache_filename) +"\"")
|
||||
try:
|
||||
file_in = open(logbookfile,'rb')
|
||||
txt = file_in.read().decode("latin1")
|
||||
file_in.close()
|
||||
logbook_parseable = True
|
||||
except (IOError):
|
||||
logbook_parseable = False
|
||||
print((" ! Couldn't open logbook " + logbookfile))
|
||||
|
||||
if logbook_parseable:
|
||||
parser = globals()[parsefunc]
|
||||
|
||||
parser(expedition.year, expedition, txt) # this launches the parser
|
||||
print(f' - Using parser {parsefunc}')
|
||||
parser(expedition.year, expedition, txt) # this launches the right parser for this year
|
||||
|
||||
SetDatesFromLogbookEntries(expedition)
|
||||
if len(logentries) >0:
|
||||
@ -634,17 +651,15 @@ def LoadLogbookForExpedition(expedition, expect):
|
||||
else:
|
||||
print(" ! NO TRIP entries found in logbook, check the syntax.")
|
||||
|
||||
if logbook_cached: # working on this bit...
|
||||
i=0
|
||||
for entrytuple in range(len(logentries)):
|
||||
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = logentries[i]
|
||||
#print(" - entry tuple " , i, " tid", tripid1)
|
||||
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
|
||||
entry_type, tripid1)
|
||||
EnterLogIntoObjStore(expedition.year, date, tripcave, triptitle, text, trippeople, logtime_underground,
|
||||
entry_type, tripid1, i)
|
||||
i +=1
|
||||
SetDatesFromLogbookEntries(expedition)
|
||||
i=0
|
||||
for entrytuple in logentries:
|
||||
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = entrytuple
|
||||
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
|
||||
entry_type, tripid1)
|
||||
EnterLogIntoObjStore(expedition.year, date, tripcave, triptitle, text, trippeople, logtime_underground,
|
||||
entry_type, tripid1, i)
|
||||
i +=1
|
||||
SetDatesFromLogbookEntries(expedition)
|
||||
return len(logentries)
|
||||
|
||||
def LoadLogbooks():
|
||||
@ -672,7 +687,7 @@ def LoadLogbooks():
|
||||
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
|
||||
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
|
||||
"1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
|
||||
"1985": 22,"1984": 32,"1983": 52,"1982": 42,}
|
||||
"1985": 24,"1984": 32,"1983": 52,"1982": 42,}
|
||||
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing.
|
||||
try:
|
||||
os.remove("loadlogbk.log")
|
||||
|
Loading…
Reference in New Issue
Block a user