forked from expo/troggle
restored logbook cacheing
This commit is contained in:
parent
4c7deadb9a
commit
9ccf5912d4
@ -109,6 +109,9 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
|||||||
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki", tid=None):
|
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki", tid=None):
|
||||||
""" saves a logbook entry and related persontrips
|
""" saves a logbook entry and related persontrips
|
||||||
Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday !
|
Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday !
|
||||||
|
|
||||||
|
troggle.log shows that we are creating lots of duplicates, which is no problem with SQL as they just overwrite
|
||||||
|
but we are saving the same thing too many times. Also seen in the ObjStore mimic
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid)
|
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid)
|
||||||
@ -153,12 +156,14 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
|||||||
slug = str(randint(1000,10000)) + "_" + slugify(title)[:10].replace('-','_')
|
slug = str(randint(1000,10000)) + "_" + slugify(title)[:10].replace('-','_')
|
||||||
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug, 'entry_type':entry_type}
|
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug, 'entry_type':entry_type}
|
||||||
|
|
||||||
|
# This creates the lbo instance of LogbookEntry
|
||||||
lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
|
lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
|
||||||
|
|
||||||
|
|
||||||
for tripperson, time_underground in trippersons:
|
for tripperson, time_underground in trippersons:
|
||||||
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
|
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
|
||||||
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
|
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
|
||||||
|
# this creates the PersonTrip instance.
|
||||||
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) # PersonTrip also saved in SetDatesFromLogbookEntries
|
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) # PersonTrip also saved in SetDatesFromLogbookEntries
|
||||||
|
|
||||||
def ParseDate(tripdate, year):
|
def ParseDate(tripdate, year):
|
||||||
@ -243,19 +248,25 @@ def Parselogwikitxt(year, expedition, txt):
|
|||||||
|
|
||||||
|
|
||||||
def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu, formattype, tripid1, seq, tid=None):
|
def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu, formattype, tripid1, seq, tid=None):
|
||||||
|
'''Called once for each logbook entry as the logbook is parsed
|
||||||
|
'''
|
||||||
# This will need additional functions to replicate the persontrip calculation and storage. For the
|
# This will need additional functions to replicate the persontrip calculation and storage. For the
|
||||||
# moment we leave all that to be done in the django db
|
# moment we leave all that to be done in the django db
|
||||||
global trips # should be a singleton TROG eventually
|
global trips # should be a singleton TROG eventually
|
||||||
global logdataissues
|
global logdataissues
|
||||||
|
|
||||||
|
|
||||||
if tid in trips:
|
if tid in trips:
|
||||||
tyear, tdate, *trest = trips[tid]
|
tyear, tdate, *trest = trips[tid]
|
||||||
msg = f" ! DUPLICATE on {tdate} id: '{tid}'"
|
msg = f" ! DUPLICATE tid: '{tid}' on date:{tdate} "
|
||||||
print(msg)
|
print(msg)
|
||||||
DataIssue.objects.create(parser='logbooks', message=msg)
|
DataIssue.objects.create(parser='logbooks', message=msg)
|
||||||
tid = set_trip_id(str(date),seq)
|
tid = set_trip_id(str(date),seq)
|
||||||
#print(" - De-dup ",seq, tid)
|
#print(" - De-dup ",seq, tid)
|
||||||
logdataissues[tid]=msg
|
logdataissues[tid]=msg
|
||||||
|
|
||||||
|
if not tid:
|
||||||
|
tid = set_trip_id(str(date),seq)
|
||||||
trips[tid] = (year, date, tripcave, triptitle, text, trippeople, tu, formattype)
|
trips[tid] = (year, date, tripcave, triptitle, text, trippeople, tu, formattype)
|
||||||
|
|
||||||
## copy a lot of checking functionality here from EnterLogIntoDbase()
|
## copy a lot of checking functionality here from EnterLogIntoDbase()
|
||||||
@ -267,7 +278,9 @@ def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu,
|
|||||||
# message = " ! - Skipping logentry: %s - no author for entry in year '%s'" % (title, expedition.year)
|
# message = " ! - Skipping logentry: %s - no author for entry in year '%s'" % (title, expedition.year)
|
||||||
# DataIssue.objects.create(parser='logbooks', message=message)
|
# DataIssue.objects.create(parser='logbooks', message=message)
|
||||||
# logdataissues[tid+"author"]=message
|
# logdataissues[tid+"author"]=message
|
||||||
pass
|
return
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# 2002, 2004, 2005, 2007, 2010 - now
|
# 2002, 2004, 2005, 2007, 2010 - now
|
||||||
# 2006 wiki text is incomplete, but the html all there. So using this parser now.
|
# 2006 wiki text is incomplete, but the html all there. So using this parser now.
|
||||||
@ -280,6 +293,7 @@ def Parseloghtmltxt(year, expedition, txt):
|
|||||||
for trippara in tripparas:
|
for trippara in tripparas:
|
||||||
logbook_entry_count += 1
|
logbook_entry_count += 1
|
||||||
tid = set_trip_id(year,logbook_entry_count)
|
tid = set_trip_id(year,logbook_entry_count)
|
||||||
|
print(f' - new tid:{tid} lbe count: {logbook_entry_count}')
|
||||||
|
|
||||||
s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
||||||
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
|
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
|
||||||
@ -517,7 +531,7 @@ def SetDatesFromLogbookEntries(expedition):
|
|||||||
lprevpersontrip.save()
|
lprevpersontrip.save()
|
||||||
persontrip.persontrip_next = None
|
persontrip.persontrip_next = None
|
||||||
lprevpersontrip = persontrip
|
lprevpersontrip = persontrip
|
||||||
persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
|
#persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
|
||||||
|
|
||||||
|
|
||||||
def LoadLogbookForExpedition(expedition, expect):
|
def LoadLogbookForExpedition(expedition, expect):
|
||||||
@ -579,13 +593,17 @@ def LoadLogbookForExpedition(expedition, expect):
|
|||||||
print(" - Cache file does not exist \"" + str(cache_filename) +"\"")
|
print(" - Cache file does not exist \"" + str(cache_filename) +"\"")
|
||||||
|
|
||||||
expedition.save()
|
expedition.save()
|
||||||
|
logbook_cached = False
|
||||||
|
if True: # enable cache system
|
||||||
now = time.time()
|
now = time.time()
|
||||||
bad_cache = True # emporarily disable reading the cache - buggy
|
bad_cache = False # cache reading is enabled; set to True to temporarily disable reading the cache if it proves buggy
|
||||||
try:
|
try:
|
||||||
cache_t = os.path.getmtime(cache_filename)
|
cache_t = os.path.getmtime(cache_filename)
|
||||||
if os.path.getmtime(logbookfile) - cache_t > 2: # at least 2 secs later
|
if os.path.getmtime(logbookfile) - cache_t > 2: # at least 2 secs later
|
||||||
|
print(" - ! Cache is older than the logbook file")
|
||||||
bad_cache= True
|
bad_cache= True
|
||||||
if now - cache_t > 30*24*60*60:
|
if now - cache_t > 30*24*60*60:
|
||||||
|
print(" - ! Cache is > 30 days old")
|
||||||
bad_cache= True
|
bad_cache= True
|
||||||
if bad_cache:
|
if bad_cache:
|
||||||
print(" - ! Cache is either stale or more than 30 days old. Deleting it.")
|
print(" - ! Cache is either stale or more than 30 days old. Deleting it.")
|
||||||
@ -593,7 +611,7 @@ def LoadLogbookForExpedition(expedition, expect):
|
|||||||
logentries=[]
|
logentries=[]
|
||||||
print(" ! Removed stale or corrupt cache file")
|
print(" ! Removed stale or corrupt cache file")
|
||||||
raise
|
raise
|
||||||
print(" - Reading cache: " + str(cache_filename), end='')
|
# print(" - Reading cache: " + str(cache_filename), end='')
|
||||||
try:
|
try:
|
||||||
with open(cache_filename, "rb") as f:
|
with open(cache_filename, "rb") as f:
|
||||||
year,n,logentries = pickle.load(f)
|
year,n,logentries = pickle.load(f)
|
||||||
@ -615,15 +633,14 @@ def LoadLogbookForExpedition(expedition, expect):
|
|||||||
txt = file_in.read().decode("latin1")
|
txt = file_in.read().decode("latin1")
|
||||||
file_in.close()
|
file_in.close()
|
||||||
logbook_parseable = True
|
logbook_parseable = True
|
||||||
print((" - Using: " + parsefunc + " to parse " + logbookfile))
|
|
||||||
except (IOError):
|
except (IOError):
|
||||||
logbook_parseable = False
|
logbook_parseable = False
|
||||||
print((" ! Couldn't open logbook " + logbookfile))
|
print((" ! Couldn't open logbook " + logbookfile))
|
||||||
|
|
||||||
if logbook_parseable:
|
if logbook_parseable:
|
||||||
parser = globals()[parsefunc]
|
parser = globals()[parsefunc]
|
||||||
|
print(f' - Using parser {parsefunc}')
|
||||||
parser(expedition.year, expedition, txt) # this launches the parser
|
parser(expedition.year, expedition, txt) # this launches the right parser for this year
|
||||||
|
|
||||||
SetDatesFromLogbookEntries(expedition)
|
SetDatesFromLogbookEntries(expedition)
|
||||||
if len(logentries) >0:
|
if len(logentries) >0:
|
||||||
@ -634,11 +651,9 @@ def LoadLogbookForExpedition(expedition, expect):
|
|||||||
else:
|
else:
|
||||||
print(" ! NO TRIP entries found in logbook, check the syntax.")
|
print(" ! NO TRIP entries found in logbook, check the syntax.")
|
||||||
|
|
||||||
if logbook_cached: # working on this bit...
|
|
||||||
i=0
|
i=0
|
||||||
for entrytuple in range(len(logentries)):
|
for entrytuple in logentries:
|
||||||
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = logentries[i]
|
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = entrytuple
|
||||||
#print(" - entry tuple " , i, " tid", tripid1)
|
|
||||||
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
|
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
|
||||||
entry_type, tripid1)
|
entry_type, tripid1)
|
||||||
EnterLogIntoObjStore(expedition.year, date, tripcave, triptitle, text, trippeople, logtime_underground,
|
EnterLogIntoObjStore(expedition.year, date, tripcave, triptitle, text, trippeople, logtime_underground,
|
||||||
@ -672,7 +687,7 @@ def LoadLogbooks():
|
|||||||
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
|
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
|
||||||
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
|
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
|
||||||
"1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
|
"1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
|
||||||
"1985": 22,"1984": 32,"1983": 52,"1982": 42,}
|
"1985": 24,"1984": 32,"1983": 52,"1982": 42,}
|
||||||
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing.
|
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing.
|
||||||
try:
|
try:
|
||||||
os.remove("loadlogbk.log")
|
os.remove("loadlogbk.log")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user