forked from expo/troggle
cache tidy and move settings into parser
This commit is contained in:
parent
bcb61f9cd9
commit
a795707552
@ -585,7 +585,7 @@ class QM(TroggleModel):
|
|||||||
|
|
||||||
class PersonTrip(TroggleModel):
|
class PersonTrip(TroggleModel):
|
||||||
"""Single Person going on a trip, which may or may not be written up.
|
"""Single Person going on a trip, which may or may not be written up.
|
||||||
It accounts for different T/U for people in same logbook entry.
|
It could account for different T/U for people in same logbook entry.
|
||||||
"""
|
"""
|
||||||
personexpedition = models.ForeignKey("PersonExpedition",null=True,on_delete=models.CASCADE)
|
personexpedition = models.ForeignKey("PersonExpedition",null=True,on_delete=models.CASCADE)
|
||||||
time_underground = models.FloatField(help_text="In decimal hours")
|
time_underground = models.FloatField(help_text="In decimal hours")
|
||||||
|
@ -215,6 +215,7 @@ def logbookentry(request, date, slug):
|
|||||||
|
|
||||||
this_logbookentry=this_logbookentry[0]
|
this_logbookentry=this_logbookentry[0]
|
||||||
# This is the only page that uses persontrip_next and persontrip_prev
|
# This is the only page that uses persontrip_next and persontrip_prev
|
||||||
|
# and it is calculated on the fly in the model
|
||||||
return render(request, 'logbookentry.html',
|
return render(request, 'logbookentry.html',
|
||||||
{'logbookentry': this_logbookentry, 'trips': trips, 'svxothers': svxothers, 'wallets': wallets})
|
{'logbookentry': this_logbookentry, 'trips': trips, 'svxothers': svxothers, 'wallets': wallets})
|
||||||
else:
|
else:
|
||||||
|
@ -45,12 +45,43 @@ todo='''
|
|||||||
- this is a slow and uncertain function: cave = getCaveByReference(caveRef)
|
- this is a slow and uncertain function: cave = getCaveByReference(caveRef)
|
||||||
|
|
||||||
'''
|
'''
|
||||||
|
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
|
||||||
logentries = [] # the entire logbook for one year is a single object: a list of entries
|
DEFAULT_LOGBOOK_PARSER = "Parseloghtmltxt"
|
||||||
noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau',
|
DEFAULT_LOGBOOK_FILE = "logbook.html"
|
||||||
'base camp', 'basecamp', 'top camp', 'topcamp' ]
|
# All years since 2010 use the default value for Logbook parser
|
||||||
logdataissues = TROG['issues']['logdataissues']
|
# but several don't work, and are skipped by the parsing code, e.g. 1983
|
||||||
trips ={}
|
LOGBOOK_PARSER_SETTINGS = {
|
||||||
|
"2010": ("logbook.html", "Parseloghtmltxt"),
|
||||||
|
"2009": ("2009logbook.txt", "Parselogwikitxt"),
|
||||||
|
"2008": ("2008logbook.txt", "Parselogwikitxt"),
|
||||||
|
"2007": ("logbook.html", "Parseloghtmltxt"),
|
||||||
|
"2006": ("logbook.html", "Parseloghtmltxt"),
|
||||||
|
# "2006": ("logbook/logbook_06.txt", "Parselogwikitxt"),
|
||||||
|
"2006": ("logbook.html", "Parseloghtmltxt"),
|
||||||
|
"2005": ("logbook.html", "Parseloghtmltxt"),
|
||||||
|
"2004": ("logbook.html", "Parseloghtmltxt"),
|
||||||
|
"2003": ("logbook.html", "Parseloghtml03"),
|
||||||
|
"2002": ("logbook.html", "Parseloghtmltxt"),
|
||||||
|
"2001": ("log.htm", "Parseloghtml01"),
|
||||||
|
"2000": ("log.htm", "Parseloghtml01"),
|
||||||
|
"1999": ("log.htm", "Parseloghtml01"),
|
||||||
|
"1998": ("log.htm", "Parseloghtml01"),
|
||||||
|
"1997": ("log.htm", "Parseloghtml01"),
|
||||||
|
"1996": ("log.htm", "Parseloghtml01"),
|
||||||
|
"1995": ("log.htm", "Parseloghtml01"),
|
||||||
|
"1994": ("log.htm", "Parseloghtml01"),
|
||||||
|
"1993": ("log.htm", "Parseloghtml01"),
|
||||||
|
"1992": ("log.htm", "Parseloghtml01"),
|
||||||
|
"1991": ("log.htm", "Parseloghtml01"),
|
||||||
|
"1990": ("log.htm", "Parseloghtml01"),
|
||||||
|
"1989": ("log.htm", "Parseloghtml01"), #crashes MySQL
|
||||||
|
"1988": ("log.htm", "Parseloghtml01"), #crashes MySQL
|
||||||
|
"1987": ("log.htm", "Parseloghtml01"), #crashes MySQL
|
||||||
|
"1985": ("log.htm", "Parseloghtml01"),
|
||||||
|
"1984": ("log.htm", "Parseloghtml01"),
|
||||||
|
"1983": ("log.htm", "Parseloghtml01"),
|
||||||
|
"1982": ("log.htm", "Parseloghtml01"),
|
||||||
|
}
|
||||||
|
|
||||||
entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
|
entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
|
||||||
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
|
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
|
||||||
@ -60,6 +91,11 @@ entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015":
|
|||||||
"1985": 24,"1984": 32,"1983": 52,"1982": 42,}
|
"1985": 24,"1984": 32,"1983": 52,"1982": 42,}
|
||||||
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no fully working parser, or need hand-editing.
|
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no fully working parser, or need hand-editing.
|
||||||
|
|
||||||
|
logentries = [] # the entire logbook for one year is a single object: a list of entries
|
||||||
|
noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau',
|
||||||
|
'base camp', 'basecamp', 'top camp', 'topcamp' ]
|
||||||
|
logdataissues = TROG['issues']['logdataissues']
|
||||||
|
trips ={}
|
||||||
|
|
||||||
#
|
#
|
||||||
# the logbook loading section
|
# the logbook loading section
|
||||||
@ -169,7 +205,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
|||||||
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
|
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
|
||||||
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
|
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
|
||||||
# this creates the PersonTrip instance.
|
# this creates the PersonTrip instance.
|
||||||
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) # PersonTrip also saved in SetDatesFromLogbookEntries
|
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs)
|
||||||
|
|
||||||
def ParseDate(tripdate, year):
|
def ParseDate(tripdate, year):
|
||||||
""" Interprets dates in the expo logbooks and returns a correct datetime.date object """
|
""" Interprets dates in the expo logbooks and returns a correct datetime.date object """
|
||||||
@ -465,34 +501,6 @@ def Parseloghtml03(year, expedition, txt):
|
|||||||
trippeople, expedition, tu, "html03", tid)
|
trippeople, expedition, tu, "html03", tid)
|
||||||
logentries.append(entrytuple)
|
logentries.append(entrytuple)
|
||||||
|
|
||||||
def SetDatesFromLogbookEntries(expedition):
|
|
||||||
"""Sets the next and previous entry for a persontrip by setting
|
|
||||||
persontrip_prev
|
|
||||||
persontrip_next
|
|
||||||
for each persontrip instance.
|
|
||||||
|
|
||||||
This is ONLY needed when a logbook entry is displayed. So could be called lazily
|
|
||||||
only when one of these entries is requested.
|
|
||||||
|
|
||||||
It does NOT do what the docstring says here:
|
|
||||||
Sets the date_from and date_to field for an expedition based on persontrips.
|
|
||||||
Then sets the expedition date_from and date_to based on the personexpeditions.
|
|
||||||
"""
|
|
||||||
# Probably a faster way to do this. This uses a lot of db queries, but we have all this
|
|
||||||
# in memory..
|
|
||||||
for personexpedition in expedition.personexpedition_set.all():
|
|
||||||
persontrips = personexpedition.persontrip_set.order_by('logbook_entry__date')
|
|
||||||
# sequencing is difficult to do
|
|
||||||
lprevpersontrip = None
|
|
||||||
for persontrip in persontrips:
|
|
||||||
persontrip.persontrip_prev = lprevpersontrip
|
|
||||||
if lprevpersontrip:
|
|
||||||
lprevpersontrip.persontrip_next = persontrip
|
|
||||||
lprevpersontrip.save()
|
|
||||||
persontrip.persontrip_next = None
|
|
||||||
lprevpersontrip = persontrip
|
|
||||||
#persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
|
|
||||||
|
|
||||||
|
|
||||||
def LoadLogbookForExpedition(expedition):
|
def LoadLogbookForExpedition(expedition):
|
||||||
""" Parses all logbook entries for one expedition
|
""" Parses all logbook entries for one expedition
|
||||||
@ -505,7 +513,7 @@ def LoadLogbookForExpedition(expedition):
|
|||||||
|
|
||||||
logbook_parseable = False
|
logbook_parseable = False
|
||||||
logbook_cached = False
|
logbook_cached = False
|
||||||
yearlinks = settings.LOGBOOK_PARSER_SETTINGS
|
yearlinks = LOGBOOK_PARSER_SETTINGS
|
||||||
expologbase = os.path.join(settings.EXPOWEB, "years")
|
expologbase = os.path.join(settings.EXPOWEB, "years")
|
||||||
logentries=[]
|
logentries=[]
|
||||||
|
|
||||||
@ -555,85 +563,33 @@ def LoadLogbookForExpedition(expedition):
|
|||||||
expedition.logbookfile = yearlinks[year][0]
|
expedition.logbookfile = yearlinks[year][0]
|
||||||
parsefunc = yearlinks[year][1]
|
parsefunc = yearlinks[year][1]
|
||||||
else:
|
else:
|
||||||
logbookpath = Path(expologbase) / year / settings.DEFAULT_LOGBOOK_FILE
|
logbookpath = Path(expologbase) / year / DEFAULT_LOGBOOK_FILE
|
||||||
expedition.logbookfile = settings.DEFAULT_LOGBOOK_FILE
|
expedition.logbookfile = DEFAULT_LOGBOOK_FILE
|
||||||
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
|
parsefunc = DEFAULT_LOGBOOK_PARSER
|
||||||
cache_filename = Path(str(logbookpath) + ".cache")
|
cache_filename = Path(str(logbookpath) + ".cache")
|
||||||
if not cache_filename.is_file():
|
if not cache_filename.is_file():
|
||||||
print(" - Cache file does not exist \"" + str(cache_filename) +"\"")
|
print(" - Cache file does not exist \"" + str(cache_filename) +"\"")
|
||||||
|
|
||||||
expedition.save()
|
expedition.save()
|
||||||
logbook_cached = False
|
logbook_cached = False
|
||||||
if False: # enable cache system
|
|
||||||
now = time.time()
|
try:
|
||||||
bad_cache = False # temporarily disable reading the cache - buggy
|
file_in = open(logbookpath,'rb')
|
||||||
try:
|
txt = file_in.read().decode("utf-8")
|
||||||
cache_t = os.path.getmtime(cache_filename)
|
file_in.close()
|
||||||
if os.path.getmtime(logbookpath) - cache_t > 2: # at least 2 secs later
|
logbook_parseable = True
|
||||||
print(" - ! Cache is older than the logbook file")
|
except (IOError):
|
||||||
bad_cache= True
|
logbook_parseable = False
|
||||||
if now - cache_t > 30*24*60*60:
|
print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
|
||||||
print(" - ! Cache is > 30 days old")
|
except:
|
||||||
bad_cache= True
|
logbook_parseable = False
|
||||||
if bad_cache:
|
print(" ! Very Bad Error opening " + logbookpath)
|
||||||
print(" - so cache is either stale or more than 30 days old. Deleting it.")
|
|
||||||
os.remove(cache_filename)
|
|
||||||
logentries=[]
|
|
||||||
print(" - Deleted stale or corrupt cache file")
|
|
||||||
raise
|
|
||||||
try:
|
|
||||||
# print(" - Reading cache: " + str(cache_filename), end='')
|
|
||||||
with open(cache_filename, "rb") as f:
|
|
||||||
year, lbsize, n, logentries = pickle.load(f)
|
|
||||||
if validcache(year, n, lbsize):
|
|
||||||
print(f" -- {year} : Loaded {len(logentries)} log entries")
|
|
||||||
logbook_cached = True
|
|
||||||
else:
|
|
||||||
print(" !- {year} : Cache failed validity checks")
|
|
||||||
raise
|
|
||||||
except:
|
|
||||||
print(" ! Failed to load corrupt cache (or I was told to ignore it). Deleting it.")
|
|
||||||
os.remove(cache_filename)
|
|
||||||
logentries=[]
|
|
||||||
raise
|
|
||||||
except :
|
|
||||||
print(" - Cache old or de-pickle failure \"" + str(cache_filename) +"\"")
|
|
||||||
try:
|
|
||||||
file_in = open(logbookpath,'rb')
|
|
||||||
txt = file_in.read().decode("utf-8")
|
|
||||||
file_in.close()
|
|
||||||
logbook_parseable = True
|
|
||||||
except (IOError):
|
|
||||||
logbook_parseable = False
|
|
||||||
print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
file_in = open(logbookpath,'rb')
|
|
||||||
txt = file_in.read().decode("utf-8")
|
|
||||||
file_in.close()
|
|
||||||
logbook_parseable = True
|
|
||||||
except (IOError):
|
|
||||||
logbook_parseable = False
|
|
||||||
print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
|
|
||||||
except:
|
|
||||||
logbook_parseable = False
|
|
||||||
print(" ! Very Bad Error opening " + logbookpath)
|
|
||||||
|
|
||||||
if logbook_parseable:
|
if logbook_parseable:
|
||||||
parser = globals()[parsefunc]
|
parser = globals()[parsefunc]
|
||||||
print(f' - {year} parsing with {parsefunc}')
|
print(f' - {year} parsing with {parsefunc}')
|
||||||
parser(year, expedition, txt) # this launches the right parser for this year
|
parser(year, expedition, txt) # this launches the right parser for this year
|
||||||
|
|
||||||
# SetDatesFromLogbookEntries(expedition)
|
|
||||||
# if len(logentries) >0:
|
|
||||||
# print(" - Cacheing " , len(logentries), " log entries")
|
|
||||||
# lbsize = logbookpath.stat().st_size
|
|
||||||
# with open(cache_filename, "wb") as fc: # we must check that permissions are g+w ! or expo can't delete the cache
|
|
||||||
# logbk=(expedition,lbsize,len(logentries),logentries)
|
|
||||||
# pickle.dump(logbk, fc, protocol=4)
|
|
||||||
# else:
|
|
||||||
# print(" ! NO TRIP entries found in logbook, check the syntax.")
|
|
||||||
|
|
||||||
i=0
|
i=0
|
||||||
for entrytuple in logentries:
|
for entrytuple in logentries:
|
||||||
# date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1 = entrytuple
|
# date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1 = entrytuple
|
||||||
@ -644,7 +600,6 @@ def LoadLogbookForExpedition(expedition):
|
|||||||
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
|
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
|
||||||
tripid1)
|
tripid1)
|
||||||
i +=1
|
i +=1
|
||||||
# SetDatesFromLogbookEntries(expedition)
|
|
||||||
|
|
||||||
if len(logentries) == expect:
|
if len(logentries) == expect:
|
||||||
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
|
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
|
||||||
@ -656,7 +611,6 @@ def LoadLogbookForExpedition(expedition):
|
|||||||
|
|
||||||
def LoadLogbooks():
|
def LoadLogbooks():
|
||||||
""" This is the master function for parsing all logbooks into the Troggle database.
|
""" This is the master function for parsing all logbooks into the Troggle database.
|
||||||
Parser settings appropriate for each year are set in settings.py LOGBOOK_PARSER_SETTINGS.
|
|
||||||
This should be rewritten to use coroutines to load all logbooks from disc in parallel,
|
This should be rewritten to use coroutines to load all logbooks from disc in parallel,
|
||||||
but must be serialised to write to database as sqlite is single-user.
|
but must be serialised to write to database as sqlite is single-user.
|
||||||
"""
|
"""
|
||||||
|
Loading…
Reference in New Issue
Block a user