forked from expo/troggle
cache tidy and move settings into parser
This commit is contained in:
parent
bcb61f9cd9
commit
a795707552
@ -585,7 +585,7 @@ class QM(TroggleModel):
|
||||
|
||||
class PersonTrip(TroggleModel):
|
||||
"""Single Person going on a trip, which may or may not be written up.
|
||||
It accounts for different T/U for people in same logbook entry.
|
||||
It could account for different T/U for people in same logbook entry.
|
||||
"""
|
||||
personexpedition = models.ForeignKey("PersonExpedition",null=True,on_delete=models.CASCADE)
|
||||
time_underground = models.FloatField(help_text="In decimal hours")
|
||||
|
@ -215,6 +215,7 @@ def logbookentry(request, date, slug):
|
||||
|
||||
this_logbookentry=this_logbookentry[0]
|
||||
# This is the only page that uses persontrip_next and persontrip_prev
|
||||
# and it is calculated on the fly in the model
|
||||
return render(request, 'logbookentry.html',
|
||||
{'logbookentry': this_logbookentry, 'trips': trips, 'svxothers': svxothers, 'wallets': wallets})
|
||||
else:
|
||||
|
@ -45,12 +45,43 @@ todo='''
|
||||
- this is a slow and uncertain function: cave = getCaveByReference(caveRef)
|
||||
|
||||
'''
|
||||
|
||||
logentries = [] # the entire logbook for one year is a single object: a list of entries
|
||||
noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau',
|
||||
'base camp', 'basecamp', 'top camp', 'topcamp' ]
|
||||
logdataissues = TROG['issues']['logdataissues']
|
||||
trips ={}
|
||||
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
|
||||
DEFAULT_LOGBOOK_PARSER = "Parseloghtmltxt"
|
||||
DEFAULT_LOGBOOK_FILE = "logbook.html"
|
||||
# All years since 2010 use the default value for Logbook parser
|
||||
# but several don't work, and are skipped by the parsing code, e.g. 1983
|
||||
LOGBOOK_PARSER_SETTINGS = {
|
||||
"2010": ("logbook.html", "Parseloghtmltxt"),
|
||||
"2009": ("2009logbook.txt", "Parselogwikitxt"),
|
||||
"2008": ("2008logbook.txt", "Parselogwikitxt"),
|
||||
"2007": ("logbook.html", "Parseloghtmltxt"),
|
||||
"2006": ("logbook.html", "Parseloghtmltxt"),
|
||||
# "2006": ("logbook/logbook_06.txt", "Parselogwikitxt"),
|
||||
"2006": ("logbook.html", "Parseloghtmltxt"),
|
||||
"2005": ("logbook.html", "Parseloghtmltxt"),
|
||||
"2004": ("logbook.html", "Parseloghtmltxt"),
|
||||
"2003": ("logbook.html", "Parseloghtml03"),
|
||||
"2002": ("logbook.html", "Parseloghtmltxt"),
|
||||
"2001": ("log.htm", "Parseloghtml01"),
|
||||
"2000": ("log.htm", "Parseloghtml01"),
|
||||
"1999": ("log.htm", "Parseloghtml01"),
|
||||
"1998": ("log.htm", "Parseloghtml01"),
|
||||
"1997": ("log.htm", "Parseloghtml01"),
|
||||
"1996": ("log.htm", "Parseloghtml01"),
|
||||
"1995": ("log.htm", "Parseloghtml01"),
|
||||
"1994": ("log.htm", "Parseloghtml01"),
|
||||
"1993": ("log.htm", "Parseloghtml01"),
|
||||
"1992": ("log.htm", "Parseloghtml01"),
|
||||
"1991": ("log.htm", "Parseloghtml01"),
|
||||
"1990": ("log.htm", "Parseloghtml01"),
|
||||
"1989": ("log.htm", "Parseloghtml01"), #crashes MySQL
|
||||
"1988": ("log.htm", "Parseloghtml01"), #crashes MySQL
|
||||
"1987": ("log.htm", "Parseloghtml01"), #crashes MySQL
|
||||
"1985": ("log.htm", "Parseloghtml01"),
|
||||
"1984": ("log.htm", "Parseloghtml01"),
|
||||
"1983": ("log.htm", "Parseloghtml01"),
|
||||
"1982": ("log.htm", "Parseloghtml01"),
|
||||
}
|
||||
|
||||
entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
|
||||
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
|
||||
@ -60,6 +91,11 @@ entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015":
|
||||
"1985": 24,"1984": 32,"1983": 52,"1982": 42,}
|
||||
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no fully working parser, or need hand-editing.
|
||||
|
||||
logentries = [] # the entire logbook for one year is a single object: a list of entries
|
||||
noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau',
|
||||
'base camp', 'basecamp', 'top camp', 'topcamp' ]
|
||||
logdataissues = TROG['issues']['logdataissues']
|
||||
trips ={}
|
||||
|
||||
#
|
||||
# the logbook loading section
|
||||
@ -169,7 +205,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
||||
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
|
||||
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
|
||||
# this creates the PersonTrip instance.
|
||||
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) # PersonTrip also saved in SetDatesFromLogbookEntries
|
||||
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs)
|
||||
|
||||
def ParseDate(tripdate, year):
|
||||
""" Interprets dates in the expo logbooks and returns a correct datetime.date object """
|
||||
@ -465,34 +501,6 @@ def Parseloghtml03(year, expedition, txt):
|
||||
trippeople, expedition, tu, "html03", tid)
|
||||
logentries.append(entrytuple)
|
||||
|
||||
def SetDatesFromLogbookEntries(expedition):
|
||||
"""Sets the next and previous entry for a persontrip by setting
|
||||
persontrip_prev
|
||||
persontrip_next
|
||||
for each persontrip instance.
|
||||
|
||||
This is ONLY needed when a logbook entry is displayed. So could be called lazily
|
||||
only when one of these entries is requested.
|
||||
|
||||
It does NOT do what the docstring says here:
|
||||
Sets the date_from and date_to field for an expedition based on persontrips.
|
||||
Then sets the expedition date_from and date_to based on the personexpeditions.
|
||||
"""
|
||||
# Probably a faster way to do this. This uses a lot of db queries, but we have all this
|
||||
# in memory..
|
||||
for personexpedition in expedition.personexpedition_set.all():
|
||||
persontrips = personexpedition.persontrip_set.order_by('logbook_entry__date')
|
||||
# sequencing is difficult to do
|
||||
lprevpersontrip = None
|
||||
for persontrip in persontrips:
|
||||
persontrip.persontrip_prev = lprevpersontrip
|
||||
if lprevpersontrip:
|
||||
lprevpersontrip.persontrip_next = persontrip
|
||||
lprevpersontrip.save()
|
||||
persontrip.persontrip_next = None
|
||||
lprevpersontrip = persontrip
|
||||
#persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
|
||||
|
||||
|
||||
def LoadLogbookForExpedition(expedition):
|
||||
""" Parses all logbook entries for one expedition
|
||||
@ -505,7 +513,7 @@ def LoadLogbookForExpedition(expedition):
|
||||
|
||||
logbook_parseable = False
|
||||
logbook_cached = False
|
||||
yearlinks = settings.LOGBOOK_PARSER_SETTINGS
|
||||
yearlinks = LOGBOOK_PARSER_SETTINGS
|
||||
expologbase = os.path.join(settings.EXPOWEB, "years")
|
||||
logentries=[]
|
||||
|
||||
@ -555,85 +563,33 @@ def LoadLogbookForExpedition(expedition):
|
||||
expedition.logbookfile = yearlinks[year][0]
|
||||
parsefunc = yearlinks[year][1]
|
||||
else:
|
||||
logbookpath = Path(expologbase) / year / settings.DEFAULT_LOGBOOK_FILE
|
||||
expedition.logbookfile = settings.DEFAULT_LOGBOOK_FILE
|
||||
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
|
||||
logbookpath = Path(expologbase) / year / DEFAULT_LOGBOOK_FILE
|
||||
expedition.logbookfile = DEFAULT_LOGBOOK_FILE
|
||||
parsefunc = DEFAULT_LOGBOOK_PARSER
|
||||
cache_filename = Path(str(logbookpath) + ".cache")
|
||||
if not cache_filename.is_file():
|
||||
print(" - Cache file does not exist \"" + str(cache_filename) +"\"")
|
||||
|
||||
expedition.save()
|
||||
logbook_cached = False
|
||||
if False: # enable cache system
|
||||
now = time.time()
|
||||
bad_cache = False # temporarily disable reading the cache - buggy
|
||||
try:
|
||||
cache_t = os.path.getmtime(cache_filename)
|
||||
if os.path.getmtime(logbookpath) - cache_t > 2: # at least 2 secs later
|
||||
print(" - ! Cache is older than the logbook file")
|
||||
bad_cache= True
|
||||
if now - cache_t > 30*24*60*60:
|
||||
print(" - ! Cache is > 30 days old")
|
||||
bad_cache= True
|
||||
if bad_cache:
|
||||
print(" - so cache is either stale or more than 30 days old. Deleting it.")
|
||||
os.remove(cache_filename)
|
||||
logentries=[]
|
||||
print(" - Deleted stale or corrupt cache file")
|
||||
raise
|
||||
try:
|
||||
# print(" - Reading cache: " + str(cache_filename), end='')
|
||||
with open(cache_filename, "rb") as f:
|
||||
year, lbsize, n, logentries = pickle.load(f)
|
||||
if validcache(year, n, lbsize):
|
||||
print(f" -- {year} : Loaded {len(logentries)} log entries")
|
||||
logbook_cached = True
|
||||
else:
|
||||
print(" !- {year} : Cache failed validity checks")
|
||||
raise
|
||||
except:
|
||||
print(" ! Failed to load corrupt cache (or I was told to ignore it). Deleting it.")
|
||||
os.remove(cache_filename)
|
||||
logentries=[]
|
||||
raise
|
||||
except :
|
||||
print(" - Cache old or de-pickle failure \"" + str(cache_filename) +"\"")
|
||||
try:
|
||||
file_in = open(logbookpath,'rb')
|
||||
txt = file_in.read().decode("utf-8")
|
||||
file_in.close()
|
||||
logbook_parseable = True
|
||||
except (IOError):
|
||||
logbook_parseable = False
|
||||
print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
|
||||
else:
|
||||
try:
|
||||
file_in = open(logbookpath,'rb')
|
||||
txt = file_in.read().decode("utf-8")
|
||||
file_in.close()
|
||||
logbook_parseable = True
|
||||
except (IOError):
|
||||
logbook_parseable = False
|
||||
print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
|
||||
except:
|
||||
logbook_parseable = False
|
||||
print(" ! Very Bad Error opening " + logbookpath)
|
||||
|
||||
try:
|
||||
file_in = open(logbookpath,'rb')
|
||||
txt = file_in.read().decode("utf-8")
|
||||
file_in.close()
|
||||
logbook_parseable = True
|
||||
except (IOError):
|
||||
logbook_parseable = False
|
||||
print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
|
||||
except:
|
||||
logbook_parseable = False
|
||||
print(" ! Very Bad Error opening " + logbookpath)
|
||||
|
||||
if logbook_parseable:
|
||||
parser = globals()[parsefunc]
|
||||
print(f' - {year} parsing with {parsefunc}')
|
||||
parser(year, expedition, txt) # this launches the right parser for this year
|
||||
|
||||
# SetDatesFromLogbookEntries(expedition)
|
||||
# if len(logentries) >0:
|
||||
# print(" - Cacheing " , len(logentries), " log entries")
|
||||
# lbsize = logbookpath.stat().st_size
|
||||
# with open(cache_filename, "wb") as fc: # we must check that permissions are g+w, or expo can't delete the cache
|
||||
# logbk=(expedition,lbsize,len(logentries),logentries)
|
||||
# pickle.dump(logbk, fc, protocol=4)
|
||||
# else:
|
||||
# print(" ! NO TRIP entries found in logbook, check the syntax.")
|
||||
|
||||
i=0
|
||||
for entrytuple in logentries:
|
||||
# date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1 = entrytuple
|
||||
@ -644,7 +600,6 @@ def LoadLogbookForExpedition(expedition):
|
||||
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
|
||||
tripid1)
|
||||
i +=1
|
||||
# SetDatesFromLogbookEntries(expedition)
|
||||
|
||||
if len(logentries) == expect:
|
||||
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
|
||||
@ -656,7 +611,6 @@ def LoadLogbookForExpedition(expedition):
|
||||
|
||||
def LoadLogbooks():
|
||||
""" This is the master function for parsing all logbooks into the Troggle database.
|
||||
Parser settings appropriate for each year are set in settings.py LOGBOOK_PARSER_SETTINGS.
|
||||
This should be rewritten to use coroutines to load all logbooks from disc in parallel,
|
||||
but must be serialised to write to database as sqlite is single-user.
|
||||
"""
|
||||
|
Loading…
Reference in New Issue
Block a user