2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-21 23:01:52 +00:00

cache tidy and move settings into parser

This commit is contained in:
Philip Sargent 2022-11-21 16:41:52 +00:00
parent bcb61f9cd9
commit a795707552
3 changed files with 61 additions and 106 deletions

View File

@ -585,7 +585,7 @@ class QM(TroggleModel):
class PersonTrip(TroggleModel):
"""Single Person going on a trip, which may or may not be written up.
It accounts for different T/U for people in same logbook entry.
It could account for different T/U for people in same logbook entry.
"""
personexpedition = models.ForeignKey("PersonExpedition",null=True,on_delete=models.CASCADE)
time_underground = models.FloatField(help_text="In decimal hours")

View File

@ -215,6 +215,7 @@ def logbookentry(request, date, slug):
this_logbookentry=this_logbookentry[0]
# This is the only page that uses persontrip_next and persontrip_prev
# and it is calculated on the fly in the model
return render(request, 'logbookentry.html',
{'logbookentry': this_logbookentry, 'trips': trips, 'svxothers': svxothers, 'wallets': wallets})
else:

View File

@ -45,12 +45,43 @@ todo='''
- this is a slow and uncertain function: cave = getCaveByReference(caveRef)
'''
# Module-level mutable state shared by the logbook parsers in this file.
logentries = [] # the entire logbook for one year is a single object: a list of entries
# Trip "places" that are not caves; used to decide whether a logbook entry
# should be linked to a Cave object.
noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau',
'base camp', 'basecamp', 'top camp', 'topcamp' ]
# NOTE(review): TROG appears to be a project-global cache/registry imported
# elsewhere in this file — confirm its structure before relying on it here.
logdataissues = TROG['issues']['logdataissues']
trips ={}
# Maximum length (characters) accepted for a logbook entry title before it
# is treated as a parsing error.
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
# Parser function name and filename used for any year not listed in
# LOGBOOK_PARSER_SETTINGS below.
DEFAULT_LOGBOOK_PARSER = "Parseloghtmltxt"
DEFAULT_LOGBOOK_FILE = "logbook.html"
# All years since 2010 use the default value for Logbook parser
# but several don't work, and are skipped by the parsing code, e.g. 1983
# Maps year -> (logbook filename, parser function name).
LOGBOOK_PARSER_SETTINGS = {
    "2010": ("logbook.html", "Parseloghtmltxt"),
    "2009": ("2009logbook.txt", "Parselogwikitxt"),
    "2008": ("2008logbook.txt", "Parselogwikitxt"),
    "2007": ("logbook.html", "Parseloghtmltxt"),
    # "2006": ("logbook/logbook_06.txt", "Parselogwikitxt"),
    "2006": ("logbook.html", "Parseloghtmltxt"),  # duplicate key removed; this was the effective value
    "2005": ("logbook.html", "Parseloghtmltxt"),
    "2004": ("logbook.html", "Parseloghtmltxt"),
    "2003": ("logbook.html", "Parseloghtml03"),
    "2002": ("logbook.html", "Parseloghtmltxt"),
    "2001": ("log.htm", "Parseloghtml01"),
    "2000": ("log.htm", "Parseloghtml01"),
    "1999": ("log.htm", "Parseloghtml01"),
    "1998": ("log.htm", "Parseloghtml01"),
    "1997": ("log.htm", "Parseloghtml01"),
    "1996": ("log.htm", "Parseloghtml01"),
    "1995": ("log.htm", "Parseloghtml01"),
    "1994": ("log.htm", "Parseloghtml01"),
    "1993": ("log.htm", "Parseloghtml01"),
    "1992": ("log.htm", "Parseloghtml01"),
    "1991": ("log.htm", "Parseloghtml01"),
    "1990": ("log.htm", "Parseloghtml01"),
    "1989": ("log.htm", "Parseloghtml01"), #crashes MySQL
    "1988": ("log.htm", "Parseloghtml01"), #crashes MySQL
    "1987": ("log.htm", "Parseloghtml01"), #crashes MySQL
    "1985": ("log.htm", "Parseloghtml01"),
    "1984": ("log.htm", "Parseloghtml01"),
    "1983": ("log.htm", "Parseloghtml01"),
    "1982": ("log.htm", "Parseloghtml01"),
}
entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
@ -60,6 +91,11 @@ entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015":
"1985": 24,"1984": 32,"1983": 52,"1982": 42,}
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing.
logentries = [] # the entire logbook for one year is a single object: a list of entries
noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau',
'base camp', 'basecamp', 'top camp', 'topcamp' ]
logdataissues = TROG['issues']['logdataissues']
trips ={}
#
# the logbook loading section
@ -169,7 +205,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
# this creates the PersonTrip instance.
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) # PersonTrip also saved in SetDatesFromLogbookEntries
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs)
def ParseDate(tripdate, year):
""" Interprets dates in the expo logbooks and returns a correct datetime.date object """
@ -465,34 +501,6 @@ def Parseloghtml03(year, expedition, txt):
trippeople, expedition, tu, "html03", tid)
logentries.append(entrytuple)
def SetDatesFromLogbookEntries(expedition):
    """Sets the next and previous entry for a persontrip by setting
    persontrip_prev
    persontrip_next
    for each persontrip instance.

    This is ONLY needed when a logbook entry is displayed. So could be called lazily
    only when one of these entries is requested.

    It does NOT do what an older docstring claimed:
    it does not set the date_from / date_to fields of the expedition.
    """
    # Probably a faster way to do this. This uses a lot of db queries, but we have all this
    # in memory..
    for personexpedition in expedition.personexpedition_set.all():
        # Order by logbook date so prev/next links follow chronological order.
        persontrips = personexpedition.persontrip_set.order_by('logbook_entry__date')
        # sequencing is difficult to do
        lprevpersontrip = None
        for persontrip in persontrips:
            persontrip.persontrip_prev = lprevpersontrip
            if lprevpersontrip:
                # Back-patch the previous trip's forward link now that we know its successor.
                lprevpersontrip.persontrip_next = persontrip
                lprevpersontrip.save()
            persontrip.persontrip_next = None  # last trip keeps None unless back-patched
            lprevpersontrip = persontrip
        #persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
def LoadLogbookForExpedition(expedition):
""" Parses all logbook entries for one expedition
@ -505,7 +513,7 @@ def LoadLogbookForExpedition(expedition):
logbook_parseable = False
logbook_cached = False
yearlinks = settings.LOGBOOK_PARSER_SETTINGS
yearlinks = LOGBOOK_PARSER_SETTINGS
expologbase = os.path.join(settings.EXPOWEB, "years")
logentries=[]
@ -555,85 +563,33 @@ def LoadLogbookForExpedition(expedition):
expedition.logbookfile = yearlinks[year][0]
parsefunc = yearlinks[year][1]
else:
logbookpath = Path(expologbase) / year / settings.DEFAULT_LOGBOOK_FILE
expedition.logbookfile = settings.DEFAULT_LOGBOOK_FILE
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
logbookpath = Path(expologbase) / year / DEFAULT_LOGBOOK_FILE
expedition.logbookfile = DEFAULT_LOGBOOK_FILE
parsefunc = DEFAULT_LOGBOOK_PARSER
cache_filename = Path(str(logbookpath) + ".cache")
if not cache_filename.is_file():
print(" - Cache file does not exist \"" + str(cache_filename) +"\"")
expedition.save()
logbook_cached = False
if False: # enable cache system
now = time.time()
bad_cache = False # temporarily disable reading the cache - buggy
try:
cache_t = os.path.getmtime(cache_filename)
if os.path.getmtime(logbookpath) - cache_t > 2: # at least 2 secs later
print(" - ! Cache is older than the logbook file")
bad_cache= True
if now - cache_t > 30*24*60*60:
print(" - ! Cache is > 30 days old")
bad_cache= True
if bad_cache:
print(" - so cache is either stale or more than 30 days old. Deleting it.")
os.remove(cache_filename)
logentries=[]
print(" - Deleted stale or corrupt cache file")
raise
try:
# print(" - Reading cache: " + str(cache_filename), end='')
with open(cache_filename, "rb") as f:
year, lbsize, n, logentries = pickle.load(f)
if validcache(year, n, lbsize):
print(f" -- {year} : Loaded {len(logentries)} log entries")
logbook_cached = True
else:
print(" !- {year} : Cache failed validity checks")
raise
except:
print(" ! Failed to load corrupt cache (or I was told to ignore it). Deleting it.")
os.remove(cache_filename)
logentries=[]
raise
except :
print(" - Cache old or de-pickle failure \"" + str(cache_filename) +"\"")
try:
file_in = open(logbookpath,'rb')
txt = file_in.read().decode("utf-8")
file_in.close()
logbook_parseable = True
except (IOError):
logbook_parseable = False
print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
else:
try:
file_in = open(logbookpath,'rb')
txt = file_in.read().decode("utf-8")
file_in.close()
logbook_parseable = True
except (IOError):
logbook_parseable = False
print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
except:
logbook_parseable = False
print(" ! Very Bad Error opening " + logbookpath)
try:
file_in = open(logbookpath,'rb')
txt = file_in.read().decode("utf-8")
file_in.close()
logbook_parseable = True
except (IOError):
logbook_parseable = False
print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
except:
logbook_parseable = False
print(" ! Very Bad Error opening " + logbookpath)
if logbook_parseable:
parser = globals()[parsefunc]
print(f' - {year} parsing with {parsefunc}')
parser(year, expedition, txt) # this launches the right parser for this year
# SetDatesFromLogbookEntries(expedition)
# if len(logentries) >0:
# print(" - Cacheing " , len(logentries), " log entries")
# lbsize = logbookpath.stat().st_size
# with open(cache_filename, "wb") as fc: # we much check that permission are g+w ! or expo can't delete the cache
# logbk=(expedition,lbsize,len(logentries),logentries)
# pickle.dump(logbk, fc, protocol=4)
# else:
# print(" ! NO TRIP entries found in logbook, check the syntax.")
i=0
for entrytuple in logentries:
# date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1 = entrytuple
@ -644,7 +600,6 @@ def LoadLogbookForExpedition(expedition):
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
tripid1)
i +=1
# SetDatesFromLogbookEntries(expedition)
if len(logentries) == expect:
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
@ -656,7 +611,6 @@ def LoadLogbookForExpedition(expedition):
def LoadLogbooks():
""" This is the master function for parsing all logbooks into the Troggle database.
Parser settings appropriate for each year are set in settings.py LOGBOOK_PARSER_SETTINGS.
This should be rewritten to use coroutines to load all logbooks from disc in parallel,
but must be serialised to write to database as sqlite is single-user.
"""