diff --git a/core/models/caves.py b/core/models/caves.py index 972d766..a08b1f8 100644 --- a/core/models/caves.py +++ b/core/models/caves.py @@ -585,7 +585,7 @@ class QM(TroggleModel): class PersonTrip(TroggleModel): """Single Person going on a trip, which may or may not be written up. - It accounts for different T/U for people in same logbook entry. + It could account for different T/U for people in same logbook entry. """ personexpedition = models.ForeignKey("PersonExpedition",null=True,on_delete=models.CASCADE) time_underground = models.FloatField(help_text="In decimal hours") diff --git a/core/views/logbooks.py b/core/views/logbooks.py index 33d0c0e..7d58c75 100644 --- a/core/views/logbooks.py +++ b/core/views/logbooks.py @@ -215,6 +215,7 @@ def logbookentry(request, date, slug): this_logbookentry=this_logbookentry[0] # This is the only page that uses presontrip_next and persontrip_prev + # and it is calculated on the fly in the model return render(request, 'logbookentry.html', {'logbookentry': this_logbookentry, 'trips': trips, 'svxothers': svxothers, 'wallets': wallets}) else: diff --git a/parsers/logbooks.py b/parsers/logbooks.py index 018c051..a1df040 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -45,12 +45,43 @@ todo=''' - this is a slow and uncertain function: cave = getCaveByReference(caveRef) ''' - -logentries = [] # the entire logbook for one year is a single object: a list of entries -noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau', - 'base camp', 'basecamp', 'top camp', 'topcamp' ] -logdataissues = TROG['issues']['logdataissues'] -trips ={} +MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200 +DEFAULT_LOGBOOK_PARSER = "Parseloghtmltxt" +DEFAULT_LOGBOOK_FILE = "logbook.html" +# All years since 2010 use the default value for Logbook parser +# but several don't work, and are skipped by the parsing code, e.g. 
1983 +LOGBOOK_PARSER_SETTINGS = { + "2010": ("logbook.html", "Parseloghtmltxt"), + "2009": ("2009logbook.txt", "Parselogwikitxt"), + "2008": ("2008logbook.txt", "Parselogwikitxt"), + "2007": ("logbook.html", "Parseloghtmltxt"), + "2006": ("logbook.html", "Parseloghtmltxt"), +# "2006": ("logbook/logbook_06.txt", "Parselogwikitxt"), +# "2006": ("logbook.html", "Parseloghtmltxt"), # duplicate key: identical entry already present above + "2005": ("logbook.html", "Parseloghtmltxt"), + "2004": ("logbook.html", "Parseloghtmltxt"), + "2003": ("logbook.html", "Parseloghtml03"), + "2002": ("logbook.html", "Parseloghtmltxt"), + "2001": ("log.htm", "Parseloghtml01"), + "2000": ("log.htm", "Parseloghtml01"), + "1999": ("log.htm", "Parseloghtml01"), + "1998": ("log.htm", "Parseloghtml01"), + "1997": ("log.htm", "Parseloghtml01"), + "1996": ("log.htm", "Parseloghtml01"), + "1995": ("log.htm", "Parseloghtml01"), + "1994": ("log.htm", "Parseloghtml01"), + "1993": ("log.htm", "Parseloghtml01"), + "1992": ("log.htm", "Parseloghtml01"), + "1991": ("log.htm", "Parseloghtml01"), + "1990": ("log.htm", "Parseloghtml01"), + "1989": ("log.htm", "Parseloghtml01"), #crashes MySQL + "1988": ("log.htm", "Parseloghtml01"), #crashes MySQL + "1987": ("log.htm", "Parseloghtml01"), #crashes MySQL + "1985": ("log.htm", "Parseloghtml01"), + "1984": ("log.htm", "Parseloghtml01"), + "1983": ("log.htm", "Parseloghtml01"), + "1982": ("log.htm", "Parseloghtml01"), + } entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79, "2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52, @@ -60,6 +91,11 @@ entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": "1985": 24,"1984": 32,"1983": 52,"1982": 42,} # Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing. 
+logentries = [] # the entire logbook for one year is a single object: a list of entries +noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau', + 'base camp', 'basecamp', 'top camp', 'topcamp' ] +logdataissues = TROG['issues']['logdataissues'] +trips ={} # # the logbook loading section @@ -169,7 +205,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_ lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo} nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)} # this creates the PersonTrip instance. - save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) # PersonTrip also saved in SetDatesFromLogbookEntries + save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) def ParseDate(tripdate, year): """ Interprets dates in the expo logbooks and returns a correct datetime.date object """ @@ -465,34 +501,6 @@ def Parseloghtml03(year, expedition, txt): trippeople, expedition, tu, "html03", tid) logentries.append(entrytuple) -def SetDatesFromLogbookEntries(expedition): - """Sets the next and previous entry for a persontrip by setting - persontrip_prev - persontrip_next - for each persontrip instance. - - This is ONLY needed when a logbook entry is displayed. So could be called lazily - only when one of these entries is requested. - - It does NOT do what the docstring says here: - Sets the date_from and date_to field for an expedition based on persontrips. - Then sets the expedition date_from and date_to based on the personexpeditions. - """ - # Probably a faster way to do this. This uses a lot of db queries, but we have all this - # in memory.. 
- for personexpedition in expedition.personexpedition_set.all(): - persontrips = personexpedition.persontrip_set.order_by('logbook_entry__date') - # sequencing is difficult to do - lprevpersontrip = None - for persontrip in persontrips: - persontrip.persontrip_prev = lprevpersontrip - if lprevpersontrip: - lprevpersontrip.persontrip_next = persontrip - lprevpersontrip.save() - persontrip.persontrip_next = None - lprevpersontrip = persontrip - #persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import. - def LoadLogbookForExpedition(expedition): """ Parses all logbook entries for one expedition @@ -505,7 +513,7 @@ def LoadLogbookForExpedition(expedition): logbook_parseable = False logbook_cached = False - yearlinks = settings.LOGBOOK_PARSER_SETTINGS + yearlinks = LOGBOOK_PARSER_SETTINGS expologbase = os.path.join(settings.EXPOWEB, "years") logentries=[] @@ -555,85 +563,33 @@ def LoadLogbookForExpedition(expedition): expedition.logbookfile = yearlinks[year][0] parsefunc = yearlinks[year][1] else: - logbookpath = Path(expologbase) / year / settings.DEFAULT_LOGBOOK_FILE - expedition.logbookfile = settings.DEFAULT_LOGBOOK_FILE - parsefunc = settings.DEFAULT_LOGBOOK_PARSER + logbookpath = Path(expologbase) / year / DEFAULT_LOGBOOK_FILE + expedition.logbookfile = DEFAULT_LOGBOOK_FILE + parsefunc = DEFAULT_LOGBOOK_PARSER cache_filename = Path(str(logbookpath) + ".cache") if not cache_filename.is_file(): print(" - Cache file does not exist \"" + str(cache_filename) +"\"") expedition.save() logbook_cached = False - if False: # enable cache system - now = time.time() - bad_cache = False # temporarily disable reading the cache - buggy - try: - cache_t = os.path.getmtime(cache_filename) - if os.path.getmtime(logbookpath) - cache_t > 2: # at least 2 secs later - print(" - ! Cache is older than the logbook file") - bad_cache= True - if now - cache_t > 30*24*60*60: - print(" - ! 
Cache is > 30 days old") - bad_cache= True - if bad_cache: - print(" - so cache is either stale or more than 30 days old. Deleting it.") - os.remove(cache_filename) - logentries=[] - print(" - Deleted stale or corrupt cache file") - raise - try: - # print(" - Reading cache: " + str(cache_filename), end='') - with open(cache_filename, "rb") as f: - year, lbsize, n, logentries = pickle.load(f) - if validcache(year, n, lbsize): - print(f" -- {year} : Loaded {len(logentries)} log entries") - logbook_cached = True - else: - print(" !- {year} : Cache failed validity checks") - raise - except: - print(" ! Failed to load corrupt cache (or I was told to ignore it). Deleting it.") - os.remove(cache_filename) - logentries=[] - raise - except : - print(" - Cache old or de-pickle failure \"" + str(cache_filename) +"\"") - try: - file_in = open(logbookpath,'rb') - txt = file_in.read().decode("utf-8") - file_in.close() - logbook_parseable = True - except (IOError): - logbook_parseable = False - print(" ! Couldn't open logbook as UTF-8 " + logbookpath) - else: - try: - file_in = open(logbookpath,'rb') - txt = file_in.read().decode("utf-8") - file_in.close() - logbook_parseable = True - except (IOError): - logbook_parseable = False - print(" ! Couldn't open logbook as UTF-8 " + logbookpath) - except: - logbook_parseable = False - print(" ! Very Bad Error opening " + logbookpath) + + try: + file_in = open(logbookpath,'rb') + txt = file_in.read().decode("utf-8") + file_in.close() + logbook_parseable = True + except (IOError): + logbook_parseable = False + print(" ! Couldn't open logbook as UTF-8 " + logbookpath) + except: + logbook_parseable = False + print(" ! 
Very Bad Error opening " + logbookpath) if logbook_parseable: parser = globals()[parsefunc] print(f' - {year} parsing with {parsefunc}') parser(year, expedition, txt) # this launches the right parser for this year - # SetDatesFromLogbookEntries(expedition) - # if len(logentries) >0: - # print(" - Cacheing " , len(logentries), " log entries") - # lbsize = logbookpath.stat().st_size - # with open(cache_filename, "wb") as fc: # we much check that permission are g+w ! or expo can't delete the cache - # logbk=(expedition,lbsize,len(logentries),logentries) - # pickle.dump(logbk, fc, protocol=4) - # else: - # print(" ! NO TRIP entries found in logbook, check the syntax.") - i=0 for entrytuple in logentries: # date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1 = entrytuple @@ -644,7 +600,6 @@ def LoadLogbookForExpedition(expedition): EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0, tripid1) i +=1 - # SetDatesFromLogbookEntries(expedition) if len(logentries) == expect: # print(f"OK {year} {len(logentries):5d} is {expect}\n") @@ -656,7 +611,6 @@ def LoadLogbookForExpedition(expedition): def LoadLogbooks(): """ This is the master function for parsing all logbooks into the Troggle database. - Parser settings appropriate for each year are set in settings.py LOGBOOK_PARSER_SETTINGS. This should be rewritten to use coroutines to load all logbooks from disc in parallel, but must be serialised to write to database as sqlite is single-user. """