From bb14c94ab10cbd279586c97822372bba8375b67b Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Mon, 19 Dec 2022 20:13:26 +0000 Subject: [PATCH] Updates to make 2018 blog merge work (faster) --- core/models/survex.py | 9 +++++++-- core/views/logbooks.py | 23 +++++++++++++--------- parsers/imports.py | 3 +-- parsers/logbooks.py | 34 ++++++++++++++++++++++++++++----- templates/logbook2005style.html | 1 + 5 files changed, 52 insertions(+), 18 deletions(-) diff --git a/core/models/survex.py b/core/models/survex.py index cc9b9f8..5d60e62 100644 --- a/core/models/survex.py +++ b/core/models/survex.py @@ -171,7 +171,8 @@ class Wallet(models.Model): ''' fpath = models.CharField(max_length=200) walletname = models.CharField(max_length=200) - + walletdate = models.DateField(blank=True, null=True) + class Meta: ordering = ('walletname',) @@ -238,6 +239,8 @@ class Wallet(models.Model): # Yes this is horribly, horribly inefficient, esp. for a page that have date, people and cave in it def date(self): + if self.walletdate: + return self.walletdate if not self.get_json(): return None jsondata = self.get_json() @@ -254,7 +257,9 @@ class Wallet(models.Model): samedate = datetime.date.fromisoformat(datestr[:10]) except: samedate = None - return samedate.isoformat() + self.walletdate = samedate.isoformat() + self.save() + return self.walletdate def people(self): if not self.get_json(): diff --git a/core/views/logbooks.py b/core/views/logbooks.py index 517a48b..52e2d11 100644 --- a/core/views/logbooks.py +++ b/core/views/logbooks.py @@ -1,4 +1,5 @@ import datetime +import time import os.path import re @@ -186,24 +187,26 @@ def personexpedition(request, first_name='', last_name='', year=''): def logbookentry(request, date, slug): - this_logbookentry = LogbookEntry.objects.filter(date=date, slug=slug) + # start = time.time() + trips = LogbookEntry.objects.filter(date=date) # all the trips not just this one + this_logbookentry = trips.filter(date=date, slug=slug) if this_logbookentry: if len(this_logbookentry)>1: return render(request, 'object_list.html',{'object_list':this_logbookentry}) else: - trips = LogbookEntry.objects.filter(date=date) wallets = set() - refwallets = Wallet.objects.filter(survexblock__date=date) + allwallets = Wallet.objects.all() + refwallets = allwallets.filter(survexblock__date=date) for r in refwallets: wallets.add(r) - - allwallets = Wallet.objects.all() + # Note that w.year() only works for wallets which have a valid JSON file existing - for w in allwallets: - if w.date() == date: - wallets.add(w) - + # This is very slow with a big lag as w.date() is a computed field + # Noticably slow with WSL2 and NTFS filesystem, even with caching as walletdate. + jwallets = allwallets.filter(walletdate=date) + for j in jwallets: + wallets.add(j) thisexpo = this_expedition = Expedition.objects.get(year=int(date[0:4])) if thisexpo: expeditionday = thisexpo.get_expedition_day(date) @@ -214,6 +217,8 @@ def logbookentry(request, date, slug): this_logbookentry=this_logbookentry[0] # This is the only page that uses presontrip_next and persontrip_prev # and it is calculated on the fly in the model + # duration = time.time()-start + # print(f"--- Render after {duration:.2f} seconds") return render(request, 'logbookentry.html', {'logbookentry': this_logbookentry, 'trips': trips, 'svxothers': svxothers, 'wallets': wallets}) else: diff --git a/parsers/imports.py b/parsers/imports.py index a253964..47d0c4c 100644 --- a/parsers/imports.py +++ b/parsers/imports.py @@ -41,9 +41,8 @@ def import_logbooks(): with transaction.atomic(): troggle.parsers.logbooks.LoadLogbooks() -def import_logbook(year=2022): +def import_logbook(year=2018): print(f"-- Importing Logbook {year}") - print(f"-- - commented out") with transaction.atomic(): troggle.parsers.logbooks.LoadLogbook(year) diff --git a/parsers/logbooks.py b/parsers/logbooks.py index e37780c..d194a5e 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -51,7 +51,7 @@ MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200 BLOG_PARSER_SETTINGS = { # "2022": ("ukcavingblog.html", "parser_blog"), "2019": ("ukcavingblog.html", "parser_blog"), - "2018": ("ukcavingblog.html", "parser_blog"), +# "2018": ("ukcavingblog.html", "parser_blog"), # "2017": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html } DEFAULT_LOGBOOK_FILE = "logbook.html" @@ -83,7 +83,7 @@ LOGBOOK_PARSER_SETTINGS = { "1982": ("log.htm", "parser_html_01"), } -entries = { "2022": 86, "2019": 56, "2018": 86, "2017": 76, "2016": 83, "2015": 79, +entries = { "2022": 86, "2019": 56, "2018": 100, "2017": 76, "2016": 83, "2015": 79, "2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 53, "2008": 49, "2007": 113, "2006": 60, "2005": 55, "2004": 76, "2003": 42, "2002": 31, "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 95, "1995": 42, @@ -138,6 +138,9 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None): tripperson = "Nadia" if tripperson =="tcacrossley": tripperson = "Tom Crossley" + if tripperson =="Samouse1": + tripperson = "Todd Rye" + personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower()) @@ -497,6 +500,13 @@ def parser_blog(year, expedition, txt, sq=""): This uses some of the more obscure capabilities of regular expressions, see https://docs.python.org/3/library/re.html + + BLOG entries have this structure: +
+
+
+
+ So the content is nested inside the header. Attachments (images) come after the content. ''' global logentries global logdataissues @@ -508,19 +518,26 @@ def parser_blog(year, expedition, txt, sq=""): print(message) # (?= is a non-consuming match, see https://docs.python.org/3/library/re.html - tripparas = re.findall(r"
\s*([\s\S]*?)(?=\s*([\s\S]*?)(]*>)([\s\S]*?)(?=[\s\S]*?(?=)","",attach) + attach = re.sub(r")","",attach) + tripcontent = tripstuff[0] + attach + #print(f"{i} - {len(tripstuff)} - {tripstuff[1]}") triphead = tripheads[i] logbook_entry_count += 1 tid = set_trip_id(year,logbook_entry_count) +"_blog" + sq @@ -684,8 +701,15 @@ def LoadLogbook(year): nlbe={} TROG['pagecache']['expedition'][year] = None # clear cache - expo = Expedition.objects.get(year=year) + expo = Expedition.objects.get(year=year) + year = expo.year # some type funny nlbe[expo] = LoadLogbookForExpedition(expo) # this actually loads the logbook for one expo + if year in BLOG_PARSER_SETTINGS: + print("BLOG parsing") + LOGBOOK_PARSER_SETTINGS[year] = BLOG_PARSER_SETTINGS[year] + nlbe[expo] = LoadLogbookForExpedition(expo, clean=False) # this loads the blog logbook for one expo + else: + print(f" {year} not in {BLOG_PARSER_SETTINGS}") def LoadLogbooks(): """ This is the master function for parsing all logbooks into the Troggle database. diff --git a/templates/logbook2005style.html b/templates/logbook2005style.html index bf5534c..0104070 100644 --- a/templates/logbook2005style.html +++ b/templates/logbook2005style.html @@ -4,6 +4,7 @@ {{logbook_entries.0.expedition}} Expo Logbook +