forked from expo/troggle

Updates to make 2018 blog merge work (faster)

commit bb14c94ab1
parent 7e9bb73777
@@ -171,6 +171,7 @@ class Wallet(models.Model):
     '''
     fpath = models.CharField(max_length=200)
     walletname = models.CharField(max_length=200)
+    walletdate = models.DateField(blank=True, null=True)
 
     class Meta:
         ordering = ('walletname',)
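The new walletdate column is what makes the faster lookups further down possible. In a stock Django project a field added like this would normally be accompanied by a schema migration; a minimal sketch of one, assuming an app label of 'core' and an arbitrary previous migration (neither is shown in this commit):

from django.db import migrations, models

class Migration(migrations.Migration):
    # Hypothetical: app label and dependency are assumptions, not taken from this commit.
    dependencies = [
        ('core', '0001_initial'),
    ]
    operations = [
        migrations.AddField(
            model_name='wallet',
            name='walletdate',
            field=models.DateField(blank=True, null=True),
        ),
    ]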
@@ -238,6 +239,8 @@ class Wallet(models.Model):
 
     # Yes this is horribly, horribly inefficient, esp. for a page that have date, people and cave in it
     def date(self):
+        if self.walletdate:
+            return self.walletdate
         if not self.get_json():
             return None
         jsondata = self.get_json()
@@ -254,7 +257,9 @@ class Wallet(models.Model):
             samedate = datetime.date.fromisoformat(datestr[:10])
         except:
             samedate = None
-        return samedate.isoformat()
+        self.walletdate = samedate.isoformat()
+        self.save()
+        return self.walletdate
 
     def people(self):
         if not self.get_json():
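Taken together, the two hunks above turn Wallet.date() into a write-back cache: the first call still pays for the JSON file read, but the result is stored in walletdate and saved, so later calls (and the queryset filter used in the view below) become a plain column read. A condensed, self-contained sketch of the pattern, with get_json() and save() stubbed out so it runs on its own; the real troggle method derives datestr from the wallet's JSON, which is elided here, and the sketch adds a None guard the diff does not have:

import datetime

class WalletSketch:
    """Illustrates only the caching logic added in this commit; not the real model."""

    def __init__(self, jsondata=None):
        self.walletdate = None            # stands in for the new DateField
        self._jsondata = jsondata

    def get_json(self):
        return self._jsondata             # troggle reads a JSON file from disk here

    def save(self):
        pass                              # troggle persists the model row here

    def date(self):
        if self.walletdate:               # cache hit: no file access at all
            return self.walletdate
        jsondata = self.get_json()
        if not jsondata:
            return None
        try:
            samedate = datetime.date.fromisoformat(jsondata["date"][:10])
        except Exception:
            samedate = None
        if samedate is None:              # guarded here; the diff assumes samedate is set
            return None
        self.walletdate = samedate.isoformat()
        self.save()                       # persist so later calls and filters can use the column
        return self.walletdate

print(WalletSketch({"date": "2018-07-14"}).date())   # 2018-07-14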
@@ -1,4 +1,5 @@
 import datetime
+import time
 import os.path
 import re
 
@@ -186,24 +187,26 @@ def personexpedition(request, first_name='', last_name='', year=''):
 
 
 def logbookentry(request, date, slug):
-    this_logbookentry = LogbookEntry.objects.filter(date=date, slug=slug)
+    # start = time.time()
+    trips = LogbookEntry.objects.filter(date=date) # all the trips not just this one
+    this_logbookentry = trips.filter(date=date, slug=slug)
 
     if this_logbookentry:
         if len(this_logbookentry)>1:
             return render(request, 'object_list.html',{'object_list':this_logbookentry})
         else:
-            trips = LogbookEntry.objects.filter(date=date)
             wallets = set()
-            refwallets = Wallet.objects.filter(survexblock__date=date)
+            allwallets = Wallet.objects.all()
+            refwallets = allwallets.filter(survexblock__date=date)
             for r in refwallets:
                 wallets.add(r)
 
-            allwallets = Wallet.objects.all()
             # Note that w.year() only works for wallets which have a valid JSON file existing
-            for w in allwallets:
-                if w.date() == date:
-                    wallets.add(w)
+            # This is very slow with a big lag as w.date() is a computed field
+            # Noticably slow with WSL2 and NTFS filesystem, even with caching as walletdate.
+            jwallets = allwallets.filter(walletdate=date)
+            for j in jwallets:
+                wallets.add(j)
             thisexpo = this_expedition = Expedition.objects.get(year=int(date[0:4]))
             if thisexpo:
                 expeditionday = thisexpo.get_expedition_day(date)
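The win here is that the old code called w.date() on every wallet in the database, and each call could open a JSON file on disk, whereas allwallets.filter(walletdate=date) is a single SQL WHERE clause against the column cached above. The commented-out start/duration lines in this and the next hunk show how the difference was apparently checked; a stand-alone sketch of that timing pattern, where the timed() helper is illustrative and not part of troggle:

import time

def timed(label, fn):
    """Illustrative helper mirroring the commented-out time.time() calls in the diff."""
    start = time.time()
    result = fn()
    duration = time.time() - start
    print(f"--- {label} after {duration:.2f} seconds")
    return result

# Hypothetical usage inside the view, with Wallet and date as in the diff:
# slow = timed("python loop", lambda: [w for w in Wallet.objects.all() if w.date() == date])
# fast = timed("SQL filter",  lambda: list(Wallet.objects.filter(walletdate=date)))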
@@ -214,6 +217,8 @@ def logbookentry(request, date, slug):
         this_logbookentry=this_logbookentry[0]
         # This is the only page that uses presontrip_next and persontrip_prev
         # and it is calculated on the fly in the model
+        # duration = time.time()-start
+        # print(f"--- Render after {duration:.2f} seconds")
         return render(request, 'logbookentry.html',
                       {'logbookentry': this_logbookentry, 'trips': trips, 'svxothers': svxothers, 'wallets': wallets})
     else:
@@ -41,9 +41,8 @@ def import_logbooks():
     with transaction.atomic():
         troggle.parsers.logbooks.LoadLogbooks()
 
-def import_logbook(year=2022):
+def import_logbook(year=2018):
     print(f"-- Importing Logbook {year}")
-    print(f"-- - commented out")
     with transaction.atomic():
         troggle.parsers.logbooks.LoadLogbook(year)
 
@@ -51,7 +51,7 @@ MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
 BLOG_PARSER_SETTINGS = {
     # "2022": ("ukcavingblog.html", "parser_blog"),
     "2019": ("ukcavingblog.html", "parser_blog"),
-    "2018": ("ukcavingblog.html", "parser_blog"),
+    # "2018": ("ukcavingblog.html", "parser_blog"),
     # "2017": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
     }
 DEFAULT_LOGBOOK_FILE = "logbook.html"
@@ -83,7 +83,7 @@ LOGBOOK_PARSER_SETTINGS = {
     "1982": ("log.htm", "parser_html_01"),
     }
 
-entries = { "2022": 86, "2019": 56, "2018": 86, "2017": 76, "2016": 83, "2015": 79,
+entries = { "2022": 86, "2019": 56, "2018": 100, "2017": 76, "2016": 83, "2015": 79,
     "2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 53,
     "2008": 49, "2007": 113, "2006": 60, "2005": 55, "2004": 76, "2003": 42, "2002": 31,
     "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 95, "1995": 42,
@@ -138,6 +138,9 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
             tripperson = "Nadia"
         if tripperson =="tcacrossley":
             tripperson = "Tom Crossley"
+        if tripperson =="Samouse1":
+            tripperson = "Todd Rye"
+
 
 
         personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
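Each new blog author handled this way adds another if-branch to GetTripPersons. Purely as an illustration of an alternative, and not something this commit does, the same aliasing could be kept as a small lookup table; only the two mappings visible in this hunk are included:

BLOG_NICKNAMES = {
    "tcacrossley": "Tom Crossley",
    "Samouse1": "Todd Rye",
}

def canonical_tripperson(tripperson):
    """Return the expedition name for a known blog nickname, else the name unchanged."""
    return BLOG_NICKNAMES.get(tripperson, tripperson)

print(canonical_tripperson("Samouse1"))   # Todd Rye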
@@ -497,6 +500,13 @@ def parser_blog(year, expedition, txt, sq=""):
 
     This uses some of the more obscure capabilities of regular expressions,
     see https://docs.python.org/3/library/re.html
+
+    BLOG entries have this structure:
+    <article ... data-author="Tinywoman" data-content="post-298780" id="js-post-298780">
+    <article class="message-body js-selectToQuote">
+    </article>
+    </article>
+    So the content is nested inside the header. Attachments (images) come after the content.
     '''
     global logentries
     global logdataissues
@@ -508,19 +518,26 @@ def parser_blog(year, expedition, txt, sq=""):
         print(message)
 
     # (?= is a non-consuming match, see https://docs.python.org/3/library/re.html
-    tripparas = re.findall(r"<article class=\"message-body js-selectToQuote\"\>\s*([\s\S]*?)(?=</article)", txt)
+    tripparas = re.findall(r"<article class=\"message-body js-selectToQuote\"\>\s*([\s\S]*?)(</article[^>]*>)([\s\S]*?)(?=</article)", txt)
     if not ( tripparas ) :
         message = f" ! - Skipping on failure to parse article content: {txt[:500]}"
         print(message)
 
     if (len(tripheads) !=len(tripparas)):
         print(f"{len(tripheads)} != {len(tripparas)}")
+        print(f"{len(tripheads)} - {len(tripparas)}")
 
     location = "Plateau" # best guess, fix manually later
     tu = 0
     logbook_entry_count = 0
     for i in range(0, len(tripparas)):
-        tripcontent = tripparas[i]
+        tripstuff = tripparas[i]
+        attach = tripstuff[2]
+        # note use on non-greedy *? regex idiom here
+        attach = re.sub(r"<div class=\"file-content\">[\s\S]*?(?=</li>)","",attach)
+        attach = re.sub(r"<footer[\s\S]*(</footer>)","",attach)
+        tripcontent = tripstuff[0] + attach
+        #print(f"{i} - {len(tripstuff)} - {tripstuff[1]}")
         triphead = tripheads[i]
         logbook_entry_count += 1
        tid = set_trip_id(year,logbook_entry_count) +"_blog" + sq
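The reason the loop body changes from tripcontent = tripparas[i] to indexing tripstuff[0] and tripstuff[2] is documented re.findall behaviour: with one capture group it returns a list of strings, but with more than one group it returns a list of tuples, one element per group. A self-contained illustration using the old and new patterns from this hunk; the sample HTML is made up to mimic the nested structure described in the docstring above:

import re

# Fabricated sample mimicking the nested <article> structure of a blog post.
sample = ('<article data-author="Tinywoman" id="js-post-298780">'
          '<article class="message-body js-selectToQuote"> trip text </article>'
          ' attachment markup </article>')

# Old pattern, one group: findall returns plain strings.
old = re.findall(r"<article class=\"message-body js-selectToQuote\"\>\s*([\s\S]*?)(?=</article)", sample)
print(old)          # ['trip text ']

# New pattern, three groups: findall returns 3-tuples (body, inner </article>, attachments).
new = re.findall(r"<article class=\"message-body js-selectToQuote\"\>\s*([\s\S]*?)(</article[^>]*>)([\s\S]*?)(?=</article)", sample)
print(new[0][0])    # 'trip text ' - the message body, i.e. tripstuff[0]
print(new[0][2])    # ' attachment markup ' - what the re.sub calls strip down, i.e. tripstuff[2]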
@@ -685,7 +702,14 @@ def LoadLogbook(year):
     TROG['pagecache']['expedition'][year] = None # clear cache
 
     expo = Expedition.objects.get(year=year)
+    year = expo.year # some type funny
     nlbe[expo] = LoadLogbookForExpedition(expo) # this actually loads the logbook for one expo
+    if year in BLOG_PARSER_SETTINGS:
+        print("BLOG parsing")
+        LOGBOOK_PARSER_SETTINGS[year] = BLOG_PARSER_SETTINGS[year]
+        nlbe[expo] = LoadLogbookForExpedition(expo, clean=False) # this loads the blog logbook for one expo
+    else:
+        print(f" {year} not in {BLOG_PARSER_SETTINGS}")
 
 def LoadLogbooks():
     """ This is the master function for parsing all logbooks into the Troggle database.
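The `year = expo.year # some type funny` line appears to be doing real work: BLOG_PARSER_SETTINGS is keyed by strings such as "2019", so if the year reaches LoadLogbook() as an int the `year in BLOG_PARSER_SETTINGS` test quietly fails, and re-reading it from the Expedition object presumably normalises the type. A tiny illustration of the mismatch the comment seems to allude to:

BLOG_PARSER_SETTINGS = {"2019": ("ukcavingblog.html", "parser_blog")}

print(2019 in BLOG_PARSER_SETTINGS)        # False - an int never equals the str key "2019"
print("2019" in BLOG_PARSER_SETTINGS)      # True
print(str(2019) in BLOG_PARSER_SETTINGS)   # True - normalising the type first, as the diff does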
@@ -4,6 +4,7 @@
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
 <title>{{logbook_entries.0.expedition}} Expo Logbook</title>
 <link rel="stylesheet" href="../../css/main2.css" />
+<meta name="keywords" content="NOEDIT">
 <style>figure {font-weight: bold; font-size: small; font-family: sans-serif;font-variant-caps: small-caps;}</style>
 </head>
 <!-- Exported by troggle in this format after having been imported using a different format and a different