fix frontmatter/endmatter

This commit is contained in:
Philip Sargent 2022-12-21 02:05:26 +00:00
parent 517da57a0c
commit ec040824f6
3 changed files with 61 additions and 18 deletions

View File

@@ -181,7 +181,16 @@ def exportlogbook(request,year=None,extension=None):
response['Content-Disposition'] = 'attachment; filename='+filename response['Content-Disposition'] = 'attachment; filename='+filename
t=loader.get_template(template) t=loader.get_template(template)
logbookfile = (t.render({'logbook_entries':logbook_entries})) logbookfile = (t.render({'logbook_entries':logbook_entries}))
endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html")
endmatter = ""
if endpath.is_file():
try:
with open(endpath,"r") as end:
endmatter = end.read()
except:
print(" ! Very Bad Error opening " + endpath)
frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html") frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
if frontpath.is_file(): if frontpath.is_file():
try: try:
@@ -189,9 +198,9 @@ def exportlogbook(request,year=None,extension=None):
frontmatter = front.read() frontmatter = front.read()
except: except:
print(" ! Very Bad Error opening " + frontpath) print(" ! Very Bad Error opening " + frontpath)
logbookfile = re.sub(r"<body>", "<body>\n"+frontmatter , logbookfile) logbookfile = re.sub(r"<body>", "<body>\n"+frontmatter+endmatter , logbookfile)
else: else:
logbookfile = re.sub(r"<body>", f"<body>\n<h1>Expo {year}</h1>\n", logbookfile) logbookfile = re.sub(r"<body>", f"<body>\n<h1>Expo {year}</h1>\n"+endmatter, logbookfile)
dir = Path(settings.EXPOWEB) / "years" / year dir = Path(settings.EXPOWEB) / "years" / year

View File

@@ -41,7 +41,7 @@ def import_logbooks():
with transaction.atomic(): with transaction.atomic():
troggle.parsers.logbooks.LoadLogbooks() troggle.parsers.logbooks.LoadLogbooks()
def import_logbook(year=1989): def import_logbook(year=1992):
print(f"-- Importing Logbook {year}") print(f"-- Importing Logbook {year}")
with transaction.atomic(): with transaction.atomic():
troggle.parsers.logbooks.LoadLogbook(year) troggle.parsers.logbooks.LoadLogbook(year)

View File

@@ -49,18 +49,15 @@ data for old logbooks. New design needed, with a mechanism for flagging fixtures
''' '''
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200 MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
BLOG_PARSER_SETTINGS = { BLOG_PARSER_SETTINGS = {
# "2022": ("ukcavingblog.html", "parser_blog"), # "2022": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
# "2019": ("ukcavingblog.html", "parser_blog"), # "2019": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
# "2018": ("ukcavingblog.html", "parser_blog"), # "2018": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
# "2017": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html # "2017": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
} }
DEFAULT_LOGBOOK_FILE = "logbook.html" DEFAULT_LOGBOOK_FILE = "logbook.html"
DEFAULT_LOGBOOK_PARSER = "parser_html" DEFAULT_LOGBOOK_PARSER = "parser_html"
# All years since 2002 use the default value for Logbook parser # All years since 2002 use the default value for Logbook parser
LOGBOOK_PARSER_SETTINGS = { LOGBOOK_PARSER_SETTINGS = {
# "2009": ("2009logbook.txt", "wiki_parser"), # converted to html
# "2008": ("2008logbook.txt", "wiki_parser"), # converted to html
# "2006": ("logbook/logbook_06.txt", "wiki_parser"), # converted to html
"2002": ("logbook.html", "parser_html"), "2002": ("logbook.html", "parser_html"),
"2001": ("log.htm", "parser_html_01"), "2001": ("log.htm", "parser_html_01"),
"2000": ("log.htm", "parser_html_01"), "2000": ("log.htm", "parser_html_01"),
@@ -71,9 +68,9 @@ LOGBOOK_PARSER_SETTINGS = {
"1995": ("log.htm", "parser_html_01"), "1995": ("log.htm", "parser_html_01"),
"1994": ("log.htm", "parser_html_01"), "1994": ("log.htm", "parser_html_01"),
"1993": ("log.htm", "parser_html_01"), "1993": ("log.htm", "parser_html_01"),
"1992": ("log.htm", "parser_html_01"), "1992": ("logbook.html", "parser_html"),
"1991": ("log.htm", "parser_html_01"), "1991": ("logbook.html", "parser_html"),
"1990": ("log.htm", "parser_html_01"), "1990": ("logbook.html", "parser_html"),
"1989": ("logbook.html", "parser_html"), "1989": ("logbook.html", "parser_html"),
"1988": ("logbook.html", "parser_html"), "1988": ("logbook.html", "parser_html"),
"1987": ("logbook.html", "parser_html"), "1987": ("logbook.html", "parser_html"),
@@ -83,13 +80,12 @@ LOGBOOK_PARSER_SETTINGS = {
"1982": ("logbook.html", "parser_html"), "1982": ("logbook.html", "parser_html"),
} }
entries = { "2022": 86, "2019": 56, "2018": 100, "2017": 76, "2016": 83, "2015": 79, entries = { "2022": 86, "2019": 56, "2018": 100, "2017": 76, "2016": 83, "2015": 80,
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 53, "2014": 65, "2013": 52, "2012": 75, "2011": 71, "2010": 22, "2009": 53,
"2008": 49, "2007": 113, "2006": 60, "2005": 55, "2004": 76, "2003": 42, "2002": 31, "2008": 49, "2007": 113, "2006": 60, "2005": 55, "2004": 76, "2003": 42, "2002": 31,
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 95, "1995": 42, "2001": 49, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 95, "1995": 42,
"1994": 32, "1993": 41, "1992": 62, "1991": 39, "1990": 87, "1989": 63,"1988": 61,"1987": 34, "1994": 32, "1993": 41, "1992": 62, "1991": 39, "1990": 87, "1989": 63,"1988": 61,"1987": 34,
"1985": 24, "1984": 32, "1983": 52, "1982": 42,} "1985": 24, "1984": 32, "1983": 52, "1982": 42,}
# Logbooks log.htm exist for 87, 88, 89 but have no full-working parser, or need hand-editing.
logentries = [] # the entire logbook for one year is a single object: a list of entries logentries = [] # the entire logbook for one year is a single object: a list of entries
noncaveplaces = [ "Journey", "Loser Plateau", "UNKNOWN", 'plateau', noncaveplaces = [ "Journey", "Loser Plateau", "UNKNOWN", 'plateau',
@@ -290,10 +286,14 @@ def ParseDate(tripdate, year):
logdataissues["tripdate"]=message logdataissues["tripdate"]=message
return datetime.date(1970, 1, 1) return datetime.date(1970, 1, 1)
# 2002, 2004 - now # 2002 - now
def parser_html(year, expedition, txt, seq=""): def parser_html(year, expedition, txt, seq=""):
'''This uses some of the more obscure capabilities of regular expressions, '''This uses some of the more obscure capabilities of regular expressions,
see https://docs.python.org/3/library/re.html see https://docs.python.org/3/library/re.html
You can't see it here, but a round-trip export-then-import will move
the endmatter up to the frontmatter. This makes sense when moving
from parser_html_01 format logfiles, believe me.
''' '''
global logentries global logentries
global logdataissues global logdataissues
@@ -307,6 +307,16 @@ def parser_html(year, expedition, txt, seq=""):
frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html") frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
with open(frontpath,"w") as front: with open(frontpath,"w") as front:
front.write(headpara+"\n") front.write(headpara+"\n")
# extract END material and stash for later use when rebuilding from list of entries
endmatch = re.match(r"(?i)(?s).*<hr\s*/>([\s\S]*?)(?=</body)", txt)
endpara = endmatch.groups()[0].strip()
# print(f" - endpara:\n'{endpara}'")
if(len(endpara)>0):
endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html")
with open(endpath,"w") as end:
end.write(endpara+"\n")
tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt) tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
logbook_entry_count = 0 logbook_entry_count = 0
@@ -371,7 +381,31 @@ def parser_html_01(year, expedition, txt, seq=""):
global logentries global logentries
global logdataissues global logdataissues
errorcount = 0 errorcount = 0
# extract front material and stash for later use when rebuilding from list of entries
headmatch = re.match(r"(?i)(?s).*<body[^>]*>(.*?)<hr.*", txt)
headpara = headmatch.groups()[0].strip()
# print(f" - headpara:\n'{headpara}'")
if(len(headpara)>0):
frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
with open(frontpath,"w") as front:
front.write(headpara+"\n")
# extract END material and stash for later use when rebuilding from list of entries
endmatch = re.match(r"(?i)(?s).*<hr\s*/>([\s\S]*?)(?=</body)", txt)
if endmatch:
endpara = endmatch.groups()[0].strip()
else:
print(f" ! - {year} NO endmatch")
endpara = ""
# print(f" - endpara:\n'{endpara}'")
if(len(endpara)>0):
endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html")
with open(endpath,"w") as end:
end.write(endpara+"\n")
tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt) tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
logbook_entry_count = 0 logbook_entry_count = 0
for trippara in tripparas: for trippara in tripparas: