fix frontmatter/endmatter

2022-12-21 02:05:26 +00:00
parent 517da57a0c
commit ec040824f6
3 changed files with 61 additions and 18 deletions
--- a/core/views/other.py
+++ b/core/views/other.py
@@ -181,7 +181,16 @@ def exportlogbook(request,year=None,extension=None):
        response['Content-Disposition'] = 'attachment; filename='+filename
        t=loader.get_template(template)
        logbookfile = (t.render({'logbook_entries':logbook_entries}))
-        
+
        endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html")
        endmatter = ""
        if endpath.is_file():         
            try:
                with open(endpath,"r") as end:
                    endmatter = end.read()                    
            except:
                print("   ! Very Bad Error opening " + endpath)
        frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
        if frontpath.is_file():         
            try:
@@ -189,9 +198,9 @@ def exportlogbook(request,year=None,extension=None):
                    frontmatter = front.read()                    
            except:
                print("   ! Very Bad Error opening " + frontpath)
-            logbookfile = re.sub(r"<body>", "<body>\n"+frontmatter , logbookfile)     
+            logbookfile = re.sub(r"<body>", "<body>\n"+frontmatter+endmatter , logbookfile)     
        else:
-            logbookfile = re.sub(r"<body>", f"<body>\n<h1>Expo {year}</h1>\n", logbookfile)     
+            logbookfile = re.sub(r"<body>", f"<body>\n<h1>Expo {year}</h1>\n"+endmatter, logbookfile)     
        dir = Path(settings.EXPOWEB) / "years" / year
--- a/parsers/imports.py
+++ b/parsers/imports.py
@@ -41,7 +41,7 @@ def import_logbooks():
    with transaction.atomic():
        troggle.parsers.logbooks.LoadLogbooks()
-def import_logbook(year=1989):
+def import_logbook(year=1992):
    print(f"-- Importing Logbook {year}")
    with transaction.atomic():
        troggle.parsers.logbooks.LoadLogbook(year)
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -49,18 +49,15 @@ data for old logbooks. New design needed, with a mechanism for flagging fixtures
 '''
 MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
 BLOG_PARSER_SETTINGS = {
-#               "2022": ("ukcavingblog.html", "parser_blog"), 
+#               "2022": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
-#               "2019": ("ukcavingblog.html", "parser_blog"), 
+#               "2019": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
-#               "2018": ("ukcavingblog.html", "parser_blog"), 
+#               "2018": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
 #               "2017": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
            }
 DEFAULT_LOGBOOK_FILE = "logbook.html"
 DEFAULT_LOGBOOK_PARSER = "parser_html"
 # All years since 2002 use the default value for Logbook parser
 LOGBOOK_PARSER_SETTINGS = {               
 #               "2009": ("2009logbook.txt", "wiki_parser"), # converted to html
 #               "2008": ("2008logbook.txt", "wiki_parser"), # converted to html
 #               "2006": ("logbook/logbook_06.txt", "wiki_parser"), # converted to html
                "2002": ("logbook.html", "parser_html"), 
                "2001": ("log.htm", "parser_html_01"), 
                "2000": ("log.htm", "parser_html_01"), 
@@ -71,9 +68,9 @@ LOGBOOK_PARSER_SETTINGS = {
                "1995": ("log.htm", "parser_html_01"), 
                "1994": ("log.htm", "parser_html_01"), 
                "1993": ("log.htm", "parser_html_01"), 
-                "1992": ("log.htm", "parser_html_01"), 
+                "1992": ("logbook.html", "parser_html"), 
-                "1991": ("log.htm", "parser_html_01"), 
+                "1991": ("logbook.html", "parser_html"), 
-                "1990": ("log.htm", "parser_html_01"), 
+                "1990": ("logbook.html", "parser_html"), 
                "1989": ("logbook.html", "parser_html"), 
                "1988": ("logbook.html", "parser_html"), 
                "1987": ("logbook.html", "parser_html"), 
@@ -83,13 +80,12 @@ LOGBOOK_PARSER_SETTINGS = {
                "1982": ("logbook.html", "parser_html"), 
            }
-entries = { "2022": 86, "2019": 56, "2018": 100, "2017": 76, "2016": 83, "2015": 79, 
+entries = { "2022": 86, "2019": 56, "2018": 100, "2017": 76, "2016": 83, "2015": 80, 
-    "2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 53, 
+    "2014": 65, "2013": 52, "2012": 75, "2011": 71, "2010": 22, "2009": 53, 
    "2008": 49, "2007": 113, "2006": 60, "2005": 55, "2004": 76, "2003": 42, "2002": 31, 
-    "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 95, "1995": 42, 
+    "2001": 49, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 95, "1995": 42, 
    "1994": 32, "1993": 41, "1992": 62, "1991": 39, "1990": 87, "1989": 63,"1988": 61,"1987": 34,
    "1985": 24, "1984": 32, "1983": 52, "1982": 42,}
 # log.htm logbooks also exist for 1987, 1988 and 1989, but they have no fully working parser or need hand-editing.
 logentries = [] # the entire logbook for one year is a single object: a list of entries
 noncaveplaces = [ "Journey", "Loser Plateau", "UNKNOWN", 'plateau', 
@@ -290,10 +286,14 @@ def ParseDate(tripdate, year):
        logdataissues["tripdate"]=message
        return datetime.date(1970, 1, 1)
-# 2002, 2004 - now
+# 2002 - now
 def parser_html(year, expedition, txt, seq=""):
    '''This uses some of the more obscure capabilities of regular expressions,
    see https://docs.python.org/3/library/re.html
    It is not apparent from this function, but a round-trip export-then-import
    will move the endmatter up to the frontmatter. This is the intended
    behaviour when converting logfiles from the parser_html_01 format.
    '''
    global logentries
    global logdataissues
@@ -307,6 +307,16 @@ def parser_html(year, expedition, txt, seq=""):
        frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
        with open(frontpath,"w") as front:
            front.write(headpara+"\n")
    # extract END material and stash for later use when rebuilding from list of entries
    endmatch = re.match(r"(?i)(?s).*<hr\s*/>([\s\S]*?)(?=</body)", txt)
    endpara = endmatch.groups()[0].strip()
    # print(f" - endpara:\n'{endpara}'")
    if(len(endpara)>0):
        endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html")
        with open(endpath,"w") as end:
            end.write(endpara+"\n")            
    tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
    logbook_entry_count = 0
@@ -371,7 +381,31 @@ def parser_html_01(year, expedition, txt, seq=""):
    global logentries
    global logdataissues
    errorcount = 0
    # extract front material and stash for later use when rebuilding from list of entries
    headmatch = re.match(r"(?i)(?s).*<body[^>]*>(.*?)<hr.*", txt)
    headpara = headmatch.groups()[0].strip()
    # print(f" - headpara:\n'{headpara}'")
    if(len(headpara)>0):
        frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
        with open(frontpath,"w") as front:
            front.write(headpara+"\n")
    # extract END material and stash for later use when rebuilding from list of entries
    endmatch = re.match(r"(?i)(?s).*<hr\s*/>([\s\S]*?)(?=</body)", txt)
    if endmatch:
        endpara = endmatch.groups()[0].strip()
    else:
        print(f" ! - {year} NO endmatch")
        endpara = ""
    # print(f" - endpara:\n'{endpara}'")
    if(len(endpara)>0):
        endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html")
        with open(endpath,"w") as end:
            end.write(endpara+"\n")     
    tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
    logbook_entry_count = 0
    for trippara in tripparas: