From ec040824f632ca31e074fdba4dc535a10785ab75 Mon Sep 17 00:00:00 2001
From: Philip Sargent <philip.sargent@gmail.com>
Date: Wed, 21 Dec 2022 02:05:26 +0000
Subject: [PATCH] fix frontmatter/endmatter

---
 core/views/other.py | 15 ++++++++---
 parsers/imports.py  |  2 +-
 parsers/logbooks.py | 62 +++++++++++++++++++++++++++++++++++----------
 3 files changed, 61 insertions(+), 18 deletions(-)
diff --git a/core/views/other.py b/core/views/other.py
index b590f21..167e0b2 100644
--- a/core/views/other.py
+++ b/core/views/other.py
@@ -181,7 +181,16 @@ def exportlogbook(request,year=None,extension=None):
         response['Content-Disposition'] = 'attachment; filename='+filename
         t=loader.get_template(template)
         logbookfile = (t.render({'logbook_entries':logbook_entries}))
-        
+
+        endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html")
+        endmatter = ""
+        if endpath.is_file():         
+            try:
+                with open(endpath,"r") as end:
+                    endmatter = end.read()                    
+            except:
+                print("   ! Very Bad Error opening " + endpath)
+
         frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
         if frontpath.is_file():         
             try:
@@ -189,9 +198,9 @@ def exportlogbook(request,year=None,extension=None):
                     frontmatter = front.read()                    
             except:
                 print("   ! Very Bad Error opening " + frontpath)
-            logbookfile = re.sub(r"<body>", "<body>\n"+frontmatter , logbookfile)     
+            logbookfile = re.sub(r"<body>", "<body>\n"+frontmatter+endmatter , logbookfile)     
         else:
-            logbookfile = re.sub(r"<body>", f"<body>\n<h1>Expo {year}</h1>\n", logbookfile)     
+            logbookfile = re.sub(r"<body>", f"<body>\n<h1>Expo {year}</h1>\n"+endmatter, logbookfile)     
 
 
         dir = Path(settings.EXPOWEB) / "years" / year
diff --git a/parsers/imports.py b/parsers/imports.py
index 4b531bc..ac671dc 100644
--- a/parsers/imports.py
+++ b/parsers/imports.py
@@ -41,7 +41,7 @@ def import_logbooks():
     with transaction.atomic():
         troggle.parsers.logbooks.LoadLogbooks()
 
-def import_logbook(year=1989):
+def import_logbook(year=1992):
     print(f"-- Importing Logbook {year}")
     with transaction.atomic():
         troggle.parsers.logbooks.LoadLogbook(year)
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 5a1a2f4..34b564e 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -49,18 +49,15 @@ data for old logbooks. New design needed, with a mechanism for flagging fixtures
 '''
 MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
 BLOG_PARSER_SETTINGS = {
-#               "2022": ("ukcavingblog.html", "parser_blog"), 
-#               "2019": ("ukcavingblog.html", "parser_blog"), 
-#               "2018": ("ukcavingblog.html", "parser_blog"), 
+#               "2022": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
+#               "2019": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
+#               "2018": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
 #               "2017": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
             }
 DEFAULT_LOGBOOK_FILE = "logbook.html"
 DEFAULT_LOGBOOK_PARSER = "parser_html"
 # All years since 2002 use the default value for Logbook parser
 LOGBOOK_PARSER_SETTINGS = {               
-#               "2009": ("2009logbook.txt", "wiki_parser"), # converted to html
-#               "2008": ("2008logbook.txt", "wiki_parser"), # converted to html
-#               "2006": ("logbook/logbook_06.txt", "wiki_parser"), # converted to html
                 "2002": ("logbook.html", "parser_html"), 
                 "2001": ("log.htm", "parser_html_01"), 
                 "2000": ("log.htm", "parser_html_01"), 
@@ -71,9 +68,9 @@ LOGBOOK_PARSER_SETTINGS = {
                 "1995": ("log.htm", "parser_html_01"), 
                 "1994": ("log.htm", "parser_html_01"), 
                 "1993": ("log.htm", "parser_html_01"), 
-                "1992": ("log.htm", "parser_html_01"), 
-                "1991": ("log.htm", "parser_html_01"), 
-                "1990": ("log.htm", "parser_html_01"), 
+                "1992": ("logbook.html", "parser_html"), 
+                "1991": ("logbook.html", "parser_html"), 
+                "1990": ("logbook.html", "parser_html"), 
                 "1989": ("logbook.html", "parser_html"), 
                 "1988": ("logbook.html", "parser_html"), 
                 "1987": ("logbook.html", "parser_html"), 
@@ -83,13 +80,12 @@ LOGBOOK_PARSER_SETTINGS = {
                 "1982": ("logbook.html", "parser_html"), 
             }
 
-entries = { "2022": 86, "2019": 56, "2018": 100, "2017": 76, "2016": 83, "2015": 79, 
-    "2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 53, 
+entries = { "2022": 86, "2019": 56, "2018": 100, "2017": 76, "2016": 83, "2015": 80, 
+    "2014": 65, "2013": 52, "2012": 75, "2011": 71, "2010": 22, "2009": 53, 
     "2008": 49, "2007": 113, "2006": 60, "2005": 55, "2004": 76, "2003": 42, "2002": 31, 
-    "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 95, "1995": 42, 
+    "2001": 49, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 95, "1995": 42, 
     "1994": 32, "1993": 41, "1992": 62, "1991": 39, "1990": 87, "1989": 63,"1988": 61,"1987": 34,
     "1985": 24, "1984": 32, "1983": 52, "1982": 42,}
-# Logbooks log.htm exist for 87, 88, 89 but have no full-working parser, or need hand-editing.
 
 logentries = [] # the entire logbook for one year is a single object: a list of entries
 noncaveplaces = [ "Journey", "Loser Plateau", "UNKNOWN", 'plateau', 
@@ -290,10 +286,14 @@ def ParseDate(tripdate, year):
         logdataissues["tripdate"]=message
         return datetime.date(1970, 1, 1)
         
-# 2002, 2004 - now
+# 2002 - now
 def parser_html(year, expedition, txt, seq=""):
     '''This uses some of the more obscure capabilities of regular expressions,
     see https://docs.python.org/3/library/re.html
+    
+    Round trips: the logbook export view inserts the stashed endmatter.html straight
+    after the frontmatter at the top of <body>, so an export-then-import moves the
+    endmatter up into the frontmatter. This is deliberate and makes sense when moving from parser_html_01 format logfiles.
     '''
     global logentries
     global logdataissues
@@ -307,6 +307,16 @@ def parser_html(year, expedition, txt, seq=""):
         frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
         with open(frontpath,"w") as front:
             front.write(headpara+"\n")
+            
+    # extract END material and stash for later use when rebuilding from list of entries
+    endmatch = re.match(r"(?i)(?s).*<hr\s*/>([\s\S]*?)(?=</body)", txt)
+    endpara = endmatch.groups()[0].strip()
+    
+    # print(f" - endpara:\n'{endpara}'")
+    if(len(endpara)>0):
+        endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html")
+        with open(endpath,"w") as end:
+            end.write(endpara+"\n")            
     
     tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
     logbook_entry_count = 0
@@ -371,7 +381,31 @@ def parser_html_01(year, expedition, txt, seq=""):
     global logentries
     global logdataissues
     errorcount = 0
+    
+    # extract front material and stash for later use when rebuilding from list of entries
+    headmatch = re.match(r"(?i)(?s).*<body[^>]*>(.*?)<hr.*", txt)
+    headpara = headmatch.groups()[0].strip()
+    
+    # print(f" - headpara:\n'{headpara}'")
+    if(len(headpara)>0):
+        frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
+        with open(frontpath,"w") as front:
+            front.write(headpara+"\n")
 
+    # extract END material and stash for later use when rebuilding from list of entries
+    endmatch = re.match(r"(?i)(?s).*<hr\s*/>([\s\S]*?)(?=</body)", txt)
+    if endmatch:
+        endpara = endmatch.groups()[0].strip()
+    else:
+        print(f" ! - {year} NO endmatch")
+        endpara = ""
+    
+    # print(f" - endpara:\n'{endpara}'")
+    if(len(endpara)>0):
+        endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html")
+        with open(endpath,"w") as end:
+            end.write(endpara+"\n")     
+            
     tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
     logbook_entry_count = 0
     for trippara in tripparas: