From 259f85742aa0ffabe300329ca0e671ecaa80ef79 Mon Sep 17 00:00:00 2001
From: Philip Sargent <philip.sargent@klebos.com>
Date: Mon, 21 Nov 2022 16:47:25 +0000
Subject: [PATCH] moved parser settings

---
 parsers/logbooks.py | 72 ++++++++++++++++++++++-----------------------
 settings.py         | 37 -----------------------
 2 files changed, 36 insertions(+), 73 deletions(-)

diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index a1df040..d79a989 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -46,41 +46,41 @@ todo='''
 
 '''
 MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
-DEFAULT_LOGBOOK_PARSER = "Parseloghtmltxt"
+DEFAULT_LOGBOOK_PARSER = "parser_html"
 DEFAULT_LOGBOOK_FILE = "logbook.html"
 # All years since 2010 use the default value for Logbook parser
 # but several don't work, and are skipped by the parsing code, e.g. 1983
 LOGBOOK_PARSER_SETTINGS = {
-                "2010": ("logbook.html", "Parseloghtmltxt"), 
-                "2009": ("2009logbook.txt", "Parselogwikitxt"), 
-                "2008": ("2008logbook.txt", "Parselogwikitxt"), 
-                "2007": ("logbook.html", "Parseloghtmltxt"), 
-                "2006": ("logbook.html", "Parseloghtmltxt"), 
-#               "2006": ("logbook/logbook_06.txt", "Parselogwikitxt"), 
-                "2006": ("logbook.html", "Parseloghtmltxt"), 
-                "2005": ("logbook.html", "Parseloghtmltxt"), 
-                "2004": ("logbook.html", "Parseloghtmltxt"), 
-                "2003": ("logbook.html", "Parseloghtml03"), 
-                "2002": ("logbook.html", "Parseloghtmltxt"), 
-                "2001": ("log.htm", "Parseloghtml01"), 
-                "2000": ("log.htm", "Parseloghtml01"), 
-                "1999": ("log.htm", "Parseloghtml01"), 
-                "1998": ("log.htm", "Parseloghtml01"), 
-                "1997": ("log.htm", "Parseloghtml01"), 
-                "1996": ("log.htm", "Parseloghtml01"),
-                "1995": ("log.htm", "Parseloghtml01"), 
-                "1994": ("log.htm", "Parseloghtml01"), 
-                "1993": ("log.htm", "Parseloghtml01"), 
-                "1992": ("log.htm", "Parseloghtml01"), 
-                "1991": ("log.htm", "Parseloghtml01"), 
-                "1990": ("log.htm", "Parseloghtml01"), 
-                "1989": ("log.htm", "Parseloghtml01"), #crashes MySQL
-                "1988": ("log.htm", "Parseloghtml01"), #crashes MySQL
-                "1987": ("log.htm", "Parseloghtml01"), #crashes MySQL
-                "1985": ("log.htm", "Parseloghtml01"), 
-                "1984": ("log.htm", "Parseloghtml01"), 
-                "1983": ("log.htm", "Parseloghtml01"), 
-                "1982": ("log.htm", "Parseloghtml01"), 
+                "2010": ("logbook.html", "parser_html"), 
+                "2009": ("2009logbook.txt", "parser_wiki"), 
+                "2008": ("2008logbook.txt", "parser_wiki"), 
+                "2007": ("logbook.html", "parser_html"), 
+                "2006": ("logbook.html", "parser_html"), 
+#               "2006": ("logbook/logbook_06.txt", "parser_wiki"), 
+                "2006": ("logbook.html", "parser_html"), 
+                "2005": ("logbook.html", "parser_html"), 
+                "2004": ("logbook.html", "parser_html"), 
+                "2003": ("logbook.html", "parser_html_03"), 
+                "2002": ("logbook.html", "parser_html"), 
+                "2001": ("log.htm", "parser_html_01"), 
+                "2000": ("log.htm", "parser_html_01"), 
+                "1999": ("log.htm", "parser_html_01"), 
+                "1998": ("log.htm", "parser_html_01"), 
+                "1997": ("log.htm", "parser_html_01"), 
+                "1996": ("log.htm", "parser_html_01"),
+                "1995": ("log.htm", "parser_html_01"), 
+                "1994": ("log.htm", "parser_html_01"), 
+                "1993": ("log.htm", "parser_html_01"), 
+                "1992": ("log.htm", "parser_html_01"), 
+                "1991": ("log.htm", "parser_html_01"), 
+                "1990": ("log.htm", "parser_html_01"), 
+                "1989": ("log.htm", "parser_html_01"), #crashes MySQL
+                "1988": ("log.htm", "parser_html_01"), #crashes MySQL
+                "1987": ("log.htm", "parser_html_01"), #crashes MySQL
+                "1985": ("log.htm", "parser_html_01"), 
+                "1984": ("log.htm", "parser_html_01"), 
+                "1983": ("log.htm", "parser_html_01"), 
+                "1982": ("log.htm", "parser_html_01"), 
             }
 
 entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79, 
@@ -248,7 +248,7 @@ def ParseDate(tripdate, year):
         return datetime.date(1970, 1, 1)
 
 # (2006 - not any more), 2008 - 2009
-def Parselogwikitxt(year, expedition, txt):
+def parser_wiki(year, expedition, txt):
     global logentries
     global logdataissues
 
@@ -290,7 +290,7 @@ def Parselogwikitxt(year, expedition, txt):
         
 # 2002, 2004, 2005, 2007, 2010 - now
 # 2006 wiki text is incomplete, but the html all there. So using this parser now.
-def Parseloghtmltxt(year, expedition, txt):
+def parser_html(year, expedition, txt):
     global logentries
     global logdataissues
 
@@ -349,7 +349,7 @@ def Parseloghtmltxt(year, expedition, txt):
 
 # main parser for 1991 - 2001.  simpler because the data has been hacked so much to fit it
 # trying it out for years 1982 - 1990 too. Some logbook editing required by hand.. place
-def Parseloghtml01(year, expedition, txt):
+def parser_html_01(year, expedition, txt):
     global logentries
     global logdataissues
     errorcount = 0
@@ -457,7 +457,7 @@ def Parseloghtml01(year, expedition, txt):
                 return
 
 # parser for 2003
-def Parseloghtml03(year, expedition, txt):
+def parser_html_03(year, expedition, txt):
     global logentries
     global logdataissues
 
@@ -469,7 +469,7 @@ def Parseloghtml03(year, expedition, txt):
         
         s = re.match(r"(?s)\s*<p>(.*?)</p>(.*)$", trippara)
         if not ( s ) :
-            message = " ! - Skipping logentry {year} on failure to parse Parseloghtml03: {} {} {}...".format(tid,s,trippara[:300])
+            message = " ! - Skipping logentry {year} on failure to parse parser_html_03: {} {} {}...".format(tid, s, trippara[:300], year=year)  # '{year}' is a named field, so it must be passed by keyword or .format() raises KeyError
             DataIssue.objects.create(parser='logbooks', message=message)
             logdataissues[tid]=message
             print(message)
diff --git a/settings.py b/settings.py
index 051543b..540a6b2 100644
--- a/settings.py
+++ b/settings.py
@@ -69,43 +69,6 @@ FIX_PERMISSIONS = []
 # top-level survex file basename (without .svx)
 SURVEX_TOPNAME = "1623-and-1626-no-schoenberg-hs"
 
-MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
-DEFAULT_LOGBOOK_PARSER = "Parseloghtmltxt"
-DEFAULT_LOGBOOK_FILE = "logbook.html"
-# All years since 2010 use the default value for Logbook parser
-# but several don't work, and are skipped by the parsing code, e.g. 1983
-LOGBOOK_PARSER_SETTINGS = {
-                "2010": ("logbook.html", "Parseloghtmltxt"), 
-                "2009": ("2009logbook.txt", "Parselogwikitxt"), 
-                "2008": ("2008logbook.txt", "Parselogwikitxt"), 
-                "2007": ("logbook.html", "Parseloghtmltxt"), 
-                "2006": ("logbook.html", "Parseloghtmltxt"), 
-#               "2006": ("logbook/logbook_06.txt", "Parselogwikitxt"), 
-                "2006": ("logbook.html", "Parseloghtmltxt"), 
-                "2005": ("logbook.html", "Parseloghtmltxt"), 
-                "2004": ("logbook.html", "Parseloghtmltxt"), 
-                "2003": ("logbook.html", "Parseloghtml03"), 
-                "2002": ("logbook.html", "Parseloghtmltxt"), 
-                "2001": ("log.htm", "Parseloghtml01"), 
-                "2000": ("log.htm", "Parseloghtml01"), 
-                "1999": ("log.htm", "Parseloghtml01"), 
-                "1998": ("log.htm", "Parseloghtml01"), 
-                "1997": ("log.htm", "Parseloghtml01"), 
-                "1996": ("log.htm", "Parseloghtml01"),
-                "1995": ("log.htm", "Parseloghtml01"), 
-                "1994": ("log.htm", "Parseloghtml01"), 
-                "1993": ("log.htm", "Parseloghtml01"), 
-                "1992": ("log.htm", "Parseloghtml01"), 
-                "1991": ("log.htm", "Parseloghtml01"), 
-                "1990": ("log.htm", "Parseloghtml01"), 
-                "1989": ("log.htm", "Parseloghtml01"), #crashes MySQL
-                "1988": ("log.htm", "Parseloghtml01"), #crashes MySQL
-                "1987": ("log.htm", "Parseloghtml01"), #crashes MySQL
-                "1985": ("log.htm", "Parseloghtml01"), 
-                "1984": ("log.htm", "Parseloghtml01"), 
-                "1983": ("log.htm", "Parseloghtml01"), 
-                "1982": ("log.htm", "Parseloghtml01"), 
-            }
 
 # Caves for which survex files exist, but are not otherwise registered
 # replaced (?) by expoweb/cave_data/pendingcaves.txt