moved parser settings

This commit is contained in:
Philip Sargent 2022-11-21 16:47:25 +00:00
parent a795707552
commit 259f85742a
2 changed files with 36 additions and 73 deletions

View File

@ -46,41 +46,41 @@ todo='''
'''
# Logbook parsing configuration: a title-length limit, the default parser and
# logbook file names, and a per-year table of overrides.
# presumably the maximum number of characters kept for a logbook entry title;
# the code that applies this limit is not visible here - TODO confirm
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
# NOTE(review): DEFAULT_LOGBOOK_PARSER is assigned twice in a row; the second
# assignment ("parser_html") is the value that remains in effect.
DEFAULT_LOGBOOK_PARSER = "Parseloghtmltxt"
DEFAULT_LOGBOOK_PARSER = "parser_html"
DEFAULT_LOGBOOK_FILE = "logbook.html"
# All years since 2010 use the default value for Logbook parser
# but several don't work, and are skipped by the parsing code, e.g. 1983
#
# Maps an expedition year (as a string) to a 2-tuple:
#   (logbook file name, parser name)
# The parser names match the parser functions defined further down
# (parser_wiki, parser_html, parser_html_01, parser_html_03 and their older
# Parselog* spellings); how the name is resolved to a function is not shown
# here - presumably looked up by name, verify against the loading code.
#
# NOTE(review): every year key appears more than once in this literal (first
# with the old Parselog* names, then with the parser_* names, and "2006" twice
# within each group). Python keeps the LAST value given for a repeated key in a
# dict display, so the parser_* entries are the ones that take effect.
# NOTE(review): there is no entry for "1986".
LOGBOOK_PARSER_SETTINGS = {
"2010": ("logbook.html", "Parseloghtmltxt"),
"2009": ("2009logbook.txt", "Parselogwikitxt"),
"2008": ("2008logbook.txt", "Parselogwikitxt"),
"2007": ("logbook.html", "Parseloghtmltxt"),
"2006": ("logbook.html", "Parseloghtmltxt"),
# "2006": ("logbook/logbook_06.txt", "Parselogwikitxt"),
"2006": ("logbook.html", "Parseloghtmltxt"),
"2005": ("logbook.html", "Parseloghtmltxt"),
"2004": ("logbook.html", "Parseloghtmltxt"),
"2003": ("logbook.html", "Parseloghtml03"),
"2002": ("logbook.html", "Parseloghtmltxt"),
"2001": ("log.htm", "Parseloghtml01"),
"2000": ("log.htm", "Parseloghtml01"),
"1999": ("log.htm", "Parseloghtml01"),
"1998": ("log.htm", "Parseloghtml01"),
"1997": ("log.htm", "Parseloghtml01"),
"1996": ("log.htm", "Parseloghtml01"),
"1995": ("log.htm", "Parseloghtml01"),
"1994": ("log.htm", "Parseloghtml01"),
"1993": ("log.htm", "Parseloghtml01"),
"1992": ("log.htm", "Parseloghtml01"),
"1991": ("log.htm", "Parseloghtml01"),
"1990": ("log.htm", "Parseloghtml01"),
"1989": ("log.htm", "Parseloghtml01"), #crashes MySQL
"1988": ("log.htm", "Parseloghtml01"), #crashes MySQL
"1987": ("log.htm", "Parseloghtml01"), #crashes MySQL
"1985": ("log.htm", "Parseloghtml01"),
"1984": ("log.htm", "Parseloghtml01"),
"1983": ("log.htm", "Parseloghtml01"),
"1982": ("log.htm", "Parseloghtml01"),
# Same years again, now using the parser_* function names; these override the
# entries above because they come later in the literal.
"2010": ("logbook.html", "parser_html"),
"2009": ("2009logbook.txt", "parser_wiki"),
"2008": ("2008logbook.txt", "parser_wiki"),
"2007": ("logbook.html", "parser_html"),
"2006": ("logbook.html", "parser_html"),
# "2006": ("logbook/logbook_06.txt", "parser_wiki"),
"2006": ("logbook.html", "parser_html"),
"2005": ("logbook.html", "parser_html"),
"2004": ("logbook.html", "parser_html"),
"2003": ("logbook.html", "parser_html_03"),
"2002": ("logbook.html", "parser_html"),
"2001": ("log.htm", "parser_html_01"),
"2000": ("log.htm", "parser_html_01"),
"1999": ("log.htm", "parser_html_01"),
"1998": ("log.htm", "parser_html_01"),
"1997": ("log.htm", "parser_html_01"),
"1996": ("log.htm", "parser_html_01"),
"1995": ("log.htm", "parser_html_01"),
"1994": ("log.htm", "parser_html_01"),
"1993": ("log.htm", "parser_html_01"),
"1992": ("log.htm", "parser_html_01"),
"1991": ("log.htm", "parser_html_01"),
"1990": ("log.htm", "parser_html_01"),
"1989": ("log.htm", "parser_html_01"), #crashes MySQL
"1988": ("log.htm", "parser_html_01"), #crashes MySQL
"1987": ("log.htm", "parser_html_01"), #crashes MySQL
"1985": ("log.htm", "parser_html_01"),
"1984": ("log.htm", "parser_html_01"),
"1983": ("log.htm", "parser_html_01"),
"1982": ("log.htm", "parser_html_01"),
}
entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
@ -248,7 +248,7 @@ def ParseDate(tripdate, year):
return datetime.date(1970, 1, 1)
# (2006 - not any more), 2008 - 2009
def Parselogwikitxt(year, expedition, txt):
def parser_wiki(year, expedition, txt):
global logentries
global logdataissues
@ -290,7 +290,7 @@ def Parselogwikitxt(year, expedition, txt):
# 2002, 2004, 2005, 2007, 2010 - now
# 2006 wiki text is incomplete, but the html all there. So using this parser now.
def Parseloghtmltxt(year, expedition, txt):
def parser_html(year, expedition, txt):
global logentries
global logdataissues
@ -349,7 +349,7 @@ def Parseloghtmltxt(year, expedition, txt):
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
# trying it out for years 1982 - 1990 too. Some logbook editing required by hand.. place
def Parseloghtml01(year, expedition, txt):
def parser_html_01(year, expedition, txt):
global logentries
global logdataissues
errorcount = 0
@ -457,7 +457,7 @@ def Parseloghtml01(year, expedition, txt):
return
# parser for 2003
def Parseloghtml03(year, expedition, txt):
def parser_html_03(year, expedition, txt):
global logentries
global logdataissues
@ -469,7 +469,7 @@ def Parseloghtml03(year, expedition, txt):
s = re.match(r"(?s)\s*<p>(.*?)</p>(.*)$", trippara)
if not ( s ) :
message = " ! - Skipping logentry {year} on failure to parse Parseloghtml03: {} {} {}...".format(tid,s,trippara[:300])
message = " ! - Skipping logentry {year} on failure to parse parser_html_03: {} {} {}...".format(tid,s,trippara[:300])
DataIssue.objects.create(parser='logbooks', message=message)
logdataissues[tid]=message
print(message)

View File

@ -69,43 +69,6 @@ FIX_PERMISSIONS = []
# top-level survex file basename (without .svx)
SURVEX_TOPNAME = "1623-and-1626-no-schoenberg-hs"
# Logbook parsing configuration: a title-length limit, the default parser and
# logbook file names, and a per-year table of overrides.
# presumably the maximum number of characters kept for a logbook entry title;
# the code that applies this limit is not visible here - TODO confirm
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
DEFAULT_LOGBOOK_PARSER = "Parseloghtmltxt"
DEFAULT_LOGBOOK_FILE = "logbook.html"
# All years since 2010 use the default value for Logbook parser
# but several don't work, and are skipped by the parsing code, e.g. 1983
#
# Maps an expedition year (as a string) to a 2-tuple:
#   (logbook file name, parser name)
# NOTE(review): the parser names here use the Parselog* spelling - confirm
# that functions with these names still exist wherever this table is consumed.
# NOTE(review): "2006" appears twice with the same value (harmless, the last
# one wins) and there is no entry for "1986".
LOGBOOK_PARSER_SETTINGS = {
"2010": ("logbook.html", "Parseloghtmltxt"),
"2009": ("2009logbook.txt", "Parselogwikitxt"),
"2008": ("2008logbook.txt", "Parselogwikitxt"),
"2007": ("logbook.html", "Parseloghtmltxt"),
"2006": ("logbook.html", "Parseloghtmltxt"),
# "2006": ("logbook/logbook_06.txt", "Parselogwikitxt"),
"2006": ("logbook.html", "Parseloghtmltxt"),
"2005": ("logbook.html", "Parseloghtmltxt"),
"2004": ("logbook.html", "Parseloghtmltxt"),
"2003": ("logbook.html", "Parseloghtml03"),
"2002": ("logbook.html", "Parseloghtmltxt"),
"2001": ("log.htm", "Parseloghtml01"),
"2000": ("log.htm", "Parseloghtml01"),
"1999": ("log.htm", "Parseloghtml01"),
"1998": ("log.htm", "Parseloghtml01"),
"1997": ("log.htm", "Parseloghtml01"),
"1996": ("log.htm", "Parseloghtml01"),
"1995": ("log.htm", "Parseloghtml01"),
"1994": ("log.htm", "Parseloghtml01"),
"1993": ("log.htm", "Parseloghtml01"),
"1992": ("log.htm", "Parseloghtml01"),
"1991": ("log.htm", "Parseloghtml01"),
"1990": ("log.htm", "Parseloghtml01"),
"1989": ("log.htm", "Parseloghtml01"), #crashes MySQL
"1988": ("log.htm", "Parseloghtml01"), #crashes MySQL
"1987": ("log.htm", "Parseloghtml01"), #crashes MySQL
"1985": ("log.htm", "Parseloghtml01"),
"1984": ("log.htm", "Parseloghtml01"),
"1983": ("log.htm", "Parseloghtml01"),
"1982": ("log.htm", "Parseloghtml01"),
}
# Caves for which survex files exist, but are not otherwise registered
# replaced (?) by expoweb/cave_data/pendingcaves.txt