2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-24 16:21:53 +00:00

moved parser settings

This commit is contained in:
Philip Sargent 2022-11-21 16:47:25 +00:00
parent a795707552
commit 259f85742a
2 changed files with 36 additions and 73 deletions

View File

@ -46,41 +46,41 @@ todo='''
''' '''
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200 MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
DEFAULT_LOGBOOK_PARSER = "Parseloghtmltxt" DEFAULT_LOGBOOK_PARSER = "parser_html"
DEFAULT_LOGBOOK_FILE = "logbook.html" DEFAULT_LOGBOOK_FILE = "logbook.html"
# All years since 2010 use the default value for Logbook parser # All years since 2010 use the default value for Logbook parser
# but several don't work, and are skipped by the parsing code, e.g. 1983 # but several don't work, and are skipped by the parsing code, e.g. 1983
LOGBOOK_PARSER_SETTINGS = { LOGBOOK_PARSER_SETTINGS = {
"2010": ("logbook.html", "Parseloghtmltxt"), "2010": ("logbook.html", "parser_html"),
"2009": ("2009logbook.txt", "Parselogwikitxt"), "2009": ("2009logbook.txt", "parser_wiki"),
"2008": ("2008logbook.txt", "Parselogwikitxt"), "2008": ("2008logbook.txt", "parser_wiki"),
"2007": ("logbook.html", "Parseloghtmltxt"), "2007": ("logbook.html", "parser_html"),
"2006": ("logbook.html", "Parseloghtmltxt"), "2006": ("logbook.html", "parser_html"),
# "2006": ("logbook/logbook_06.txt", "Parselogwikitxt"), # "2006": ("logbook/logbook_06.txt", "parser_wiki"),
"2006": ("logbook.html", "Parseloghtmltxt"), "2006": ("logbook.html", "parser_html"),
"2005": ("logbook.html", "Parseloghtmltxt"), "2005": ("logbook.html", "parser_html"),
"2004": ("logbook.html", "Parseloghtmltxt"), "2004": ("logbook.html", "parser_html"),
"2003": ("logbook.html", "Parseloghtml03"), "2003": ("logbook.html", "parser_html_03"),
"2002": ("logbook.html", "Parseloghtmltxt"), "2002": ("logbook.html", "parser_html"),
"2001": ("log.htm", "Parseloghtml01"), "2001": ("log.htm", "parser_html_01"),
"2000": ("log.htm", "Parseloghtml01"), "2000": ("log.htm", "parser_html_01"),
"1999": ("log.htm", "Parseloghtml01"), "1999": ("log.htm", "parser_html_01"),
"1998": ("log.htm", "Parseloghtml01"), "1998": ("log.htm", "parser_html_01"),
"1997": ("log.htm", "Parseloghtml01"), "1997": ("log.htm", "parser_html_01"),
"1996": ("log.htm", "Parseloghtml01"), "1996": ("log.htm", "parser_html_01"),
"1995": ("log.htm", "Parseloghtml01"), "1995": ("log.htm", "parser_html_01"),
"1994": ("log.htm", "Parseloghtml01"), "1994": ("log.htm", "parser_html_01"),
"1993": ("log.htm", "Parseloghtml01"), "1993": ("log.htm", "parser_html_01"),
"1992": ("log.htm", "Parseloghtml01"), "1992": ("log.htm", "parser_html_01"),
"1991": ("log.htm", "Parseloghtml01"), "1991": ("log.htm", "parser_html_01"),
"1990": ("log.htm", "Parseloghtml01"), "1990": ("log.htm", "parser_html_01"),
"1989": ("log.htm", "Parseloghtml01"), #crashes MySQL "1989": ("log.htm", "parser_html_01"), #crashes MySQL
"1988": ("log.htm", "Parseloghtml01"), #crashes MySQL "1988": ("log.htm", "parser_html_01"), #crashes MySQL
"1987": ("log.htm", "Parseloghtml01"), #crashes MySQL "1987": ("log.htm", "parser_html_01"), #crashes MySQL
"1985": ("log.htm", "Parseloghtml01"), "1985": ("log.htm", "parser_html_01"),
"1984": ("log.htm", "Parseloghtml01"), "1984": ("log.htm", "parser_html_01"),
"1983": ("log.htm", "Parseloghtml01"), "1983": ("log.htm", "parser_html_01"),
"1982": ("log.htm", "Parseloghtml01"), "1982": ("log.htm", "parser_html_01"),
} }
entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79, entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
@ -248,7 +248,7 @@ def ParseDate(tripdate, year):
return datetime.date(1970, 1, 1) return datetime.date(1970, 1, 1)
# (2006 - not any more), 2008 - 2009 # (2006 - not any more), 2008 - 2009
def Parselogwikitxt(year, expedition, txt): def parser_wiki(year, expedition, txt):
global logentries global logentries
global logdataissues global logdataissues
@ -290,7 +290,7 @@ def Parselogwikitxt(year, expedition, txt):
# 2002, 2004, 2005, 2007, 2010 - now # 2002, 2004, 2005, 2007, 2010 - now
# 2006 wiki text is incomplete, but the html all there. So using this parser now. # 2006 wiki text is incomplete, but the html all there. So using this parser now.
def Parseloghtmltxt(year, expedition, txt): def parser_html(year, expedition, txt):
global logentries global logentries
global logdataissues global logdataissues
@ -349,7 +349,7 @@ def Parseloghtmltxt(year, expedition, txt):
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it # main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
# trying it out for years 1982 - 1990 too. Some logbook editing required by hand.. place # trying it out for years 1982 - 1990 too. Some logbook editing required by hand.. place
def Parseloghtml01(year, expedition, txt): def parser_html_01(year, expedition, txt):
global logentries global logentries
global logdataissues global logdataissues
errorcount = 0 errorcount = 0
@ -457,7 +457,7 @@ def Parseloghtml01(year, expedition, txt):
return return
# parser for 2003 # parser for 2003
def Parseloghtml03(year, expedition, txt): def parser_html_03(year, expedition, txt):
global logentries global logentries
global logdataissues global logdataissues
@ -469,7 +469,7 @@ def Parseloghtml03(year, expedition, txt):
s = re.match(r"(?s)\s*<p>(.*?)</p>(.*)$", trippara) s = re.match(r"(?s)\s*<p>(.*?)</p>(.*)$", trippara)
if not ( s ) : if not ( s ) :
message = " ! - Skipping logentry {year} on failure to parse Parseloghtml03: {} {} {}...".format(tid,s,trippara[:300]) message = " ! - Skipping logentry {year} on failure to parse parser_html_03: {} {} {}...".format(tid,s,trippara[:300])
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser='logbooks', message=message)
logdataissues[tid]=message logdataissues[tid]=message
print(message) print(message)

View File

@ -69,43 +69,6 @@ FIX_PERMISSIONS = []
# top-level survex file basename (without .svx) # top-level survex file basename (without .svx)
SURVEX_TOPNAME = "1623-and-1626-no-schoenberg-hs" SURVEX_TOPNAME = "1623-and-1626-no-schoenberg-hs"
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
DEFAULT_LOGBOOK_PARSER = "Parseloghtmltxt"
DEFAULT_LOGBOOK_FILE = "logbook.html"
# All years since 2010 use the default value for Logbook parser
# but several don't work, and are skipped by the parsing code, e.g. 1983
LOGBOOK_PARSER_SETTINGS = {
"2010": ("logbook.html", "Parseloghtmltxt"),
"2009": ("2009logbook.txt", "Parselogwikitxt"),
"2008": ("2008logbook.txt", "Parselogwikitxt"),
"2007": ("logbook.html", "Parseloghtmltxt"),
"2006": ("logbook.html", "Parseloghtmltxt"),
# "2006": ("logbook/logbook_06.txt", "Parselogwikitxt"),
"2006": ("logbook.html", "Parseloghtmltxt"),
"2005": ("logbook.html", "Parseloghtmltxt"),
"2004": ("logbook.html", "Parseloghtmltxt"),
"2003": ("logbook.html", "Parseloghtml03"),
"2002": ("logbook.html", "Parseloghtmltxt"),
"2001": ("log.htm", "Parseloghtml01"),
"2000": ("log.htm", "Parseloghtml01"),
"1999": ("log.htm", "Parseloghtml01"),
"1998": ("log.htm", "Parseloghtml01"),
"1997": ("log.htm", "Parseloghtml01"),
"1996": ("log.htm", "Parseloghtml01"),
"1995": ("log.htm", "Parseloghtml01"),
"1994": ("log.htm", "Parseloghtml01"),
"1993": ("log.htm", "Parseloghtml01"),
"1992": ("log.htm", "Parseloghtml01"),
"1991": ("log.htm", "Parseloghtml01"),
"1990": ("log.htm", "Parseloghtml01"),
"1989": ("log.htm", "Parseloghtml01"), #crashes MySQL
"1988": ("log.htm", "Parseloghtml01"), #crashes MySQL
"1987": ("log.htm", "Parseloghtml01"), #crashes MySQL
"1985": ("log.htm", "Parseloghtml01"),
"1984": ("log.htm", "Parseloghtml01"),
"1983": ("log.htm", "Parseloghtml01"),
"1982": ("log.htm", "Parseloghtml01"),
}
# Caves for which survex files exist, but are not otherwise registered # Caves for which survex files exist, but are not otherwise registered
# replaced (?) by expoweb/cave_data/pendingcaves.txt # replaced (?) by expoweb/cave_data/pendingcaves.txt