forked from expo/troggle
moved parser settings
This commit is contained in:
parent
a795707552
commit
259f85742a
@ -46,41 +46,41 @@ todo='''
|
||||
|
||||
'''
|
||||
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
|
||||
DEFAULT_LOGBOOK_PARSER = "Parseloghtmltxt"
|
||||
DEFAULT_LOGBOOK_PARSER = "parser_html"
|
||||
DEFAULT_LOGBOOK_FILE = "logbook.html"
|
||||
# All years since 2010 use the default value for Logbook parser
|
||||
# but several years' logbooks do not parse and are skipped by the parsing code, e.g. 1983
|
||||
LOGBOOK_PARSER_SETTINGS = {
|
||||
"2010": ("logbook.html", "Parseloghtmltxt"),
|
||||
"2009": ("2009logbook.txt", "Parselogwikitxt"),
|
||||
"2008": ("2008logbook.txt", "Parselogwikitxt"),
|
||||
"2007": ("logbook.html", "Parseloghtmltxt"),
|
||||
"2006": ("logbook.html", "Parseloghtmltxt"),
|
||||
# "2006": ("logbook/logbook_06.txt", "Parselogwikitxt"),
|
||||
"2006": ("logbook.html", "Parseloghtmltxt"),
|
||||
"2005": ("logbook.html", "Parseloghtmltxt"),
|
||||
"2004": ("logbook.html", "Parseloghtmltxt"),
|
||||
"2003": ("logbook.html", "Parseloghtml03"),
|
||||
"2002": ("logbook.html", "Parseloghtmltxt"),
|
||||
"2001": ("log.htm", "Parseloghtml01"),
|
||||
"2000": ("log.htm", "Parseloghtml01"),
|
||||
"1999": ("log.htm", "Parseloghtml01"),
|
||||
"1998": ("log.htm", "Parseloghtml01"),
|
||||
"1997": ("log.htm", "Parseloghtml01"),
|
||||
"1996": ("log.htm", "Parseloghtml01"),
|
||||
"1995": ("log.htm", "Parseloghtml01"),
|
||||
"1994": ("log.htm", "Parseloghtml01"),
|
||||
"1993": ("log.htm", "Parseloghtml01"),
|
||||
"1992": ("log.htm", "Parseloghtml01"),
|
||||
"1991": ("log.htm", "Parseloghtml01"),
|
||||
"1990": ("log.htm", "Parseloghtml01"),
|
||||
"1989": ("log.htm", "Parseloghtml01"), #crashes MySQL
|
||||
"1988": ("log.htm", "Parseloghtml01"), #crashes MySQL
|
||||
"1987": ("log.htm", "Parseloghtml01"), #crashes MySQL
|
||||
"1985": ("log.htm", "Parseloghtml01"),
|
||||
"1984": ("log.htm", "Parseloghtml01"),
|
||||
"1983": ("log.htm", "Parseloghtml01"),
|
||||
"1982": ("log.htm", "Parseloghtml01"),
|
||||
"2010": ("logbook.html", "parser_html"),
|
||||
"2009": ("2009logbook.txt", "parser_wiki"),
|
||||
"2008": ("2008logbook.txt", "parser_wiki"),
|
||||
"2007": ("logbook.html", "parser_html"),
|
||||
"2006": ("logbook.html", "parser_html"),
|
||||
# "2006": ("logbook/logbook_06.txt", "parser_wiki"),
|
||||
"2006": ("logbook.html", "parser_html"),
|
||||
"2005": ("logbook.html", "parser_html"),
|
||||
"2004": ("logbook.html", "parser_html"),
|
||||
"2003": ("logbook.html", "parser_html_03"),
|
||||
"2002": ("logbook.html", "parser_html"),
|
||||
"2001": ("log.htm", "parser_html_01"),
|
||||
"2000": ("log.htm", "parser_html_01"),
|
||||
"1999": ("log.htm", "parser_html_01"),
|
||||
"1998": ("log.htm", "parser_html_01"),
|
||||
"1997": ("log.htm", "parser_html_01"),
|
||||
"1996": ("log.htm", "parser_html_01"),
|
||||
"1995": ("log.htm", "parser_html_01"),
|
||||
"1994": ("log.htm", "parser_html_01"),
|
||||
"1993": ("log.htm", "parser_html_01"),
|
||||
"1992": ("log.htm", "parser_html_01"),
|
||||
"1991": ("log.htm", "parser_html_01"),
|
||||
"1990": ("log.htm", "parser_html_01"),
|
||||
"1989": ("log.htm", "parser_html_01"), #crashes MySQL
|
||||
"1988": ("log.htm", "parser_html_01"), #crashes MySQL
|
||||
"1987": ("log.htm", "parser_html_01"), #crashes MySQL
|
||||
"1985": ("log.htm", "parser_html_01"),
|
||||
"1984": ("log.htm", "parser_html_01"),
|
||||
"1983": ("log.htm", "parser_html_01"),
|
||||
"1982": ("log.htm", "parser_html_01"),
|
||||
}
|
||||
|
||||
entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
|
||||
@ -248,7 +248,7 @@ def ParseDate(tripdate, year):
|
||||
return datetime.date(1970, 1, 1)
|
||||
|
||||
# Used for 2008 - 2009 (2006 no longer uses this parser)
|
||||
def Parselogwikitxt(year, expedition, txt):
|
||||
def parser_wiki(year, expedition, txt):
|
||||
global logentries
|
||||
global logdataissues
|
||||
|
||||
@ -290,7 +290,7 @@ def Parselogwikitxt(year, expedition, txt):
|
||||
|
||||
# 2002, 2004, 2005, 2007, 2010 - now
|
||||
# The 2006 wiki text is incomplete, but the html is all there, so 2006 uses this parser now.
|
||||
def Parseloghtmltxt(year, expedition, txt):
|
||||
def parser_html(year, expedition, txt):
|
||||
global logentries
|
||||
global logdataissues
|
||||
|
||||
@ -349,7 +349,7 @@ def Parseloghtmltxt(year, expedition, txt):
|
||||
|
||||
# Main parser for 1991 - 2001. Simpler because the data has been hacked so much to fit it.
|
||||
# trying it out for years 1982 - 1990 too. Some logbook editing required by hand.. place
|
||||
def Parseloghtml01(year, expedition, txt):
|
||||
def parser_html_01(year, expedition, txt):
|
||||
global logentries
|
||||
global logdataissues
|
||||
errorcount = 0
|
||||
@ -457,7 +457,7 @@ def Parseloghtml01(year, expedition, txt):
|
||||
return
|
||||
|
||||
# parser for 2003
|
||||
def Parseloghtml03(year, expedition, txt):
|
||||
def parser_html_03(year, expedition, txt):
|
||||
global logentries
|
||||
global logdataissues
|
||||
|
||||
@ -469,7 +469,7 @@ def Parseloghtml03(year, expedition, txt):
|
||||
|
||||
s = re.match(r"(?s)\s*<p>(.*?)</p>(.*)$", trippara)
|
||||
if not ( s ) :
|
||||
message = " ! - Skipping logentry {year} on failure to parse Parseloghtml03: {} {} {}...".format(tid,s,trippara[:300])
|
||||
message = " ! - Skipping logentry {year} on failure to parse parser_html_03: {} {} {}...".format(tid,s,trippara[:300])
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[tid]=message
|
||||
print(message)
|
||||
|
37
settings.py
37
settings.py
@ -69,43 +69,6 @@ FIX_PERMISSIONS = []
|
||||
# top-level survex file basename (without .svx)
|
||||
SURVEX_TOPNAME = "1623-and-1626-no-schoenberg-hs"
|
||||
|
||||
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
|
||||
DEFAULT_LOGBOOK_PARSER = "Parseloghtmltxt"
|
||||
DEFAULT_LOGBOOK_FILE = "logbook.html"
|
||||
# All years since 2010 use the default value for Logbook parser
|
||||
# but several don't work, and are skipped by the parsing code, e.g. 1983
|
||||
LOGBOOK_PARSER_SETTINGS = {
|
||||
"2010": ("logbook.html", "Parseloghtmltxt"),
|
||||
"2009": ("2009logbook.txt", "Parselogwikitxt"),
|
||||
"2008": ("2008logbook.txt", "Parselogwikitxt"),
|
||||
"2007": ("logbook.html", "Parseloghtmltxt"),
|
||||
"2006": ("logbook.html", "Parseloghtmltxt"),
|
||||
# "2006": ("logbook/logbook_06.txt", "Parselogwikitxt"),
|
||||
"2006": ("logbook.html", "Parseloghtmltxt"),
|
||||
"2005": ("logbook.html", "Parseloghtmltxt"),
|
||||
"2004": ("logbook.html", "Parseloghtmltxt"),
|
||||
"2003": ("logbook.html", "Parseloghtml03"),
|
||||
"2002": ("logbook.html", "Parseloghtmltxt"),
|
||||
"2001": ("log.htm", "Parseloghtml01"),
|
||||
"2000": ("log.htm", "Parseloghtml01"),
|
||||
"1999": ("log.htm", "Parseloghtml01"),
|
||||
"1998": ("log.htm", "Parseloghtml01"),
|
||||
"1997": ("log.htm", "Parseloghtml01"),
|
||||
"1996": ("log.htm", "Parseloghtml01"),
|
||||
"1995": ("log.htm", "Parseloghtml01"),
|
||||
"1994": ("log.htm", "Parseloghtml01"),
|
||||
"1993": ("log.htm", "Parseloghtml01"),
|
||||
"1992": ("log.htm", "Parseloghtml01"),
|
||||
"1991": ("log.htm", "Parseloghtml01"),
|
||||
"1990": ("log.htm", "Parseloghtml01"),
|
||||
"1989": ("log.htm", "Parseloghtml01"), #crashes MySQL
|
||||
"1988": ("log.htm", "Parseloghtml01"), #crashes MySQL
|
||||
"1987": ("log.htm", "Parseloghtml01"), #crashes MySQL
|
||||
"1985": ("log.htm", "Parseloghtml01"),
|
||||
"1984": ("log.htm", "Parseloghtml01"),
|
||||
"1983": ("log.htm", "Parseloghtml01"),
|
||||
"1982": ("log.htm", "Parseloghtml01"),
|
||||
}
|
||||
|
||||
# Caves for which survex files exist, but are not otherwise registered
|
||||
# replaced (?) by expoweb/cave_data/pendingcaves.txt
|
||||
|
Loading…
Reference in New Issue
Block a user