2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-22 15:21:52 +00:00

parse 2006 logbook from html

This commit is contained in:
Philip Sargent 2020-07-07 19:07:45 +01:00
parent 52afb9f466
commit 71b5383090
2 changed files with 9 additions and 7 deletions

View File

@ -1,4 +1,3 @@
#.-*- coding: utf-8 -*-
import csv import csv
import datetime import datetime
import os import os
@ -136,14 +135,14 @@ def ParseDate(tripdate, year):
yadd = int(year[:2]) * 100 yadd = int(year[:2]) * 100
day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd
else: else:
assert False, tripdate
message = " ! - Bad date in logbook: " + tripdate + " - " + year message = " ! - Bad date in logbook: " + tripdate + " - " + year
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser='logbooks', message=message)
logdataissues[tid+"author"]=message logdataissues["author"]=message
assert False, tripdate
return datetime.date(year, month, day) return datetime.date(year, month, day)
# 2006, 2008 - 2009 # (2006 - not any more), 2008 - 2009
def Parselogwikitxt(year, expedition, txt): def Parselogwikitxt(year, expedition, txt):
global logentries global logentries
global logdataissues global logdataissues
@ -207,8 +206,10 @@ def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu,
# message = " ! - Skipping logentry: %s - no author for entry in year '%s'" % (title, expedition.year) # message = " ! - Skipping logentry: %s - no author for entry in year '%s'" % (title, expedition.year)
# DataIssue.objects.create(parser='logbooks', message=message) # DataIssue.objects.create(parser='logbooks', message=message)
# logdataissues[tid+"author"]=message # logdataissues[tid+"author"]=message
pass
# 2002, 2004, 2005, 2007, 2010 - now # 2002, 2004, 2005, 2007, 2010 - now
# The 2006 wiki text is incomplete, but the html is all there, so 2006 now uses this parser.
def Parseloghtmltxt(year, expedition, txt): def Parseloghtmltxt(year, expedition, txt):
global logentries global logentries
global logdataissues global logdataissues
@ -229,7 +230,7 @@ def Parseloghtmltxt(year, expedition, txt):
''', trippara) ''', trippara)
if not s: if not s:
if not re.search(r"Rigging Guide", trippara): if not re.search(r"Rigging Guide", trippara):
msg = " !- can't parse: {}".format(trippara) # this is 2007 which needs editing msg = " !- Logbook. Can't parse: {} entry:{}".format(trippara, logbook_entry_count)
print(msg) print(msg)
DataIssue.objects.create(parser='logbooks', message=msg) DataIssue.objects.create(parser='logbooks', message=msg)
logdataissues[tid]=msg logdataissues[tid]=msg
@ -488,7 +489,7 @@ def LoadLogbooks():
"1985","1986","1987","1988","1989","1990",] "1985","1986","1987","1988","1989","1990",]
entries = {"2020": 0, "2019": 20, "2018": 74, "2017": 60, "2016": 81, "2015": 79, entries = {"2020": 0, "2019": 20, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52, "2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
"2008": 49, "2007": 111, "2006": 24, "2005": 55, "2004": 76, "2003": 40, "2002": 31, "2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41, "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
"1994": 32, "1993": 41, "1992": 61, "1991": 38, "1982": 0} "1994": 32, "1993": 41, "1992": 61, "1991": 38, "1982": 0}
try: try:

View File

@ -70,7 +70,8 @@ LOGBOOK_PARSER_SETTINGS = {
"2009": ("2009/2009logbook.txt", "Parselogwikitxt"), "2009": ("2009/2009logbook.txt", "Parselogwikitxt"),
"2008": ("2008/2008logbook.txt", "Parselogwikitxt"), "2008": ("2008/2008logbook.txt", "Parselogwikitxt"),
"2007": ("2007/logbook.html", "Parseloghtmltxt"), "2007": ("2007/logbook.html", "Parseloghtmltxt"),
"2006": ("2006/logbook/logbook_06.txt", "Parselogwikitxt"), # "2006": ("2006/logbook/logbook_06.txt", "Parselogwikitxt"),
"2006": ("2006/logbook.html", "Parseloghtmltxt"),
"2005": ("2005/logbook.html", "Parseloghtmltxt"), "2005": ("2005/logbook.html", "Parseloghtmltxt"),
"2004": ("2004/logbook.html", "Parseloghtmltxt"), "2004": ("2004/logbook.html", "Parseloghtmltxt"),
"2003": ("2003/logbook.html", "Parseloghtml03"), "2003": ("2003/logbook.html", "Parseloghtml03"),