From 71b5383090def8219964d814b0870dbac65a2849 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Tue, 7 Jul 2020 19:07:45 +0100 Subject: [PATCH] parse 2006 logbook from html --- parsers/logbooks.py | 13 +++++++------ settings.py | 3 ++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/parsers/logbooks.py b/parsers/logbooks.py index 0158ccc..2ecef42 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -1,4 +1,3 @@ -#.-*- coding: utf-8 -*- import csv import datetime import os @@ -136,14 +135,14 @@ def ParseDate(tripdate, year): yadd = int(year[:2]) * 100 day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd else: - assert False, tripdate message = " ! - Bad date in logbook: " + tripdate + " - " + year DataIssue.objects.create(parser='logbooks', message=message) - logdataissues[tid+"author"]=message + logdataissues["author"]=message + assert False, tripdate return datetime.date(year, month, day) -# 2006, 2008 - 2009 +# (2006 - not any more), 2008 - 2009 def Parselogwikitxt(year, expedition, txt): global logentries global logdataissues @@ -207,8 +206,10 @@ def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu, # message = " ! - Skipping logentry: %s - no author for entry in year '%s'" % (title, expedition.year) # DataIssue.objects.create(parser='logbooks', message=message) # logdataissues[tid+"author"]=message + pass # 2002, 2004, 2005, 2007, 2010 - now +# 2006 wiki text is incomplete, but the html all there. So using this parser now. def Parseloghtmltxt(year, expedition, txt): global logentries global logdataissues @@ -229,7 +230,7 @@ def Parseloghtmltxt(year, expedition, txt): ''', trippara) if not s: if not re.search(r"Rigging Guide", trippara): - msg = " !- can't parse: {}".format(trippara) # this is 2007 which needs editing + msg = " !- Logbook. Can't parse: {} entry:{}".format(trippara, logbook_entry_count) print(msg) DataIssue.objects.create(parser='logbooks', message=msg) logdataissues[tid]=msg @@ -488,7 +489,7 @@ def LoadLogbooks(): "1985","1986","1987","1988","1989","1990",] entries = {"2020": 0, "2019": 20, "2018": 74, "2017": 60, "2016": 81, "2015": 79, "2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52, - "2008": 49, "2007": 111, "2006": 24, "2005": 55, "2004": 76, "2003": 40, "2002": 31, + "2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31, "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41, "1994": 32, "1993": 41, "1992": 61, "1991": 38, "1982": 0} try: diff --git a/settings.py b/settings.py index b4b9778..4ac2cc2 100644 --- a/settings.py +++ b/settings.py @@ -70,7 +70,8 @@ LOGBOOK_PARSER_SETTINGS = { "2009": ("2009/2009logbook.txt", "Parselogwikitxt"), "2008": ("2008/2008logbook.txt", "Parselogwikitxt"), "2007": ("2007/logbook.html", "Parseloghtmltxt"), - "2006": ("2006/logbook/logbook_06.txt", "Parselogwikitxt"), +# "2006": ("2006/logbook/logbook_06.txt", "Parselogwikitxt"), + "2006": ("2006/logbook.html", "Parseloghtmltxt"), "2005": ("2005/logbook.html", "Parseloghtmltxt"), "2004": ("2004/logbook.html", "Parseloghtmltxt"), "2003": ("2003/logbook.html", "Parseloghtml03"),