diff --git a/parsers/logbooks.py b/parsers/logbooks.py index 889387d..0bbc23d 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -93,7 +93,7 @@ entries = { "2022": 64, "2019": 56, "2018": 74, "2017": 60, "2016": 81, "2015": "2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 42, "2002": 31, "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41, "1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1, - "1985": 24,"1984": 32,"1983": 52,"1982": 42,} + "1985": 24, "1984": 32, "1983": 52, "1982": 42,} # Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing. logentries = [] # the entire logbook for one year is a single object: a list of entries @@ -471,57 +471,58 @@ def parser_html_01(year, expedition, txt): print(message) return -# parser for 2003 -def parser_html_03(year, expedition, txt): - global logentries - global logdataissues +# parser for 2003. Retired after conversion of the logbook.html +# KEEP THIS COMMENTED-OUT example until after we have done the same thing with the html_01 parser +# def parser_html_03(year, expedition, txt): + # global logentries + # global logdataissues - tripparas = re.findall(r"([\s\S]*?)(?=([\s\S]*?)(?=(.*?)

(.*)$", trippara) - if not ( s ) : - message = " ! - Skipping logentry {year} on failure to parse parser_html_03: {} {} {}...".format(tid,s,trippara[:300]) - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues[tid]=message - print(message) - break + # s = re.match(r"(?s)\s*

(.*?)

(.*)$", trippara) + # if not ( s ) : + # message = " ! - Skipping logentry {year} on failure to parse parser_html_03: {} {} {}...".format(tid,s,trippara[:300]) + # DataIssue.objects.create(parser='logbooks', message=message) + # logdataissues[tid]=message + # print(message) + # break - tripheader, triptext = s.group(1), s.group(2) - tripheader = re.sub(r" ", " ", tripheader) - tripheader = re.sub(r"\s+", " ", tripheader).strip() - sheader = tripheader.split(" -- ") - tu = "" - if re.match("T/U|Time underwater", sheader[-1]): - tu = sheader.pop() # not a number in 2003 usually - # print(f" - {logbook_entry_count} '{tu}' ") - if len(sheader) != 3: - print(" ! Header not three pieces for parser_html_03() ", sheader) - tripdate, triptitle, trippeople = sheader - ldate = ParseDate(tripdate.strip(), year) - # print(f" - {logbook_entry_count} '{ldate}' from '{tripdate.strip()}' ") - # print(f" - {logbook_entry_count} '{trippeople}' ") - titlelist = triptitle.split(" , ") - if len(titlelist) >= 2: - location, *namelist = titlelist # list unpacking operator - tripname = ", ".join(namelist) # concatenate strings - # print(f" - {logbook_entry_count} {location} '{tripname}'") - else: - location = "UNKNOWN" + # tripheader, triptext = s.group(1), s.group(2) + # tripheader = re.sub(r" ", " ", tripheader) + # tripheader = re.sub(r"\s+", " ", tripheader).strip() + # sheader = tripheader.split(" -- ") + # tu = "" + # if re.match("T/U|Time underwater", sheader[-1]): + # tu = sheader.pop() # not a number in 2003 usually + # # print(f" - {logbook_entry_count} '{tu}' ") + # if len(sheader) != 3: + # print(" ! 
Header not three pieces for parser_html_03() ", sheader) + # tripdate, triptitle, trippeople = sheader + # ldate = ParseDate(tripdate.strip(), year) + # # print(f" - {logbook_entry_count} '{ldate}' from '{tripdate.strip()}' ") + # # print(f" - {logbook_entry_count} '{trippeople}' ") + # titlelist = triptitle.split(" , ") + # if len(titlelist) >= 2: + # location, *namelist = titlelist # list unpacking operator + # tripname = ", ".join(namelist) # concatenate strings + # # print(f" - {logbook_entry_count} {location} '{tripname}'") + # else: + # location = "UNKNOWN" - ltriptext = triptext + "

\n\n" + tu - ltriptext = re.sub(r"

", "", ltriptext) - #ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext) - ltriptext = re.sub(r"

", "

\n\n", ltriptext).strip() - #ltriptext = re.sub(r"[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext) + # ltriptext = triptext + "

\n\n" + tu + # ltriptext = re.sub(r"

", "", ltriptext) + # #ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext) + # ltriptext = re.sub(r"

", "

\n\n", ltriptext).strip() + # #ltriptext = re.sub(r"[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext) - entrytuple = (ldate, location, tripname, ltriptext, - trippeople, expedition, tu, tid) - logentries.append(entrytuple) + # entrytuple = (ldate, location, tripname, ltriptext, + # trippeople, expedition, tu, tid) + # logentries.append(entrytuple) def LoadLogbookForExpedition(expedition): diff --git a/templates/logbookentry.html b/templates/logbookentry.html index 9090928..7ddbf05 100644 --- a/templates/logbookentry.html +++ b/templates/logbookentry.html @@ -1,6 +1,6 @@ {% extends "base.html" %} -{% block title %}Logbook {{logbookentry.id}}{% endblock %} +{% block title %}Logbook {{logbookentry.expedition.name}}{% endblock %} {% block content %} {% block related %}{% endblock %}