forked from expo/troggle
tidy and comments
This commit is contained in:
parent
cabcada0b8
commit
0e47909704
@ -471,57 +471,58 @@ def parser_html_01(year, expedition, txt):
|
||||
print(message)
|
||||
return
|
||||
|
||||
# parser for 2003
|
||||
def parser_html_03(year, expedition, txt):
|
||||
global logentries
|
||||
global logdataissues
|
||||
# parser for 2003. Retired after conversion of the logbook.html
|
||||
# KEEP THIS COMMENTED-OUT example until after we have done the same thing with the html_01 parser
|
||||
# def parser_html_03(year, expedition, txt):
|
||||
# global logentries
|
||||
# global logdataissues
|
||||
|
||||
tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
|
||||
logbook_entry_count = 0
|
||||
for trippara in tripparas:
|
||||
logbook_entry_count += 1
|
||||
tid = set_trip_id(year,logbook_entry_count) # default trip id, before we read the date
|
||||
# tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
|
||||
# logbook_entry_count = 0
|
||||
# for trippara in tripparas:
|
||||
# logbook_entry_count += 1
|
||||
# tid = set_trip_id(year,logbook_entry_count) # default trip id, before we read the date
|
||||
|
||||
s = re.match(r"(?s)\s*<p>(.*?)</p>(.*)$", trippara)
|
||||
if not ( s ) :
|
||||
message = " ! - Skipping logentry {year} on failure to parse parser_html_03: {} {} {}...".format(tid,s,trippara[:300])
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[tid]=message
|
||||
print(message)
|
||||
break
|
||||
# s = re.match(r"(?s)\s*<p>(.*?)</p>(.*)$", trippara)
|
||||
# if not ( s ) :
|
||||
# message = " ! - Skipping logentry {year} on failure to parse parser_html_03: {} {} {}...".format(tid,s,trippara[:300])
|
||||
# DataIssue.objects.create(parser='logbooks', message=message)
|
||||
# logdataissues[tid]=message
|
||||
# print(message)
|
||||
# break
|
||||
|
||||
tripheader, triptext = s.group(1), s.group(2)
|
||||
tripheader = re.sub(r" ", " ", tripheader)
|
||||
tripheader = re.sub(r"\s+", " ", tripheader).strip()
|
||||
sheader = tripheader.split(" -- ")
|
||||
tu = ""
|
||||
if re.match("T/U|Time underwater", sheader[-1]):
|
||||
tu = sheader.pop() # not a number in 2003 usually
|
||||
# print(f" - {logbook_entry_count} '{tu}' ")
|
||||
if len(sheader) != 3:
|
||||
print(" ! Header not three pieces for parser_html_03() ", sheader)
|
||||
tripdate, triptitle, trippeople = sheader
|
||||
ldate = ParseDate(tripdate.strip(), year)
|
||||
# print(f" - {logbook_entry_count} '{ldate}' from '{tripdate.strip()}' ")
|
||||
# print(f" - {logbook_entry_count} '{trippeople}' ")
|
||||
titlelist = triptitle.split(" , ")
|
||||
if len(titlelist) >= 2:
|
||||
location, *namelist = titlelist # list unpacking operator
|
||||
tripname = ", ".join(namelist) # concatenate strings
|
||||
# print(f" - {logbook_entry_count} {location} '{tripname}'")
|
||||
else:
|
||||
location = "UNKNOWN"
|
||||
# tripheader, triptext = s.group(1), s.group(2)
|
||||
# tripheader = re.sub(r" ", " ", tripheader)
|
||||
# tripheader = re.sub(r"\s+", " ", tripheader).strip()
|
||||
# sheader = tripheader.split(" -- ")
|
||||
# tu = ""
|
||||
# if re.match("T/U|Time underwater", sheader[-1]):
|
||||
# tu = sheader.pop() # not a number in 2003 usually
|
||||
# # print(f" - {logbook_entry_count} '{tu}' ")
|
||||
# if len(sheader) != 3:
|
||||
# print(" ! Header not three pieces for parser_html_03() ", sheader)
|
||||
# tripdate, triptitle, trippeople = sheader
|
||||
# ldate = ParseDate(tripdate.strip(), year)
|
||||
# # print(f" - {logbook_entry_count} '{ldate}' from '{tripdate.strip()}' ")
|
||||
# # print(f" - {logbook_entry_count} '{trippeople}' ")
|
||||
# titlelist = triptitle.split(" , ")
|
||||
# if len(titlelist) >= 2:
|
||||
# location, *namelist = titlelist # list unpacking operator
|
||||
# tripname = ", ".join(namelist) # concatenate strings
|
||||
# # print(f" - {logbook_entry_count} {location} '{tripname}'")
|
||||
# else:
|
||||
# location = "UNKNOWN"
|
||||
|
||||
ltriptext = triptext + "<br /><br />\n\n" + tu
|
||||
ltriptext = re.sub(r"</p>", "", ltriptext)
|
||||
#ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
||||
ltriptext = re.sub(r"<p>", "<br /><br />\n\n", ltriptext).strip()
|
||||
#ltriptext = re.sub(r"[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext)
|
||||
# ltriptext = triptext + "<br /><br />\n\n" + tu
|
||||
# ltriptext = re.sub(r"</p>", "", ltriptext)
|
||||
# #ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
||||
# ltriptext = re.sub(r"<p>", "<br /><br />\n\n", ltriptext).strip()
|
||||
# #ltriptext = re.sub(r"[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext)
|
||||
|
||||
|
||||
entrytuple = (ldate, location, tripname, ltriptext,
|
||||
trippeople, expedition, tu, tid)
|
||||
logentries.append(entrytuple)
|
||||
# entrytuple = (ldate, location, tripname, ltriptext,
|
||||
# trippeople, expedition, tu, tid)
|
||||
# logentries.append(entrytuple)
|
||||
|
||||
|
||||
def LoadLogbookForExpedition(expedition):
|
||||
|
@ -1,6 +1,6 @@
|
||||
{% extends "base.html" %}
|
||||
<!-- templates/logbookentry.html - this text visible because this template has been included -->
|
||||
{% block title %}Logbook {{logbookentry.id}}{% endblock %}
|
||||
{% block title %}Logbook {{logbookentry.expedition.name}}{% endblock %}
|
||||
{% block content %}
|
||||
|
||||
{% block related %}{% endblock %}
|
||||
|
Loading…
Reference in New Issue
Block a user