diff --git a/parsers/logbooks.py b/parsers/logbooks.py index 5ef125e..c30831f 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -395,137 +395,6 @@ def parser_html(year, expedition, txt, seq=""): logentries.append(entrytuple) -# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it -# def parser_html_01(year, expedition, txt, seq=""): - # global logentries - # global logdataissues - # errorcount = 0 - - # # extract front material and stash for later use when rebuilding from list of entries - # headmatch = re.match(r"(?i)(?s).*
]*>(.*?)]*>(T/?U.*)", triptext) - # if mtu: - # tu = mtu.group(1) - # triptext = triptext[: mtu.start(0)] + triptext[mtu.end() :] - # else: - # tu = "" - - # triptitles = triptitle.split(" - ") - # tripcave = triptitles[0].strip() - - # ltriptext = triptext - - # mtail = re.search(r'(?:[^<]*|\s|/|-|&|?p>|\((?:same day|\d+)\))*$', ltriptext) - # if mtail: - # ltriptext = ltriptext[: mtail.start(0)] - # ltriptext = re.sub(r"
", "", ltriptext) - # ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext) - # ltriptext = re.sub(r"?u>", "_", ltriptext) - # ltriptext = re.sub(r"?i>", "''", ltriptext) - # ltriptext = re.sub(r"?b>", "'''", ltriptext) - # ltriptext = re.sub(r"", "
", ltriptext).strip()
-
- # if ltriptext == "":
- # message = " ! - Zero content for logbook entry!: " + tid
- # DataIssue.objects.create(parser="logbooks", message=message)
- # logdataissues[tid] = message
- # print(message)
-
- # entrytuple = (ldate, tripcave, triptitle, ltriptext, trippeople, expedition, tu, tid)
- # logentries.append(entrytuple)
-
- # except:
- # message = f" ! - Skipping logentry {year} due to exception in: {tid}"
- # DataIssue.objects.create(parser="logbooks", message=message)
- # logdataissues[tid] = message
- # print(message)
- # errorcount += 1
- # raise
- # if errorcount > 5:
- # message = f" !!- TOO MANY ERRORS - aborting at '{tid}' logbook: {year}"
- # DataIssue.objects.create(parser="logbooks", message=message)
- # logdataissues[tid] = message
- # print(message)
- # return
-
-
def parser_blog(year, expedition, txt, sq=""):
"""Parses the format of web pages collected as 'Save As HTML" from the UK Caving blog website.
Note that the entries have dates and authors, but no titles.