From 8ce86aabee672ba56e8ef06207e09edfb6bfae1a Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Sun, 18 Dec 2022 20:36:11 +0000 Subject: [PATCH] strip spaces from titles --- parsers/logbooks.py | 80 ++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/parsers/logbooks.py b/parsers/logbooks.py index d4db001..9502147 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -297,52 +297,50 @@ def ParseDate(tripdate, year): logdataissues["tripdate"]=message return datetime.date(1970, 1, 1) -# (2006 - not any more), 2008 - 2009 -def wiki_parser(year, expedition, txt, seq=""): - global logentries - global logdataissues +# # (2006 - not any more), 2008 - 2009 +# def wiki_parser(year, expedition, txt, seq=""): + # global logentries + # global logdataissues - logbook_entry_count = 0 - trippara = re.findall(r"===(.*?)===([\s\S]*?)(?====)", txt) - for triphead, triptext in trippara: - logbook_entry_count += 1 - tid = set_trip_id(year,logbook_entry_count) + # logbook_entry_count = 0 + # trippara = re.findall(r"===(.*?)===([\s\S]*?)(?====)", txt) + # for triphead, triptext in trippara: + # logbook_entry_count += 1 + # tid = set_trip_id(year,logbook_entry_count) - tripheadp = triphead.split("|") - if not (len(tripheadp) == 3): - message = " ! - Bad no of items in tripdate in logbook: " + tripdate + " - " + tripheadp - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues["tripdate"]=message + # tripheadp = triphead.split("|") + # if not (len(tripheadp) == 3): + # message = " ! - Bad no of items in tripdate in logbook: " + tripdate + " - " + tripheadp + # DataIssue.objects.create(parser='logbooks', message=message) + # logdataissues["tripdate"]=message - tripdate, tripplace, trippeople = tripheadp - tripsplace = tripplace.split(" - ") - tripcave = tripsplace[0].strip() - if len(tripsplace) == 1: - tripsplace = tripsplace[0] - else: - tripsplace = tripsplace[1] + # tripdate, tripplace, trippeople = tripheadp + # tripsplace = tripplace.split(" - ") + # tripcave = tripsplace[0].strip() + # if len(tripsplace) == 1: + # tripsplace = tripsplace[0] + # else: + # tripsplace = tripsplace[1] - #tul = re.findall(r"T/?U:?\s*(\d+(?:\.\d*)?|unknown)\s*(hrs|hours)?", triptext) - tul = re.findall(r"T/U:?\s*(\d+[.]?\d*)\s*(hr|hrs|hours)?.*", triptext) - if tul: - tu = tul[0][0] - else: - tu = "" - print(f"! LOGBOOK {year} {logbook_entry_count:2} {len(triptext):4} T/U:{tu} '{tripcave} - {tripsplace}' ") + # #tul = re.findall(r"T/?U:?\s*(\d+(?:\.\d*)?|unknown)\s*(hrs|hours)?", triptext) + # tul = re.findall(r"T/U:?\s*(\d+[.]?\d*)\s*(hr|hrs|hours)?.*", triptext) + # if tul: + # tu = tul[0][0] + # else: + # tu = "" + # print(f"! LOGBOOK {year} {logbook_entry_count:2} {len(triptext):4} T/U:{tu} '{tripcave} - {tripsplace}' ") - ldate = ParseDate(tripdate.strip(), year) - tripid = set_trip_id(year,logbook_entry_count) + # ldate = ParseDate(tripdate.strip(), year) + # tripid = set_trip_id(year,logbook_entry_count) - ltriptext = re.sub(r"\n", "

\n", triptext) - ltriptext = ltriptext.replace("

\n

\n","

\n") - - triptitle = f'{tripcave} - {tripsplace}' - entrytuple = (ldate, tripcave, triptitle, ltriptext, - trippeople, expedition, tu, tripid) - logentries.append(entrytuple) - + # ltriptext = re.sub(r"\n", "

\n", triptext) + # ltriptext = ltriptext.replace("

\n

\n","

\n") + # triptitle = f'{tripcave} - {tripsplace}' + # entrytuple = (ldate, tripcave, triptitle, ltriptext, + # trippeople, expedition, tu, tripid) + # logentries.append(entrytuple) # 2002, 2004 - now def parser_html(year, expedition, txt, seq=""): @@ -411,7 +409,8 @@ def parser_html(year, expedition, txt, seq=""): ltriptext = re.sub(r"

", "", triptext) #ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext) ltriptext = re.sub(r"

", "

", ltriptext).strip() - + + triptitle = triptitle.strip() entrytuple = (ldate, tripcave, triptitle, ltriptext, trippeople, expedition, tu, tripid1) logentries.append(entrytuple) @@ -486,6 +485,7 @@ def parser_html_01(year, expedition, txt, seq=""): print(message) break #print(f" #3 - tid: {tid}") + triptitle = triptitle.strip() ldate = ParseDate(tripdate.strip(), year) #print(f" # - tid: {tid} <{tripdate}> <{triptitle}> <{trippeople}>") #print(f" #4 - tid: {tid}") @@ -699,7 +699,7 @@ def LoadLogbookForExpedition(expedition, clean=True): if check in dupl: dupl[check] += 1 triptitle = f"{triptitle} #{dupl[check]}" - print(f' - {triptitle}') + print(f' - {triptitle} -- {date}') else: dupl[check] = 1 EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, logtime_underground,