strip spaces from titles

This commit is contained in:
Philip Sargent 2022-12-18 20:36:11 +00:00
parent d1b94763b4
commit 8ce86aabee

View File

@ -297,52 +297,50 @@ def ParseDate(tripdate, year):
logdataissues["tripdate"]=message logdataissues["tripdate"]=message
return datetime.date(1970, 1, 1) return datetime.date(1970, 1, 1)
# (2006 - not any more), 2008 - 2009 # # (2006 - not any more), 2008 - 2009
def wiki_parser(year, expedition, txt, seq=""):
    """Parse a wiki-markup logbook and append one tuple per trip to ``logentries``.

    Each entry is expected to look like::

        ===date|place|people===
        free text of the write-up ...

    where ``place`` may be ``cave - location``. An entry is only recognised
    when its text is followed by another ``===`` marker (the regex uses a
    lookahead), so a trailing entry with no following header is not matched.

    Args:
        year: Logbook year; passed to ``set_trip_id`` and ``ParseDate``.
        expedition: Stored unchanged in each entry tuple.
        txt: Full text of the logbook.
        seq: Not used by this parser.

    Side effects:
        Appends ``(date, cave, title, html_text, people, expedition,
        time_underground, trip_id)`` tuples to the module-level
        ``logentries`` list. A header that does not split into exactly three
        ``|``-separated fields is recorded as a ``DataIssue`` and in
        ``logdataissues["tripdate"]``, and that entry is skipped.
    """
    global logentries
    global logdataissues

    logbook_entry_count = 0
    trippara = re.findall(r"===(.*?)===([\s\S]*?)(?====)", txt)
    for triphead, triptext in trippara:
        # Count every matched entry, including rejected ones, so trip ids
        # keep matching the entry's position in the logbook.
        logbook_entry_count += 1

        tripheadp = triphead.split("|")
        if len(tripheadp) != 3:
            # The message used to be built from `tripdate` (not yet assigned
            # at this point) and by adding a list to a str, so this branch
            # raised instead of reporting. Report the raw header and skip the
            # entry: the 3-way unpack below cannot succeed for it.
            message = f" ! - Bad no of items in tripdate in logbook: {triphead} - {tripheadp}"
            DataIssue.objects.create(parser='logbooks', message=message)
            logdataissues["tripdate"] = message
            continue
        tripdate, tripplace, trippeople = tripheadp

        # "cave - location": the cave is the first part; the location is the
        # second part when present, otherwise the (unstripped) first part.
        tripsplace = tripplace.split(" - ")
        tripcave = tripsplace[0].strip()
        if len(tripsplace) == 1:
            tripsplace = tripsplace[0]
        else:
            tripsplace = tripsplace[1]

        # Time underground: first "T/U <number>" occurrence in the text, else "".
        tul = re.findall(r"T/U:?\s*(\d+[.]?\d*)\s*(hr|hrs|hours)?.*", triptext)
        if tul:
            tu = tul[0][0]
        else:
            tu = ""

        print(f"! LOGBOOK {year} {logbook_entry_count:2} {len(triptext):4} T/U:{tu} '{tripcave} - {tripsplace}' ")
        ldate = ParseDate(tripdate.strip(), year)
        tripid = set_trip_id(year, logbook_entry_count)

        # Turn each newline into a paragraph break, then collapse the doubled
        # break that a blank line produces.
        ltriptext = re.sub(r"\n", "<br /><br />\n", triptext)
        ltriptext = ltriptext.replace("<br /><br />\n<br /><br />\n", "<br /><br />\n")

        # Strip the title, as the HTML parsers in this file do.
        triptitle = f'{tripcave} - {tripsplace}'.strip()
        entrytuple = (ldate, tripcave, triptitle, ltriptext,
                      trippeople, expedition, tu, tripid)
        logentries.append(entrytuple)
# 2002, 2004 - now # 2002, 2004 - now
def parser_html(year, expedition, txt, seq=""): def parser_html(year, expedition, txt, seq=""):
@ -411,7 +409,8 @@ def parser_html(year, expedition, txt, seq=""):
ltriptext = re.sub(r"</p>", "", triptext) ltriptext = re.sub(r"</p>", "", triptext)
#ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext) #ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
ltriptext = re.sub(r"<p>", "<br /><br />", ltriptext).strip() ltriptext = re.sub(r"<p>", "<br /><br />", ltriptext).strip()
triptitle = triptitle.strip()
entrytuple = (ldate, tripcave, triptitle, ltriptext, entrytuple = (ldate, tripcave, triptitle, ltriptext,
trippeople, expedition, tu, tripid1) trippeople, expedition, tu, tripid1)
logentries.append(entrytuple) logentries.append(entrytuple)
@ -486,6 +485,7 @@ def parser_html_01(year, expedition, txt, seq=""):
print(message) print(message)
break break
#print(f" #3 - tid: {tid}") #print(f" #3 - tid: {tid}")
triptitle = triptitle.strip()
ldate = ParseDate(tripdate.strip(), year) ldate = ParseDate(tripdate.strip(), year)
#print(f" # - tid: {tid} <{tripdate}> <{triptitle}> <{trippeople}>") #print(f" # - tid: {tid} <{tripdate}> <{triptitle}> <{trippeople}>")
#print(f" #4 - tid: {tid}") #print(f" #4 - tid: {tid}")
@ -699,7 +699,7 @@ def LoadLogbookForExpedition(expedition, clean=True):
if check in dupl: if check in dupl:
dupl[check] += 1 dupl[check] += 1
triptitle = f"{triptitle} #{dupl[check]}" triptitle = f"{triptitle} #{dupl[check]}"
print(f' - {triptitle}') print(f' - {triptitle} -- {date}')
else: else:
dupl[check] = 1 dupl[check] = 1
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, logtime_underground,