|
|
|
|
@@ -60,8 +60,8 @@ LOGBOOK_PARSER_SETTINGS = {
|
|
|
|
|
LOGBOOKS_DIR = "years" # subfolder of settings.EXPOWEB
|
|
|
|
|
|
|
|
|
|
ENTRIES = {
|
|
|
|
|
"2023": 81,
|
|
|
|
|
"2022": 93,
|
|
|
|
|
"2023": 83,
|
|
|
|
|
"2022": 94,
|
|
|
|
|
"2019": 55,
|
|
|
|
|
"2018": 95,
|
|
|
|
|
"2017": 74,
|
|
|
|
|
@@ -127,7 +127,7 @@ def reset_trip_id(date):
|
|
|
|
|
suffix = alphabet_suffix(n)
|
|
|
|
|
|
|
|
|
|
tid = f"{date}{suffix}"
|
|
|
|
|
# print(tid)
|
|
|
|
|
# print(already, n, tid)
|
|
|
|
|
return tid
|
|
|
|
|
|
|
|
|
|
rx_tripperson = re.compile(r"(?i)<u>(.*?)</u>$")
|
|
|
|
|
@@ -136,6 +136,7 @@ rx_round_bracket = re.compile(r"[\(\[].*?[\)\]]")
|
|
|
|
|
def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
|
|
|
|
res = []
|
|
|
|
|
author = None
|
|
|
|
|
guests = []
|
|
|
|
|
# print(f'# {tid}')
|
|
|
|
|
# print(f" - {tid} '{trippeople}' ")
|
|
|
|
|
|
|
|
|
|
@@ -154,11 +155,12 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
|
|
|
|
try:
|
|
|
|
|
personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
|
|
|
|
|
if not personyear:
|
|
|
|
|
if known_foreigner(tripperson):
|
|
|
|
|
message = f" ! - {expedition.year} Known foreigner: '{tripperson}' in entry {tid=}"
|
|
|
|
|
guests.append(nickname_used)
|
|
|
|
|
if known_foreigner(nickname_used):
|
|
|
|
|
message = f" ! - {expedition.year} Known foreigner: '{nickname_used}' in entry {tid=}"
|
|
|
|
|
print(message)
|
|
|
|
|
else:
|
|
|
|
|
message = f" ! - {expedition.year} No name match for: '{tripperson}' in entry {tid=} for this year."
|
|
|
|
|
message = f" ! - {expedition.year} No name match for: '{nickname_used}' in entry {tid=} for this year."
|
|
|
|
|
print(message)
|
|
|
|
|
DataIssue.objects.create(parser="logbooks", message=message)
|
|
|
|
|
res.append((personyear, nickname_used, logtime_underground))
|
|
|
|
|
@@ -170,10 +172,9 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
|
|
|
|
raise
|
|
|
|
|
if author_u:
|
|
|
|
|
author = personyear
|
|
|
|
|
else:
|
|
|
|
|
# a person but with * prefix. Ignored everywhere.
|
|
|
|
|
# print(f" ! - {expedition.year} * person : {tripperson}")
|
|
|
|
|
pass
|
|
|
|
|
else: # *guest
|
|
|
|
|
guests.append(tripperson)
|
|
|
|
|
# print(f" ! - {expedition.year} * GUEST : {tripperson}")
|
|
|
|
|
|
|
|
|
|
if not author:
|
|
|
|
|
if not res:
|
|
|
|
|
@@ -181,7 +182,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
|
|
|
|
author = res[-1][0] # the previous valid person and a time of 0 hours
|
|
|
|
|
|
|
|
|
|
# print(f" - {tid} [{author.person}] '{res[0][0].person}'...")
|
|
|
|
|
return res, author
|
|
|
|
|
return res, author, guests
|
|
|
|
|
|
|
|
|
|
def tidy_time_underground(logtime_underground):
|
|
|
|
|
# Nasty hack, must tidy this up..
|
|
|
|
|
@@ -202,7 +203,7 @@ def tidy_time_underground(logtime_underground):
|
|
|
|
|
|
|
|
|
|
def tidy_trip_persons(trippeople, title, expedition, logtime_underground, tid):
|
|
|
|
|
try:
|
|
|
|
|
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid)
|
|
|
|
|
trippersons, author, guests = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid)
|
|
|
|
|
# trippersons is a list of tuples (personyear, nickname_used, logtime_underground)
|
|
|
|
|
except:
|
|
|
|
|
message = f" ! - {expedition.year} Logentry: {title} - GetTripPersons FAIL to recognise nickname"
|
|
|
|
|
@@ -216,7 +217,7 @@ def tidy_trip_persons(trippeople, title, expedition, logtime_underground, tid):
|
|
|
|
|
DataIssue.objects.create(parser="logbooks", message=message)
|
|
|
|
|
print(message)
|
|
|
|
|
|
|
|
|
|
return trippersons, author
|
|
|
|
|
return trippersons, author, guests
|
|
|
|
|
|
|
|
|
|
def tidy_trip_cave(place):
|
|
|
|
|
# GetCaveLookup() needs to work better. None of this data is *used* though?
|
|
|
|
|
@@ -251,16 +252,18 @@ def tidy_tid(tid, title):
|
|
|
|
|
tid = str(randint(1000, 9999)) + "_" + slugify(title)[:10].replace("-", "_")
|
|
|
|
|
return tid
|
|
|
|
|
|
|
|
|
|
def store_entry_into_database(date, place, tripcave, title, text, trippersons, author, expedition, logtime_underground, tid):
|
|
|
|
|
def store_entry_into_database(date, place, tripcave, title, text, trippersons, author, guests, expedition, logtime_underground, tid):
|
|
|
|
|
"""saves a single logbook entry and related personlogentry items
|
|
|
|
|
|
|
|
|
|
We could do a bulk update to save all the entries, but then we would need to do a query on
|
|
|
|
|
each one to get the primary key to asign to the PersonLogEntries. So overall probably not much
|
|
|
|
|
each one to get the primary key to assign to the PersonLogEntries. So overall probably not much
|
|
|
|
|
faster ?
|
|
|
|
|
"""
|
|
|
|
|
other_people = ", ".join(guests) # join list members separated by comma
|
|
|
|
|
|
|
|
|
|
nonLookupAttribs = {
|
|
|
|
|
"place": place,
|
|
|
|
|
"other_people": other_people, # *Ol's Mum, foreigners..
|
|
|
|
|
"text": text,
|
|
|
|
|
"expedition": expedition,
|
|
|
|
|
"time_underground": logtime_underground,
|
|
|
|
|
@@ -324,6 +327,17 @@ def parser_date(tripdate, year):
|
|
|
|
|
def parser_html(year, expedition, txt, seq=""):
|
|
|
|
|
"""This uses some of the more obscure capabilities of regular expressions,
|
|
|
|
|
see https://docs.python.org/3/library/re.html
|
|
|
|
|
|
|
|
|
|
e.g.
|
|
|
|
|
* is greedy
|
|
|
|
|
*? is non-greedy
|
|
|
|
|
|
|
|
|
|
(?x) flag means VERBOSE
|
|
|
|
|
|
|
|
|
|
(?: ) non-capturing parentheses
|
|
|
|
|
|
|
|
|
|
\s whitespace
|
|
|
|
|
\S NOT whitespace
|
|
|
|
|
|
|
|
|
|
You can't see it here, but a round-trip export-then-import will move
|
|
|
|
|
the endmatter up to the frontmatter. This made sense when translating
|
|
|
|
|
@@ -357,7 +371,7 @@ def parser_html(year, expedition, txt, seq=""):
|
|
|
|
|
for trippara in tripparas:
|
|
|
|
|
logbook_entry_count += 1
|
|
|
|
|
tid = set_trip_seq_id(year, logbook_entry_count)
|
|
|
|
|
# print(f' - new tid:{tid} lbe count: {logbook_entry_count}')
|
|
|
|
|
# print(f' - new seq tid:{tid} lbe count: {logbook_entry_count}')
|
|
|
|
|
|
|
|
|
|
s = re.match(
|
|
|
|
|
r"""(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
|
|
|
|
@@ -367,15 +381,17 @@ def parser_html(year, expedition, txt, seq=""):
|
|
|
|
|
\s*<div\s+class="triptitle">\s*(.*?)</div>
|
|
|
|
|
([\s\S]*?)
|
|
|
|
|
\s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
|
|
|
|
|
\s*(?:<div\s+class="editentry"\s*.*?</div>)?
|
|
|
|
|
\s*$
|
|
|
|
|
""",
|
|
|
|
|
trippara,
|
|
|
|
|
)
|
|
|
|
|
if s:
|
|
|
|
|
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
|
|
|
|
|
# print(f"#{logbook_entry_count} {tu} {len(triptext)} ")
|
|
|
|
|
else:
|
|
|
|
|
# if not re.search(r"Rigging Guide", trippara):
|
|
|
|
|
msg = f" !- Logbook. Can't parse entry, skipping:{logbook_entry_count} '{trippara[:55]}'...'{trippara}'"
|
|
|
|
|
msg = f" !- Logbook. Can't parse entry, skipping:{logbook_entry_count} '{trippara[:75]}'..."
|
|
|
|
|
print(msg)
|
|
|
|
|
DataIssue.objects.create(parser="logbooks", message=msg)
|
|
|
|
|
continue
|
|
|
|
|
@@ -403,12 +419,12 @@ def parser_html(year, expedition, txt, seq=""):
|
|
|
|
|
dupl[check] = 1
|
|
|
|
|
|
|
|
|
|
tu = tidy_time_underground(tu)
|
|
|
|
|
trippersons, author = tidy_trip_persons(trippeople, triptitle, expedition, tu, tid)
|
|
|
|
|
trippersons, author, guests = tidy_trip_persons(trippeople, triptitle, expedition, tu, tid)
|
|
|
|
|
tripcave = tidy_trip_cave(place)
|
|
|
|
|
tripcontent = tidy_trip_image_urls(tripcontent, ldate)
|
|
|
|
|
tid = tidy_tid(tid, triptitle)
|
|
|
|
|
|
|
|
|
|
entrytuple = (ldate, place, tripcave, triptitle, tripcontent, trippersons, author, expedition, tu, tid)
|
|
|
|
|
entrytuple = (ldate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
|
|
|
|
|
logentries.append(entrytuple)
|
|
|
|
|
return logentries
|
|
|
|
|
|
|
|
|
|
@@ -509,13 +525,13 @@ def parser_blog(year, expedition, txt, sq=""):
|
|
|
|
|
tripcontent = f"\n\n<!-- Content parsed from UK Caving Blog -->\nBlog Author: {trippeople}" + tripcontent
|
|
|
|
|
|
|
|
|
|
logtime_underground = 0
|
|
|
|
|
trippersons, author = tidy_trip_persons(trippeople, triptitle, expedition, logtime_underground, tid)
|
|
|
|
|
trippersons, author, guests = tidy_trip_persons(trippeople, triptitle, expedition, logtime_underground, tid)
|
|
|
|
|
# print(f" - author: {author}")
|
|
|
|
|
tripcave = tidy_trip_cave(place)
|
|
|
|
|
tripcontent = tidy_trip_image_urls(tripcontent, year)
|
|
|
|
|
tid = tidy_tid(tid, triptitle)
|
|
|
|
|
|
|
|
|
|
entrytuple = (tripdate, place, tripcave, triptitle, tripcontent, trippersons, author, expedition, tu, tid)
|
|
|
|
|
entrytuple = (tripdate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
|
|
|
|
|
logentries.append(entrytuple)
|
|
|
|
|
return logentries
|
|
|
|
|
|
|
|
|
|
@@ -621,10 +637,10 @@ def LoadLogbook(year):
|
|
|
|
|
f" - Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}"
|
|
|
|
|
)
|
|
|
|
|
for entrytuple in logentries:
|
|
|
|
|
date, place, tripcave, triptitle, text, trippersons, author, expedition, tu, tid = entrytuple
|
|
|
|
|
date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
|
|
|
|
|
if expo == expedition: # unneeded check, we zeroed it before filling it
|
|
|
|
|
# print(f" -- {triptitle}")
|
|
|
|
|
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, expedition, tu, tid)
|
|
|
|
|
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid)
|
|
|
|
|
else:
|
|
|
|
|
print(f" ! unexpected log entry labelled as '{expedition}' {tid}" )
|
|
|
|
|
expo.save() # to save logbook name property
|
|
|
|
|
@@ -708,8 +724,8 @@ def LoadLogbooks():
|
|
|
|
|
# - LogBookEntry (text, who when etc.)
|
|
|
|
|
# - PersonLogEntry (who was on that specific trip mentioned in the logbook entry)
|
|
|
|
|
for entrytuple in allentries:
|
|
|
|
|
date, place, tripcave, triptitle, text, trippersons, author, expedition, tu, tid = entrytuple
|
|
|
|
|
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, expedition, tu, tid)
|
|
|
|
|
date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
|
|
|
|
|
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid)
|
|
|
|
|
|
|
|
|
|
for expo in expos:
|
|
|
|
|
expo.save() # to save logbook name property
|
|
|
|
|
|