From 88833941b5fec8e2dcfb129301455044a33e850b Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Sat, 22 Nov 2025 14:32:16 +0200 Subject: [PATCH] nearly parsed JSON into correct form for database --- parsers/logbooks.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/parsers/logbooks.py b/parsers/logbooks.py index e8239a4..bc2fc56 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -523,6 +523,8 @@ def parser_html(year, expedition, txt, seq=""): entry = LogbookEntryData(ldate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid) logentries.append(entry) + if str(ldate) == "2025-07-08": + print(f"PARSED from html\n",entry,"\n") return logentries @@ -656,10 +658,9 @@ def parse_logbook_for_expedition(expedition, blog=False): """ def load_from_json(): entries = [] + logentries = [] for jsonfile in json_entries_dir.rglob("*.json"): with open(jsonfile, 'r', encoding='utf-8') as json_f: - print(f"OPENING {jsonfile}") - message = "" try: entrydict = json.load(json_f) @@ -674,10 +675,30 @@ def parse_logbook_for_expedition(expedition, blog=False): DataIssue.objects.update_or_create(parser="logbooks", message=message, url=jsonurl) return None entries.append(entrydict) + logentries.append(parse_from_dict(entrydict)) check_number_of_entries(entries) # temp check on pre-parsed list - return entries + check_number_of_entries(logentries) + return logentries + + def parse_from_dict(entrydict): + ldate = entrydict["date"] + place = entrydict["place"] + tripcave = entrydict["cave"] + triptitle = entrydict["title"] + tripcontent = entrydict["text"] + trippersons = None + author = entrydict["author"]["slug"] + guests = entrydict["other_people"] + expedition = entrydict["expedition"] + tu = entrydict["time_underground"] + tid = entrydict["slug"] # Is this right ? Or is it the end txt? + + logentry = LogbookEntryData(ldate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid) + if ldate == "2025-07-08": + print(f"PARSED from JSON\n",logentry,"\n") + return logentry def check_number_of_entries(logentries): if logentries: @@ -700,11 +721,11 @@ def parse_logbook_for_expedition(expedition, blog=False): json_entries_dir = settings.EXPOWEB / "years" / year / "log_entries" if json_entries_dir.is_dir(): - print(f" # WARNING year {year} has JSON-encoded logbook entries. Should use these instead of the archive .html file") + print(f" # WARNING year {year} has JSON-encoded logbook entries. Using these instead of the archive .html file.") logentries = load_from_json() logentries = [] - # check_number_of_entries() + # check_number_of_entries(logentries) # return logentries