From e2c1bc3516e994d4ef4b0fa70a0e05365161a724 Mon Sep 17 00:00:00 2001
From: Philip Sargent
Date: Sat, 22 Nov 2025 13:50:50 +0200
Subject: [PATCH] parsing JSON logentries OK as valid

---
 core/views/logbooks.py | 36 ++++----------------------
 parsers/logbooks.py    | 57 +++++++++++++++++++++++++++++++++++-------
 2 files changed, 53 insertions(+), 40 deletions(-)

diff --git a/core/views/logbooks.py b/core/views/logbooks.py
index 562fc7b..0d5bb68 100644
--- a/core/views/logbooks.py
+++ b/core/views/logbooks.py
@@ -411,9 +411,9 @@ def write_entries(entries, year, editor):
         raise e
 
     def serialize_logentry(le):
-        # REPLACE this with hand-built serializer which includes .author, .who which were added to the entries but re not in the model Class directly
-        # see below for Gemini code to do that. Going to bed now.
-
+        """When doing JSON output of objects which have foreign keys to other objects in the database,
+        we need to use a custom serializer instead of just json.dump().
+        """
         author_link = PersonLogEntry.objects.select_related('personexpedition').get(
             logbook_entry=le,
             is_logbook_entry_author=True
@@ -428,11 +428,11 @@
         for pl in participants_links:
             participants.append(pl.personexpedition.person)
 
-        author_data = model_to_dict(author, fields=['id', 'slug'])
+        author_data = model_to_dict(author, fields=['id', 'slug', 'nickname_used'])
 
         participants_data = []
         for p in participants:
-            participants_data.append(model_to_dict(p, fields=['id', 'slug']))
+            participants_data.append(model_to_dict(p, fields=['id', 'slug', 'nickname_used']))
 
         entrydict = model_to_dict(le, fields=('slug', 'date', 'expedition', 'title', 'cave', 'place', 'other_people', 'time_underground', 'text'))
         entrydict['author'] = author_data
@@ -444,7 +444,6 @@
     for le in entries:
         filename = f"{le.slug}-{le.pk:03}.json"
         filepath = dirpath / filename
-        # description = f" {le.slug} :: {le.date} - {le.title}"
         ensure_dir_exists(filepath)
 
         entrydict = serialize_logentry(le)
@@ -456,28 +455,3 @@
     return True
 
 
-def export_entry_with_author_details(request, entry_id):
-
-
-    # 3. Manually create the nested dictionary structure
-    # Use model_to_dict for easy extraction of the simple fields
-
-    # Author data (specify fields you want to expose)
-    author_data = model_to_dict(author, fields=['id', 'first_name', 'last_name', 'email'])
-
-    # Entry data (specify fields you want to expose)
-    entry_data = model_to_dict(entry, fields=['id', 'title', 'content', 'date_created'])
-
-    # Nest the author data inside the entry data
-    entry_data['author'] = author_data
-
-    # Add data from the intermediate model if needed (e.g., the date the person was added)
-    entry_data['author_assignment_date'] = author_link.date_assigned.isoformat()
-
-
-    # 4. Return the custom dictionary using JsonResponse
-    return JsonResponse(
-        entry_data,
-        encoder=CustomJSONEncoder,
-        safe=False  # Set to True if entry_data was a list/QuerySet
-    )
\ No newline at end of file
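
Note on the serializer above: it hand-builds a nested dict with model_to_dict() rather than relying on json.dump() alone, because the author and participants are reached through foreign keys and the entry date is not a JSON-native type. The snippet below is a minimal standalone sketch of that pattern, not code from this patch: it assumes Django's stock DjangoJSONEncoder is used when the dict is written out (the actual encoder in write_entries() is not shown here), and every field value in it is invented purely for illustration.

import datetime
import json

from django.core.serializers.json import DjangoJSONEncoder

# Shape of the dict that serialize_logentry() builds: entry fields from
# model_to_dict(), plus hand-nested 'author' and 'participants' sub-dicts.
# All values below are hypothetical.
entrydict = {
    "slug": "2025-08-01a",              # hypothetical entry slug
    "date": datetime.date(2025, 8, 1),  # a date object, as model_to_dict() would return
    "title": "Rigging the entrance pitch",
    "author": {"id": 1, "slug": "fred-bloggs", "nickname_used": "Fred"},
    "participants": [
        {"id": 2, "slug": "joe-soap", "nickname_used": "Joe"},
    ],
}

# Plain json.dumps() would raise TypeError on the date object;
# DjangoJSONEncoder serialises it as an ISO-8601 string.
print(json.dumps(entrydict, cls=DjangoJSONEncoder, indent=1))

The printed output shows the kind of nested document the new {le.slug}-{le.pk:03}.json files are expected to contain.
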
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index aa1b290..e8239a4 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -1,3 +1,4 @@
+import json
 import os
 import re
 import string
@@ -653,6 +654,39 @@ def clean_logbook_for_expedition(expedition):
 def parse_logbook_for_expedition(expedition, blog=False):
     """Parses all logbook entries for one expedition
     """
+    def load_from_json():
+        entries = []
+        for jsonfile in json_entries_dir.rglob("*.json"):
+            with open(jsonfile, 'r', encoding='utf-8') as json_f:
+                print(f"OPENING {jsonfile}")
+
+                message = ""
+                try:
+                    entrydict = json.load(json_f)
+                except FileNotFoundError:
+                    message = f"File {jsonfile} not found!"
+                except json.JSONDecodeError:
+                    message = f"Invalid JSON format! - JSONDecodeError for {jsonfile}"
+                except Exception as e:
+                    message = f"! Failed to load {jsonfile} JSON file. Exception <{e}>"
+                if message:
+                    print(message)
+                    DataIssue.objects.update_or_create(parser="logbooks", message=message, url=str(jsonfile))
+                    return None
+                entries.append(entrydict)
+
+        check_number_of_entries(entries)  # temp check on pre-parsed list
+        return entries
+
+
+    def check_number_of_entries(logentries):
+        if logentries:
+            if len(logentries) == expect:
+                # print(f"OK {year} {len(logentries):5d} is {expect}\n")
+                pass
+            else:
+                print(f"Mismatch in number of log entries: {year} {len(logentries):5d} is not {expect}\n")
+
     global ENTRIES
 
     logentries = []
@@ -662,6 +696,17 @@ def parse_logbook_for_expedition(expedition, blog=False):
     year = expedition.year
     expect = ENTRIES[year]
     # print(" - Logbook for: " + year)
+
+    json_entries_dir = settings.EXPOWEB / "years" / year / "log_entries"
+
+    if json_entries_dir.is_dir():
+        print(f" # WARNING year {year} has JSON-encoded logbook entries. Should use these instead of the archive .html file")
+        logentries = load_from_json()
+
+    logentries = []  # JSON entries are only validated for now; the HTML archive below is still what gets parsed
+    # check_number_of_entries()
+    # return logentries
+
     if year in LOGBOOK_PARSER_SETTINGS:
         yearfile, parsefunc = LOGBOOK_PARSER_SETTINGS[year]
 
@@ -675,7 +720,7 @@
 
     if blog:
         if year not in BLOG_PARSER_SETTINGS:
-            message = f" ! - Expecting blog parser buut none specified for {year}"
+            message = f" ! - Expecting blog parser but none specified for {year}"
             DataIssue.objects.create(parser="logbooks", message=message)
             print(message)
         else:
@@ -711,13 +756,7 @@
         logentries = parser(year, expedition, txt, sq)  # this launches the right parser
     # --------------------
 
-    if logentries:
-        if len(logentries) == expect:
-            # print(f"OK {year} {len(logentries):5d} is {expect}\n")
-            pass
-        else:
-            print(f"Mismatch in number of log entries: {year} {len(logentries):5d} is not {expect}\n")
-
+    check_number_of_entries(logentries)
     return logentries
 
 def _collect_logbook_entries_for_expos(expos, nologbook, ENTRIES, BLOG_PARSER_SETTINGS):
@@ -779,7 +818,7 @@ def LoadLogbook(year):
 
 def LoadLogbooks():
     """This is the master function for parsing all logbooks into the Troggle database.
-    This should be rewritten to use coroutines to load all logbooks from disc in parallel,
+    This could be rewritten to use coroutines to load all logbooks from disc in parallel,
     but must be serialised to write to database as sqlite is single-user.
     This is inside an atomic transaction. Maybe it shouldn't be..
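
Note on the parsing side: the new load_from_json() walks the per-year log_entries directory, loads every .json file, records a DataIssue when one fails to parse, and check_number_of_entries() compares the count against the expected ENTRIES[year] value. Below is a standalone sketch of that validation pass that can be run outside Django; the directory path and the expected count are placeholders standing in for settings.EXPOWEB / "years" / year / "log_entries" and ENTRIES[year], not values taken from this patch.

import json
from pathlib import Path

json_entries_dir = Path("expoweb/years/2024/log_entries")  # hypothetical location
expect = 50                                                # hypothetical ENTRIES[year] value

entries = []
problems = []
for jsonfile in sorted(json_entries_dir.rglob("*.json")):
    try:
        with open(jsonfile, encoding="utf-8") as json_f:
            entries.append(json.load(json_f))
    except json.JSONDecodeError as e:
        # In the parser this becomes a DataIssue record; here we just collect it.
        problems.append(f"Invalid JSON format! - JSONDecodeError for {jsonfile}: {e}")
    except OSError as e:
        problems.append(f"! Failed to load {jsonfile} JSON file. Exception <{e}>")

for message in problems:
    print(message)
if len(entries) != expect:
    print(f"Mismatch in number of log entries: {len(entries)} is not {expect}")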