Mirror of https://expo.survex.com/repositories/troggle/.git, synced 2025-12-17 08:17:35 +00:00
Commit: parsing JSON logentries OK as valid
@@ -411,9 +411,9 @@ def write_entries(entries, year, editor):
             raise e

     def serialize_logentry(le):
-        # REPLACE this with hand-built serializer which includes .author, .who which were added to the entries but re not in the model Class directly
-        # see below for Gemini code to do that. Going to bed now.
-
+        """When doing JSON output of objects which have foreign keys to other objects in the database,
+        we need to use a custom serializer instead of just "json.dump()
+        """
         author_link = PersonLogEntry.objects.select_related('personexpedition').get(
             logbook_entry=le,
             is_logbook_entry_author=True
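The new docstring states the reason for the hand-built serializer: model instances reached through foreign keys, and date objects, are not serializable by the standard library's json module. A minimal, self-contained sketch of the problem and of the flatten-by-hand fix (plain Python, not troggle code; the class and field names are invented):

import datetime
import json

class RelatedPerson:                      # stand-in for a model reached via a ForeignKey
    slug = "some-person"

entry = {
    "title": "Rigging trip",
    "date": datetime.date(2024, 7, 15),  # DateField values are date objects, not strings
    "author": RelatedPerson(),
}

try:
    json.dumps(entry)
except TypeError as err:
    print(err)                            # e.g. "Object of type date is not JSON serializable"

# The hand-built equivalent: reduce each related object to a small dict of plain
# fields and stringify dates, which is what serialize_logentry() does via model_to_dict().
safe = {
    "title": entry["title"],
    "date": entry["date"].isoformat(),
    "author": {"slug": entry["author"].slug},
}
print(json.dumps(safe, indent=1))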
@@ -428,11 +428,11 @@ def write_entries(entries, year, editor):
         for pl in participants_links:
             participants.append(pl.personexpedition.person)

-        author_data = model_to_dict(author, fields=['id', 'slug'])
+        author_data = model_to_dict(author, fields=['id', 'slug', 'nickname_used'])

         participants_data = []
         for p in participants:
-            participants_data.append(model_to_dict(p, fields=['id', 'slug']))
+            participants_data.append(model_to_dict(p, fields=['id', 'slug', 'nickname_used']))

         entrydict = model_to_dict(le, fields=('slug', 'date', 'expedition', 'title', 'cave', 'place', 'other_people', 'time_underground', 'text'))
         entrydict['author'] = author_data
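For orientation, the dict this builds (and which write_entries() dumps to a per-entry .json file) ends up shaped roughly as below. All values are invented; the 'participants' key is presumed to be attached in lines not shown in this hunk; model_to_dict() renders the expedition and cave foreign keys as primary-key values and restricts the author/participant dicts to the listed fields.

# Illustrative only - values invented, shape inferred from the fields above.
entrydict_example = {
    "slug": "2024-07-15a",
    "date": "2024-07-15",                    # stringified when written out as JSON
    "expedition": 42,                        # ForeignKey rendered as a primary key
    "title": "Rigging the entrance series",
    "cave": 123,
    "place": "entrance pitch",
    "other_people": "guests",
    "time_underground": 5.5,
    "text": "<p>Trip report text...</p>",
    "author": {"id": 7, "slug": "some-person", "nickname_used": "Mole"},
    "participants": [                        # presumed key, not visible in this hunk
        {"id": 7, "slug": "some-person", "nickname_used": "Mole"},
        {"id": 9, "slug": "another-person", "nickname_used": "Badger"},
    ],
}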
@@ -444,7 +444,6 @@ def write_entries(entries, year, editor):
     for le in entries:
         filename = f"{le.slug}-{le.pk:03}.json"
         filepath = dirpath / filename
-        # description = f" {le.slug} :: {le.date} - {le.title}"
         ensure_dir_exists(filepath)

         entrydict = serialize_logentry(le)
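The zero-padded format spec in f"{le.slug}-{le.pk:03}.json" gives filenames like 2024-07-15a-007.json, so entries sort consistently. ensure_dir_exists() is a troggle helper that is not part of this diff; a plausible pathlib-based stand-in, purely an assumption about what it does:

from pathlib import Path

def ensure_dir_exists(filepath: Path) -> None:
    # Hypothetical equivalent of the helper used above: create the directory
    # that will contain this file (and any missing parents) before writing.
    filepath.parent.mkdir(parents=True, exist_ok=True)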
@@ -456,28 +455,3 @@ def write_entries(entries, year, editor):
     return True


-def export_entry_with_author_details(request, entry_id):
-
-
-    # 3. Manually create the nested dictionary structure
-    # Use model_to_dict for easy extraction of the simple fields
-
-    # Author data (specify fields you want to expose)
-    author_data = model_to_dict(author, fields=['id', 'first_name', 'last_name', 'email'])
-
-    # Entry data (specify fields you want to expose)
-    entry_data = model_to_dict(entry, fields=['id', 'title', 'content', 'date_created'])
-
-    # Nest the author data inside the entry data
-    entry_data['author'] = author_data
-
-    # Add data from the intermediate model if needed (e.g., the date the person was added)
-    entry_data['author_assignment_date'] = author_link.date_assigned.isoformat()
-
-
-    # 4. Return the custom dictionary using JsonResponse
-    return JsonResponse(
-        entry_data,
-        encoder=CustomJSONEncoder,
-        safe=False  # Set to True if entry_data was a list/QuerySet
-    )
(The remaining hunks belong to a second file in this commit, the logbook parser module.)

@@ -1,3 +1,4 @@
+import json
 import os
 import re
 import string
@@ -653,6 +654,39 @@ def clean_logbook_for_expedition(expedition):
 def parse_logbook_for_expedition(expedition, blog=False):
     """Parses all logbook entries for one expedition
     """
+    def load_from_json():
+        entries = []
+        for jsonfile in json_entries_dir.rglob("*.json"):
+            with open(jsonfile, 'r', encoding='utf-8') as json_f:
+                print(f"OPENING {jsonfile}")
+
+                message = ""
+                try:
+                    entrydict = json.load(json_f)
+                except FileNotFoundError:
+                    message = f"File {jsonfile} not found!"
+                except json.JSONDecodeError:
+                    message = f"Invalid JSON format! - JSONDecodeError for {jsonfile}"
+                except Exception as e:
+                    message = f"! Failed to load {jsonfile} JSON file. Exception <{e}>"
+                if message:
+                    print(message)
+                    DataIssue.objects.update_or_create(parser="logbooks", message=message, url=jsonurl)
+                    return None
+                entries.append(entrydict)
+
+        check_number_of_entries(entries) # temp check on pre-parsed list
+        return entries
+
+
+    def check_number_of_entries(logentries):
+        if logentries:
+            if len(logentries) == expect:
+                # print(f"OK {year} {len(logentries):5d} is {expect}\n")
+                pass
+            else:
+                print(f"Mismatch in number of log entries: {year} {len(logentries):5d} is not {expect}\n")
+
     global ENTRIES
     logentries = []

@@ -662,6 +696,17 @@ def parse_logbook_for_expedition(expedition, blog=False):
     year = expedition.year
     expect = ENTRIES[year]
     # print(" - Logbook for: " + year)
+
+    json_entries_dir = settings.EXPOWEB / "years" / year / "log_entries"
+
+    if json_entries_dir.is_dir():
+        print(f" # WARNING year {year} has JSON-encoded logbook entries. Should use these instead of the archive .html file")
+        logentries = load_from_json()
+
+        logentries = []
+        # check_number_of_entries()
+        # return logentries
+

     if year in LOGBOOK_PARSER_SETTINGS:
         yearfile, parsefunc = LOGBOOK_PARSER_SETTINGS[year]
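Note that, as committed here, the JSON entries are loaded and validated but the result is then discarded (logentries is reset to [] and the check/return lines stay commented out), so the normal archive-HTML parser path below still runs; this matches the commit message "parsing JSON logentries OK as valid". The directory lookup relies on settings.EXPOWEB supporting the / operator, i.e. being a pathlib.Path or similar. A small sketch of how the path resolves (the root path here is invented):

from pathlib import Path

EXPOWEB = Path("/home/expo/expoweb")   # assumption: wherever the expoweb checkout lives
year = "2024"

json_entries_dir = EXPOWEB / "years" / year / "log_entries"
print(json_entries_dir)                # /home/expo/expoweb/years/2024/log_entries
print(json_entries_dir.is_dir())       # True only if that directory exists on disc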
@@ -675,7 +720,7 @@ def parse_logbook_for_expedition(expedition, blog=False):

     if blog:
         if year not in BLOG_PARSER_SETTINGS:
-            message = f" ! - Expecting blog parser buut none specified for {year}"
+            message = f" ! - Expecting blog parser but none specified for {year}"
             DataIssue.objects.create(parser="logbooks", message=message)
             print(message)
         else:
@@ -711,13 +756,7 @@ def parse_logbook_for_expedition(expedition, blog=False):
         logentries = parser(year, expedition, txt, sq) # this launches the right parser
     # --------------------

-    if logentries:
-        if len(logentries) == expect:
-            # print(f"OK {year} {len(logentries):5d} is {expect}\n")
-            pass
-        else:
-            print(f"Mismatch in number of log entries: {year} {len(logentries):5d} is not {expect}\n")
-
+    check_number_of_entries(logentries)
     return logentries

 def _collect_logbook_entries_for_expos(expos, nologbook, ENTRIES, BLOG_PARSER_SETTINGS):
@@ -779,7 +818,7 @@ def LoadLogbook(year):

 def LoadLogbooks():
     """This is the master function for parsing all logbooks into the Troggle database.
-    This should be rewritten to use coroutines to load all logbooks from disc in parallel,
+    This could be rewritten to use coroutines to load all logbooks from disc in parallel,
     but must be serialised to write to database as sqlite is single-user.

     This is inside an atomic transaction. Maybe it shouldn't be..
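The docstring's suggestion - read logbook files from disc in parallel, but serialise the database writes because sqlite allows only one writer - could look roughly like the sketch below. This is not troggle code; parse_one and save_one are hypothetical stand-ins for the real per-year parsers and the ORM writes.

from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

def parse_one(path: Path) -> str:
    # Hypothetical stand-in for parsing a single logbook file from disc.
    return path.read_text(encoding="utf-8")

def save_one(parsed: str) -> None:
    # Hypothetical stand-in for the database writes; must run in one thread
    # only, since sqlite permits a single writer at a time.
    pass

def load_all(paths):
    with ThreadPoolExecutor() as pool:
        parsed = list(pool.map(parse_one, paths))   # parallel reads from disc
    for item in parsed:                             # serialised writes
        save_one(item)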