From e2c1bc3516e994d4ef4b0fa70a0e05365161a724 Mon Sep 17 00:00:00 2001
From: Philip Sargent
Date: Sat, 22 Nov 2025 13:50:50 +0200
Subject: [PATCH] parsing JSON logentries OK as valid

---
 core/views/logbooks.py | 36 ++++----------------------
 parsers/logbooks.py    | 57 +++++++++++++++++++++++++++++++++++-------
 2 files changed, 53 insertions(+), 40 deletions(-)

diff --git a/core/views/logbooks.py b/core/views/logbooks.py
index 562fc7b..0d5bb68 100644
--- a/core/views/logbooks.py
+++ b/core/views/logbooks.py
@@ -411,9 +411,9 @@ def write_entries(entries, year, editor):
         raise e
 
     def serialize_logentry(le):
-        # REPLACE this with hand-built serializer which includes .author, .who which were added to the entries but re not in the model Class directly
-        # see below for Gemini code to do that. Going to bed now.
-
+        """When doing JSON output of objects which have foreign keys to other objects in the database,
+        we need to use a custom serializer instead of just json.dump().
+        """
         author_link = PersonLogEntry.objects.select_related('personexpedition').get(
             logbook_entry=le,
             is_logbook_entry_author=True
@@ -428,11 +428,11 @@
         for pl in participants_links:
             participants.append(pl.personexpedition.person)
 
-        author_data = model_to_dict(author, fields=['id', 'slug'])
+        author_data = model_to_dict(author, fields=['id', 'slug', 'nickname_used'])
 
         participants_data = []
         for p in participants:
-            participants_data.append(model_to_dict(p, fields=['id', 'slug']))
+            participants_data.append(model_to_dict(p, fields=['id', 'slug', 'nickname_used']))
 
         entrydict = model_to_dict(le, fields=('slug', 'date', 'expedition', 'title', 'cave', 'place', 'other_people', 'time_underground', 'text'))
         entrydict['author'] = author_data
@@ -444,7 +444,6 @@
     for le in entries:
         filename = f"{le.slug}-{le.pk:03}.json"
         filepath = dirpath / filename
-        # description = f" {le.slug} :: {le.date} - {le.title}"
         ensure_dir_exists(filepath)
 
         entrydict = serialize_logentry(le)
@@ -456,28 +455,3 @@
     return True
 
 
-def export_entry_with_author_details(request, entry_id):
-
-
-    # 3. Manually create the nested dictionary structure
-    # Use model_to_dict for easy extraction of the simple fields
-
-    # Author data (specify fields you want to expose)
-    author_data = model_to_dict(author, fields=['id', 'first_name', 'last_name', 'email'])
-
-    # Entry data (specify fields you want to expose)
-    entry_data = model_to_dict(entry, fields=['id', 'title', 'content', 'date_created'])
-
-    # Nest the author data inside the entry data
-    entry_data['author'] = author_data
-
-    # Add data from the intermediate model if needed (e.g., the date the person was added)
-    entry_data['author_assignment_date'] = author_link.date_assigned.isoformat()
-
-
-    # 4. Return the custom dictionary using JsonResponse
-    return JsonResponse(
-        entry_data,
-        encoder=CustomJSONEncoder,
-        safe=False  # Set to True if entry_data was a list/QuerySet
-    )
\ No newline at end of file
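
Note on the serializer above: it hand-builds a nested dict with model_to_dict() rather than relying on json.dump() alone, because the author and participants are reached through foreign keys and the entry date is not a JSON-native type. The snippet below is a minimal standalone sketch of that pattern, not code from this patch: it assumes Django's stock DjangoJSONEncoder is used when the dict is written out (the actual encoder in write_entries() is not shown here), and every field value in it is invented purely for illustration.

import datetime
import json

from django.core.serializers.json import DjangoJSONEncoder

# Shape of the dict that serialize_logentry() builds: entry fields from
# model_to_dict(), plus hand-nested 'author' and 'participants' sub-dicts.
# All values below are hypothetical.
entrydict = {
    "slug": "2025-08-01a",              # hypothetical entry slug
    "date": datetime.date(2025, 8, 1),  # a date object, as model_to_dict() would return
    "title": "Rigging the entrance pitch",
    "author": {"id": 1, "slug": "fred-bloggs", "nickname_used": "Fred"},
    "participants": [
        {"id": 2, "slug": "joe-soap", "nickname_used": "Joe"},
    ],
}

# Plain json.dumps() would raise TypeError on the date object;
# DjangoJSONEncoder serialises it as an ISO-8601 string.
print(json.dumps(entrydict, cls=DjangoJSONEncoder, indent=1))

The printed output shows the kind of nested document the new {le.slug}-{le.pk:03}.json files are expected to contain.
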
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index aa1b290..e8239a4 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -1,3 +1,4 @@
+import json
 import os
 import re
 import string
@@ -653,6 +654,39 @@ def clean_logbook_for_expedition(expedition):
 def parse_logbook_for_expedition(expedition, blog=False):
     """Parses all logbook entries for one expedition
     """
+    def load_from_json():
+        entries = []
+        for jsonfile in json_entries_dir.rglob("*.json"):
+            with open(jsonfile, 'r', encoding='utf-8') as json_f:
+                print(f"OPENING {jsonfile}")
+
+                message = ""
+                try:
+                    entrydict = json.load(json_f)
+                except FileNotFoundError:
+                    message = f"File {jsonfile} not found!"
+                except json.JSONDecodeError:
+                    message = f"Invalid JSON format! - JSONDecodeError for {jsonfile}"
+                except Exception as e:
+                    message = f"! Failed to load {jsonfile} JSON file. Exception <{e}>"
+                if message:
+                    print(message)
+                    DataIssue.objects.update_or_create(parser="logbooks", message=message, url=str(jsonfile))
+                    return None
+                entries.append(entrydict)
+
+        check_number_of_entries(entries)  # temp check on pre-parsed list
+        return entries
+
+
+    def check_number_of_entries(logentries):
+        if logentries:
+            if len(logentries) == expect:
+                # print(f"OK {year} {len(logentries):5d} is {expect}\n")
+                pass
+            else:
+                print(f"Mismatch in number of log entries: {year} {len(logentries):5d} is not {expect}\n")
+
     global ENTRIES
 
     logentries = []
@@ -662,6 +696,17 @@ def parse_logbook_for_expedition(expedition, blog=False):
     year = expedition.year
     expect = ENTRIES[year]
     # print(" - Logbook for: " + year)
+
+    json_entries_dir = settings.EXPOWEB / "years" / year / "log_entries"
+
+    if json_entries_dir.is_dir():
+        print(f" # WARNING year {year} has JSON-encoded logbook entries. Should use these instead of the archive .html file")
+        logentries = load_from_json()
+
+    logentries = []  # JSON entries are only validated for now; the HTML archive below is still what gets parsed
+    # check_number_of_entries()
+    # return logentries
+
     if year in LOGBOOK_PARSER_SETTINGS:
         yearfile, parsefunc = LOGBOOK_PARSER_SETTINGS[year]
 
@@ -675,7 +720,7 @@
 
     if blog:
         if year not in BLOG_PARSER_SETTINGS:
-            message = f" ! - Expecting blog parser buut none specified for {year}"
+            message = f" ! - Expecting blog parser but none specified for {year}"
             DataIssue.objects.create(parser="logbooks", message=message)
             print(message)
         else:
@@ -711,13 +756,7 @@
         logentries = parser(year, expedition, txt, sq)  # this launches the right parser
     # --------------------
 
-    if logentries:
-        if len(logentries) == expect:
-            # print(f"OK {year} {len(logentries):5d} is {expect}\n")
-            pass
-        else:
-            print(f"Mismatch in number of log entries: {year} {len(logentries):5d} is not {expect}\n")
-
+    check_number_of_entries(logentries)
     return logentries
 
 def _collect_logbook_entries_for_expos(expos, nologbook, ENTRIES, BLOG_PARSER_SETTINGS):
@@ -779,7 +818,7 @@ def LoadLogbook(year):
 
 def LoadLogbooks():
     """This is the master function for parsing all logbooks into the Troggle database.
-    This should be rewritten to use coroutines to load all logbooks from disc in parallel,
+    This could be rewritten to use coroutines to load all logbooks from disc in parallel,
     but must be serialised to write to database as sqlite is single-user.
     This is inside an atomic transaction. Maybe it shouldn't be..
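
Note on the parsing side: the new load_from_json() walks the per-year log_entries directory, loads every .json file, records a DataIssue when one fails to parse, and check_number_of_entries() compares the count against the expected ENTRIES[year] value. Below is a standalone sketch of that validation pass that can be run outside Django; the directory path and the expected count are placeholders standing in for settings.EXPOWEB / "years" / year / "log_entries" and ENTRIES[year], not values taken from this patch.

import json
from pathlib import Path

json_entries_dir = Path("expoweb/years/2024/log_entries")  # hypothetical location
expect = 50                                                # hypothetical ENTRIES[year] value

entries = []
problems = []
for jsonfile in sorted(json_entries_dir.rglob("*.json")):
    try:
        with open(jsonfile, encoding="utf-8") as json_f:
            entries.append(json.load(json_f))
    except json.JSONDecodeError as e:
        # In the parser this becomes a DataIssue record; here we just collect it.
        problems.append(f"Invalid JSON format! - JSONDecodeError for {jsonfile}: {e}")
    except OSError as e:
        problems.append(f"! Failed to load {jsonfile} JSON file. Exception <{e}>")

for message in problems:
    print(message)
if len(entries) != expect:
    print(f"Mismatch in number of log entries: {len(entries)} is not {expect}")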