mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-15 18:57:13 +00:00

parsing JSON logentries OK as valid

This commit is contained in:
2025-11-22 13:50:50 +02:00
parent 2807ed5c21
commit e2c1bc3516
2 changed files with 53 additions and 40 deletions

View File

@@ -411,9 +411,9 @@ def write_entries(entries, year, editor):
raise e
def serialize_logentry(le):
# REPLACE this with a hand-built serializer which includes .author and .who, which were added to the entries but are not in the model Class directly
# see below for Gemini code to do that. Going to bed now.
"""When doing JSON output of objects which have foreign keys to other objects in the database,
we need to use a custom serializer instead of just json.dump().
"""
author_link = PersonLogEntry.objects.select_related('personexpedition').get(
logbook_entry=le,
is_logbook_entry_author=True
@@ -428,11 +428,11 @@ def write_entries(entries, year, editor):
for pl in participants_links:
participants.append(pl.personexpedition.person)
author_data = model_to_dict(author, fields=['id', 'slug'])
author_data = model_to_dict(author, fields=['id', 'slug', 'nickname_used'])
participants_data = []
for p in participants:
participants_data.append(model_to_dict(p, fields=['id', 'slug']))
participants_data.append(model_to_dict(p, fields=['id', 'slug', 'nickname_used']))
entrydict = model_to_dict(le, fields=('slug', 'date', 'expedition', 'title', 'cave', 'place', 'other_people', 'time_underground', 'text'))
entrydict['author'] = author_data
@@ -444,7 +444,6 @@ def write_entries(entries, year, editor):
for le in entries:
filename = f"{le.slug}-{le.pk:03}.json"
filepath = dirpath / filename
# description = f" {le.slug} :: {le.date} - {le.title}"
ensure_dir_exists(filepath)
entrydict = serialize_logentry(le)
@@ -456,28 +455,3 @@ def write_entries(entries, year, editor):
return True
def export_entry_with_author_details(request, entry_id):
# 3. Manually create the nested dictionary structure
# Use model_to_dict for easy extraction of the simple fields
# Author data (specify fields you want to expose)
author_data = model_to_dict(author, fields=['id', 'first_name', 'last_name', 'email'])
# Entry data (specify fields you want to expose)
entry_data = model_to_dict(entry, fields=['id', 'title', 'content', 'date_created'])
# Nest the author data inside the entry data
entry_data['author'] = author_data
# Add data from the intermediate model if needed (e.g., the date the person was added)
entry_data['author_assignment_date'] = author_link.date_assigned.isoformat()
# 4. Return the custom dictionary using JsonResponse
return JsonResponse(
entry_data,
encoder=CustomJSONEncoder,
safe=False # Set to True if entry_data was a list/QuerySet
)
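
The docstring in serialize_logentry above explains why plain json.dump() is not enough once foreign keys are involved. As a minimal sketch of that hand-built approach (the model and field names follow the diff, but the write helper and the DjangoJSONEncoder usage are illustrative assumptions, not necessarily what troggle does):

# Illustrative sketch only: flatten a log entry and its related people into plain
# dicts, then let DjangoJSONEncoder handle the dates that json.dump() cannot serialize.
import json
from django.forms.models import model_to_dict
from django.core.serializers.json import DjangoJSONEncoder

def entry_to_dict(le, author, participants):
    entrydict = model_to_dict(
        le, fields=("slug", "date", "title", "place", "other_people", "time_underground", "text")
    )
    entrydict["author"] = model_to_dict(author, fields=["id", "slug", "nickname_used"])
    entrydict["participants"] = [
        model_to_dict(p, fields=["id", "slug", "nickname_used"]) for p in participants
    ]
    return entrydict

def write_entry_json(filepath, le, author, participants):
    with open(filepath, "w", encoding="utf-8") as f:
        json.dump(entry_to_dict(le, author, participants), f, cls=DjangoJSONEncoder, indent=1)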

View File

@@ -1,3 +1,4 @@
import json
import os
import re
import string
@@ -653,6 +654,39 @@ def clean_logbook_for_expedition(expedition):
def parse_logbook_for_expedition(expedition, blog=False):
"""Parses all logbook entries for one expedition
"""
def load_from_json():
entries = []
for jsonfile in json_entries_dir.rglob("*.json"):
with open(jsonfile, 'r', encoding='utf-8') as json_f:
print(f"OPENING {jsonfile}")
message = ""
try:
entrydict = json.load(json_f)
except FileNotFoundError:
message = f"File {jsonfile} not found!"
except json.JSONDecodeError:
message = f"Invalid JSON format! - JSONDecodeError for {jsonfile}"
except Exception as e:
message = f"! Failed to load {jsonfile} JSON file. Exception <{e}>"
if message:
print(message)
DataIssue.objects.update_or_create(parser="logbooks", message=message, url=jsonurl)
return None
entries.append(entrydict)
check_number_of_entries(entries) # temp check on pre-parsed list
return entries
def check_number_of_entries(logentries):
if logentries:
if len(logentries) == expect:
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
pass
else:
print(f"Mismatch in number of log entries: {year} {len(logentries):5d} is not {expect}\n")
global ENTRIES
logentries = []
@@ -662,6 +696,17 @@ def parse_logbook_for_expedition(expedition, blog=False):
year = expedition.year
expect = ENTRIES[year]
# print(" - Logbook for: " + year)
json_entries_dir = settings.EXPOWEB / "years" / year / "log_entries"
if json_entries_dir.is_dir():
print(f" # WARNING year {year} has JSON-encoded logbook entries. Should use these instead of the archive .html file")
logentries = load_from_json()
logentries = []
# check_number_of_entries()
# return logentries
if year in LOGBOOK_PARSER_SETTINGS:
yearfile, parsefunc = LOGBOOK_PARSER_SETTINGS[year]
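
For reference, load_from_json() above reads one JSON file per entry from settings.EXPOWEB / "years" / year / "log_entries". Given the fields written by serialize_logentry in the first file of this commit, each file decodes to roughly the following dict (the values here are invented for illustration):

# Rough shape of one entry file after json.load(); values are made up,
# and the exact field set depends on the LogbookEntry model.
entrydict = {
    "slug": "2025-07-15a",
    "date": "2025-07-15",
    "title": "Rigging the entrance series",
    "place": "plateau",
    "other_people": "",
    "time_underground": 5.5,
    "text": "<p>Trip report text...</p>",
    "author": {"id": 12, "slug": "fred-bloggs", "nickname_used": "Fred"},
    "participants": [{"id": 34, "slug": "jo-smith", "nickname_used": "Jo"}],
}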
@@ -675,7 +720,7 @@ def parse_logbook_for_expedition(expedition, blog=False):
if blog:
if year not in BLOG_PARSER_SETTINGS:
message = f" ! - Expecting blog parser buut none specified for {year}"
message = f" ! - Expecting blog parser but none specified for {year}"
DataIssue.objects.create(parser="logbooks", message=message)
print(message)
else:
@@ -711,13 +756,7 @@ def parse_logbook_for_expedition(expedition, blog=False):
logentries = parser(year, expedition, txt, sq) # this launches the right parser
# --------------------
if logentries:
if len(logentries) == expect:
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
pass
else:
print(f"Mismatch in number of log entries: {year} {len(logentries):5d} is not {expect}\n")
check_number_of_entries(logentries)
return logentries
def _collect_logbook_entries_for_expos(expos, nologbook, ENTRIES, BLOG_PARSER_SETTINGS):
@@ -779,7 +818,7 @@ def LoadLogbook(year):
def LoadLogbooks():
"""This is the master function for parsing all logbooks into the Troggle database.
This should be rewritten to use coroutines to load all logbooks from disc in parallel,
This could be rewritten to use coroutines to load all logbooks from disc in parallel,
but must be serialised to write to database as sqlite is single-user.
This is inside an atomic transaction. Maybe it shouldn't be..
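
A minimal sketch of the parallel-read / serial-write split this docstring suggests, assuming hypothetical parse_one() and save_entries() helpers (they are not troggle functions):

from concurrent.futures import ThreadPoolExecutor
from django.db import transaction

def load_all_logbooks(expeditions, parse_one, save_entries):
    # Parsing the logbook files touches only the filesystem, so it can run
    # concurrently across expeditions.
    with ThreadPoolExecutor(max_workers=8) as pool:
        parsed = list(pool.map(parse_one, expeditions))
    # sqlite is single-writer, so all database writes stay serialised,
    # inside one atomic transaction as the current code does.
    with transaction.atomic():
        for expedition, entries in zip(expeditions, parsed):
            save_entries(expedition, entries)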