mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-15 18:57:13 +00:00

parsing JSON logentries OK as valid

This commit is contained in:
2025-11-22 13:50:50 +02:00
parent 2807ed5c21
commit e2c1bc3516
2 changed files with 53 additions and 40 deletions

View File

@@ -411,9 +411,9 @@ def write_entries(entries, year, editor):
raise e
def serialize_logentry(le):
# REPLACE this with a hand-built serializer which includes .author and .who, which were added to the entries but are not in the model Class directly
# see below for Gemini code to do that. Going to bed now.
"""When doing JSON output of objects which have foreign keys to other objects in the database,
we need to use a custom serializer instead of just json.dump().
"""
author_link = PersonLogEntry.objects.select_related('personexpedition').get(
logbook_entry=le,
is_logbook_entry_author=True
@@ -428,11 +428,11 @@ def write_entries(entries, year, editor):
for pl in participants_links:
participants.append(pl.personexpedition.person)
author_data = model_to_dict(author, fields=['id', 'slug'])
author_data = model_to_dict(author, fields=['id', 'slug', 'nickname_used'])
participants_data = []
for p in participants:
participants_data.append(model_to_dict(p, fields=['id', 'slug']))
participants_data.append(model_to_dict(p, fields=['id', 'slug', 'nickname_used']))
entrydict = model_to_dict(le, fields=('slug', 'date', 'expedition', 'title', 'cave', 'place', 'other_people', 'time_underground', 'text'))
entrydict['author'] = author_data
@@ -444,7 +444,6 @@ def write_entries(entries, year, editor):
for le in entries:
filename = f"{le.slug}-{le.pk:03}.json"
filepath = dirpath / filename
# description = f" {le.slug} :: {le.date} - {le.title}"
ensure_dir_exists(filepath)
entrydict = serialize_logentry(le)
@@ -456,28 +455,3 @@ def write_entries(entries, year, editor):
return True
def export_entry_with_author_details(request, entry_id):
# 3. Manually create the nested dictionary structure
# Use model_to_dict for easy extraction of the simple fields
# Author data (specify fields you want to expose)
author_data = model_to_dict(author, fields=['id', 'first_name', 'last_name', 'email'])
# Entry data (specify fields you want to expose)
entry_data = model_to_dict(entry, fields=['id', 'title', 'content', 'date_created'])
# Nest the author data inside the entry data
entry_data['author'] = author_data
# Add data from the intermediate model if needed (e.g., the date the person was added)
entry_data['author_assignment_date'] = author_link.date_assigned.isoformat()
# 4. Return the custom dictionary using JsonResponse
return JsonResponse(
entry_data,
encoder=CustomJSONEncoder,
safe=False # Set to True if entry_data was a list/QuerySet
)
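
The docstring in serialize_logentry above explains why plain json.dump() is not enough once foreign keys are involved. As a minimal sketch of that hand-built approach (the model and field names follow the diff, but the write helper and the DjangoJSONEncoder usage are illustrative assumptions, not necessarily what troggle does):

# Illustrative sketch only: flatten a log entry and its related people into plain
# dicts, then let DjangoJSONEncoder handle the dates that json.dump() cannot serialize.
import json
from django.forms.models import model_to_dict
from django.core.serializers.json import DjangoJSONEncoder

def entry_to_dict(le, author, participants):
    entrydict = model_to_dict(
        le, fields=("slug", "date", "title", "place", "other_people", "time_underground", "text")
    )
    entrydict["author"] = model_to_dict(author, fields=["id", "slug", "nickname_used"])
    entrydict["participants"] = [
        model_to_dict(p, fields=["id", "slug", "nickname_used"]) for p in participants
    ]
    return entrydict

def write_entry_json(filepath, le, author, participants):
    with open(filepath, "w", encoding="utf-8") as f:
        json.dump(entry_to_dict(le, author, participants), f, cls=DjangoJSONEncoder, indent=1)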

View File

@@ -1,3 +1,4 @@
import json
import os
import re
import string
@@ -653,6 +654,39 @@ def clean_logbook_for_expedition(expedition):
def parse_logbook_for_expedition(expedition, blog=False):
"""Parses all logbook entries for one expedition
"""
def load_from_json():
entries = []
for jsonfile in json_entries_dir.rglob("*.json"):
with open(jsonfile, 'r', encoding='utf-8') as json_f:
print(f"OPENING {jsonfile}")
message = ""
try:
entrydict = json.load(json_f)
except FileNotFoundError:
message = f"File {jsonfile} not found!"
except json.JSONDecodeError:
message = f"Invalid JSON format! - JSONDecodeError for {jsonfile}"
except Exception as e:
message = f"! Failed to load {jsonfile} JSON file. Exception <{e}>"
if message:
print(message)
DataIssue.objects.update_or_create(parser="logbooks", message=message, url=jsonurl)
return None
entries.append(entrydict)
check_number_of_entries(entries) # temp check on pre-parsed list
return entries
def check_number_of_entries(logentries):
if logentries:
if len(logentries) == expect:
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
pass
else:
print(f"Mismatch in number of log entries: {year} {len(logentries):5d} is not {expect}\n")
global ENTRIES
logentries = []
@@ -662,6 +696,17 @@ def parse_logbook_for_expedition(expedition, blog=False):
year = expedition.year
expect = ENTRIES[year]
# print(" - Logbook for: " + year)
json_entries_dir = settings.EXPOWEB / "years" / year / "log_entries"
if json_entries_dir.is_dir():
print(f" # WARNING year {year} has JSON-encoded logbook entries. Should use these instead of the archive .html file")
logentries = load_from_json()
logentries = []
# check_number_of_entries()
# return logentries
if year in LOGBOOK_PARSER_SETTINGS:
yearfile, parsefunc = LOGBOOK_PARSER_SETTINGS[year]
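
For reference, load_from_json() above reads one JSON file per entry from settings.EXPOWEB / "years" / year / "log_entries". Given the fields written by serialize_logentry in the first file of this commit, each file decodes to roughly the following dict (the values here are invented for illustration):

# Rough shape of one entry file after json.load(); values are made up,
# and the exact field set depends on the LogbookEntry model.
entrydict = {
    "slug": "2025-07-15a",
    "date": "2025-07-15",
    "title": "Rigging the entrance series",
    "place": "plateau",
    "other_people": "",
    "time_underground": 5.5,
    "text": "<p>Trip report text...</p>",
    "author": {"id": 12, "slug": "fred-bloggs", "nickname_used": "Fred"},
    "participants": [{"id": 34, "slug": "jo-smith", "nickname_used": "Jo"}],
}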
@@ -675,7 +720,7 @@ def parse_logbook_for_expedition(expedition, blog=False):
if blog:
if year not in BLOG_PARSER_SETTINGS:
message = f" ! - Expecting blog parser buut none specified for {year}"
message = f" ! - Expecting blog parser but none specified for {year}"
DataIssue.objects.create(parser="logbooks", message=message)
print(message)
else:
@@ -711,13 +756,7 @@ def parse_logbook_for_expedition(expedition, blog=False):
logentries = parser(year, expedition, txt, sq) # this launches the right parser
# --------------------
if logentries:
if len(logentries) == expect:
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
pass
else:
print(f"Mismatch in number of log entries: {year} {len(logentries):5d} is not {expect}\n")
check_number_of_entries(logentries)
return logentries
def _collect_logbook_entries_for_expos(expos, nologbook, ENTRIES, BLOG_PARSER_SETTINGS):
@@ -779,7 +818,7 @@ def LoadLogbook(year):
def LoadLogbooks():
"""This is the master function for parsing all logbooks into the Troggle database.
This should be rewritten to use coroutines to load all logbooks from disc in parallel,
This could be rewritten to use coroutines to load all logbooks from disc in parallel,
but must be serialised to write to database as sqlite is single-user.
This is inside an atomic transaction. Maybe it shouldn't be..
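
A minimal sketch of the parallel-read / serial-write split this docstring suggests, assuming hypothetical parse_one() and save_entries() helpers (they are not troggle functions):

from concurrent.futures import ThreadPoolExecutor
from django.db import transaction

def load_all_logbooks(expeditions, parse_one, save_entries):
    # Parsing the logbook files touches only the filesystem, so it can run
    # concurrently across expeditions.
    with ThreadPoolExecutor(max_workers=8) as pool:
        parsed = list(pool.map(parse_one, expeditions))
    # sqlite is single-writer, so all database writes stay serialised,
    # inside one atomic transaction as the current code does.
    with transaction.atomic():
        for expedition, entries in zip(expeditions, parsed):
            save_entries(expedition, entries)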