
parsing JSON logentries OK as valid

2025-11-22 13:50:50 +02:00
parent 2807ed5c21
commit e2c1bc3516
2 changed files with 53 additions and 40 deletions


@@ -1,3 +1,4 @@
+import json
 import os
 import re
 import string
@@ -653,6 +654,39 @@ def clean_logbook_for_expedition(expedition):
 def parse_logbook_for_expedition(expedition, blog=False):
     """Parses all logbook entries for one expedition
     """
+    def load_from_json():
+        entries = []
+        for jsonfile in json_entries_dir.rglob("*.json"):
+            with open(jsonfile, 'r', encoding='utf-8') as json_f:
+                print(f"OPENING {jsonfile}")
+                message = ""
+                try:
+                    entrydict = json.load(json_f)
+                except FileNotFoundError:
+                    message = f"File {jsonfile} not found!"
+                except json.JSONDecodeError:
+                    message = f"Invalid JSON format! - JSONDecodeError for {jsonfile}"
+                except Exception as e:
+                    message = f"! Failed to load {jsonfile} JSON file. Exception <{e}>"
+                if message:
+                    print(message)
+                    DataIssue.objects.update_or_create(parser="logbooks", message=message, url=str(jsonfile))
+                    return None
+                entries.append(entrydict)
+        check_number_of_entries(entries)  # temp check on pre-parsed list
+        return entries
+
+    def check_number_of_entries(logentries):
+        if logentries:
+            if len(logentries) == expect:
+                # print(f"OK {year} {len(logentries):5d} is {expect}\n")
+                pass
+            else:
+                print(f"Mismatch in number of log entries: {year} {len(logentries):5d} is not {expect}\n")
 
     global ENTRIES
     logentries = []
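As an aside, the validate-every-file pass that load_from_json performs can be reproduced with the standard library alone. A minimal sketch of the same pattern, assuming nothing of Troggle's API (the function name, directory argument, and print-based reporting are illustrative):

import json
from pathlib import Path

def validate_json_dir(json_dir):
    """Sketch: load every *.json below json_dir and return the parsed
    dicts, or None as soon as any one file fails to parse."""
    entries = []
    for jsonfile in Path(json_dir).rglob("*.json"):
        try:
            with open(jsonfile, encoding="utf-8") as f:
                entries.append(json.load(f))
        except json.JSONDecodeError as e:
            # illustrative reporting only; Troggle records a DataIssue instead
            print(f"Invalid JSON in {jsonfile}: {e}")
            return None
    return entries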
@@ -662,6 +696,17 @@ def parse_logbook_for_expedition(expedition, blog=False):
     year = expedition.year
     expect = ENTRIES[year]
     # print(" - Logbook for: " + year)
+    json_entries_dir = settings.EXPOWEB / "years" / year / "log_entries"
+    if json_entries_dir.is_dir():
+        print(f" # WARNING year {year} has JSON-encoded logbook entries. Should use these instead of the archive .html file")
+        logentries = load_from_json()
+        logentries = []  # parsed only to validate; discarded until the JSON path replaces the .html archive
+        # check_number_of_entries()
+        # return logentries
 
     if year in LOGBOOK_PARSER_SETTINGS:
         yearfile, parsefunc = LOGBOOK_PARSER_SETTINGS[year]
@@ -675,7 +720,7 @@ def parse_logbook_for_expedition(expedition, blog=False):
     if blog:
         if year not in BLOG_PARSER_SETTINGS:
-            message = f" ! - Expecting blog parser buut none specified for {year}"
+            message = f" ! - Expecting blog parser but none specified for {year}"
             DataIssue.objects.create(parser="logbooks", message=message)
             print(message)
         else:
@@ -711,13 +756,7 @@ def parse_logbook_for_expedition(expedition, blog=False):
             logentries = parser(year, expedition, txt, sq)  # this launches the right parser
         # --------------------
-    if logentries:
-        if len(logentries) == expect:
-            # print(f"OK {year} {len(logentries):5d} is {expect}\n")
-            pass
-        else:
-            print(f"Mismatch in number of log entries: {year} {len(logentries):5d} is not {expect}\n")
+    check_number_of_entries(logentries)
     return logentries
 
 def _collect_logbook_entries_for_expos(expos, nologbook, ENTRIES, BLOG_PARSER_SETTINGS):
@@ -779,7 +818,7 @@ def LoadLogbook(year):
 def LoadLogbooks():
     """This is the master function for parsing all logbooks into the Troggle database.
-    This should be rewritten to use coroutines to load all logbooks from disc in parallel,
+    This could be rewritten to use coroutines to load all logbooks from disc in parallel,
     but must be serialised to write to database as sqlite is single-user.
     This is inside an atomic transaction. Maybe it shouldn't be..
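A sketch of the split the docstring describes: parse each expedition's logbook concurrently (file reading and parsing touch no database), then perform all writes from a single thread because sqlite allows only one writer. Every name here is illustrative, not the existing Troggle API:

from concurrent.futures import ThreadPoolExecutor

def load_all_parallel(expeditions, parse_one, write_one):
    # Parse logbooks from disc in parallel; parse_one must not touch the database.
    with ThreadPoolExecutor() as pool:
        results = list(pool.map(parse_one, expeditions))
    # Serialise all database writes through one thread for single-user sqlite.
    for expedition, entries in zip(expeditions, results):
        write_one(expedition, entries)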