2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2026-01-18 16:42:48 +00:00

JSON logbook entries are now nearly parsed into the correct form for the database

This commit is contained in:
2025-11-22 14:32:16 +02:00
parent e2c1bc3516
commit 88833941b5

View File

@@ -523,6 +523,8 @@ def parser_html(year, expedition, txt, seq=""):
entry = LogbookEntryData(ldate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
logentries.append(entry)
if str(ldate) == "2025-07-08":
print(f"PARSED from html\n",entry,"\n")
return logentries
@@ -656,10 +658,9 @@ def parse_logbook_for_expedition(expedition, blog=False):
"""
def load_from_json():
entries = []
logentries = []
for jsonfile in json_entries_dir.rglob("*.json"):
with open(jsonfile, 'r', encoding='utf-8') as json_f:
print(f"OPENING {jsonfile}")
message = ""
try:
entrydict = json.load(json_f)
@@ -674,10 +675,30 @@ def parse_logbook_for_expedition(expedition, blog=False):
DataIssue.objects.update_or_create(parser="logbooks", message=message, url=jsonurl)
return None
entries.append(entrydict)
logentries.append(parse_from_dict(entrydict))
check_number_of_entries(entries) # temp check on pre-parsed list
return entries
check_number_of_entries(logentries)
return logentries
def parse_from_dict(entrydict):
    """Build a LogbookEntryData record from one decoded JSON logbook entry.

    Looks up "date", "place", "cave", "title", "text", "author" (itself
    indexed by "slug"), "other_people", "expedition", "time_underground"
    and "slug" in entrydict, in that order, and passes them positionally
    to LogbookEntryData. The trip-persons argument is always None here.
    Lookup errors for absent keys are not caught and propagate to the caller.
    """
    # NOTE(review): the date is passed through exactly as found in the JSON.
    # parser_html compares str(ldate), so the two code paths may be handing
    # LogbookEntryData different types for the date - confirm.
    entry_date = entrydict["date"]
    record = LogbookEntryData(
        entry_date,
        entrydict["place"],
        entrydict["cave"],
        entrydict["title"],
        entrydict["text"],
        None,  # trippersons: not derived from the JSON in this function
        entrydict["author"]["slug"],
        entrydict["other_people"],
        entrydict["expedition"],
        entrydict["time_underground"],
        entrydict["slug"],  # Is this right ? Or is it the end txt?
    )
    # Debug output for one hard-coded date.
    if entry_date == "2025-07-08":
        print(f"PARSED from JSON\n", record, "\n")
    return record
def check_number_of_entries(logentries):
if logentries:
@@ -700,11 +721,11 @@ def parse_logbook_for_expedition(expedition, blog=False):
json_entries_dir = settings.EXPOWEB / "years" / year / "log_entries"
if json_entries_dir.is_dir():
print(f" # WARNING year {year} has JSON-encoded logbook entries. Should use these instead of the archive .html file")
print(f" # WARNING year {year} has JSON-encoded logbook entries. Using these instead of the archive .html file.")
logentries = load_from_json()
logentries = []
# check_number_of_entries()
# check_number_of_entries(logentries)
# return logentries