2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-17 18:27:08 +00:00

refactored to use a dataclass not a tuple

This commit is contained in:
2025-09-21 19:33:20 +03:00
parent 5b129fee8f
commit 64419ffb7c

View File

@@ -3,6 +3,7 @@ import re
import string
import sys
import time
from dataclasses import dataclass
from datetime import date, datetime
from pathlib import Path
from random import randint
@@ -11,6 +12,7 @@ from django.conf import settings
from django.template.defaultfilters import slugify
from parsers.people import GetPersonExpeditionNameLookup, known_foreigner, load_people_expos
from typing import Any, List, Tuple
from troggle.core.models.caves import GetCaveLookup
from troggle.core.models.logbooks import LogbookEntry, PersonLogEntry
from troggle.core.models.troggle import DataIssue, Expedition
@@ -40,6 +42,23 @@ e.g. cave descriptions
txt = file_in.read().decode("latin1")
"""
@dataclass
class LogbookEntryData:
    """One parsed logbook entry, held in memory until it is written to the database.

    Replaces the positional 11-tuple previously passed between the parsers and
    store_entry_into_database(); the field order matches that tuple so that
    positional construction keeps working.
    """

    # All dataclass fields have a type annotation, by definition.
    # Fields with no type annotation are not dataclass fields; they're class attributes.
    tripdate: date  # date of the trip
    place: str  # stored as LogbookEntry.place
    tripcave: Any  # stored as LogbookEntry.cave; presumably a Cave object or None - TODO confirm
    triptitle: str  # stored as LogbookEntry.title
    text: str  # body of the entry, stored as LogbookEntry.text
    # One (personexpedition, nickname_used, time_underground) triple per participant.
    trippersons: List[Tuple[Any, str, float]]  # adjust types as needed
    author: Any  # compared against each tripperson to flag the entry's author
    guests: List[str]  # joined with ", " into LogbookEntry.other_people
    expedition: Any  # stored as LogbookEntry.expedition
    # Time underground; copied into LogbookEntry.time_underground when stored.
    # The original note says the value is not otherwise used anywhere.
    tu: float
    tid: str  # stored as LogbookEntry.slug
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
BLOG_PARSER_SETTINGS = { # no default, must be explicit
# "2023": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
@@ -283,38 +302,44 @@ def tidy_tid(tid, title, date):
tid = str(randint(1000, 9999)) + "_" + slugify(title)[:10].replace("-", "_")
return tid
def store_entry_into_database(entry):
    """Save a single logbook entry and its related PersonLogEntry items.

    We could do a bulk update to save all the entries, but then we would need to do a query on
    each one to get the primary key to assign to the PersonLogEntries. So overall probably not much
    faster ?

    Args:
        entry: a LogbookEntryData instance (or any object exposing the same
            attributes: tripdate, place, tripcave, triptitle, text, trippersons,
            author, guests, expedition, tu, tid).

    Side effects:
        Creates one LogbookEntry row, one PersonLogEntry row per element of
        entry.trippersons, and a DataIssue row if the slug was already taken.
    """
    slug = entry.tid
    if LogbookEntry.objects.filter(slug=slug).exists():
        # oops. Our code should already have ensured this is unique.
        # f-string, because entry.tripdate is a date and cannot be concatenated to a str.
        message = f" ! - DUPLICATE SLUG for logbook entry {entry.tripdate} - {slug}"
        DataIssue.objects.create(parser="logbooks", message=message)
        # unique_slug() is defined elsewhere in this module; presumably it returns a
        # short suffix derived from the text - TODO confirm.
        slug = slug + "_" + unique_slug(entry.text, 2)

    otherAttribs = {
        "place": entry.place,
        "other_people": ", ".join(entry.guests),  # *Ol's Mum, foreigners..
        "text": entry.text,
        "expedition": entry.expedition,
        "time_underground": entry.tu,
        "cave": entry.tripcave,
    }
    # Built after the duplicate check so that the de-duplicated slug is the one stored.
    coUniqueAttribs = {"slug": slug, "date": entry.tripdate, "title": entry.triptitle}
    lbo = LogbookEntry.objects.create(**otherAttribs, **coUniqueAttribs)

    # The LogbookEntry must exist first: each PersonLogEntry refers to it.
    pt_list = [
        PersonLogEntry(
            time_underground=time_underground,
            is_logbook_entry_author=(tripperson == entry.author),
            personexpedition=tripperson,
            nickname_used=nickname_used,
            logbook_entry=lbo,
        )
        for tripperson, nickname_used, time_underground in entry.trippersons
    ]
    PersonLogEntry.objects.bulk_create(pt_list)
@@ -485,8 +510,8 @@ def parser_html(year, expedition, txt, seq=""):
tripcontent = tidy_trip_image_urls(tripcontent, ldate)
tid = tidy_tid(tid, triptitle, lgdate)
entrytuple = (ldate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
logentries.append(entrytuple)
entry = LogbookEntryData(ldate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
logentries.append(entry)
return logentries
@@ -592,8 +617,8 @@ def parser_blog(year, expedition, txt, sq=""):
tripcontent = tidy_trip_image_urls(tripcontent, year)
tid = tidy_tid(tid, triptitle, datestamp)
entrytuple = (tripdate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
logentries.append(entrytuple)
entry = LogbookEntryData(tripdate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
logentries.append(entry)
return logentries
def clean_all_logbooks():
@@ -704,13 +729,13 @@ def LoadLogbook(year):
print(
f" - Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}"
)
for entrytuple in logentries:
date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
if expo == expedition: # unneeded check, we zeroed it before filling it
for entry in logentries:
#date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
if expo == entry.expedition: # unneeded check, we zeroed it before filling it
# print(f" -- {triptitle}")
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid)
store_entry_into_database(entry)
else:
print(f" ! unexpected log entry labelled as '{expedition}' {tid}" )
print(f" ! unexpected log entry labelled as '{entry.expedition}' {entry.tid}" )
expo.save() # to save logbook name property
def LoadLogbooks():
@@ -792,9 +817,9 @@ def LoadLogbooks():
# - Expedition (the 'logbook.html' value)
# - LogBookEntry (text, who when etc.)
# - PersonLogEntry (who was on that specific trip mentioned in the logbook entry)
for entrytuple in allentries:
date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid)
for entry in allentries:
# date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
store_entry_into_database(entry)
for expo in expos:
expo.save() # to save logbook name property