mirror of https://expo.survex.com/repositories/troggle/.git synced 2026-02-08 14:37:53 +00:00

refactored to use a dataclass not a tuple

2025-09-21 19:33:20 +03:00
parent 5b129fee8f
commit 64419ffb7c
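
The commit swaps an 11-element positional tuple for a dataclass. As a rough orientation only, a minimal sketch of that pattern with throwaway names (Trip, tripdate, place, tu are illustrative, not troggle's):

from dataclasses import dataclass

# Before: a bare tuple, so every producer and consumer must agree on field order.
trip = ("2025-08-01", "some cave", 5.5)
tripdate, place, tu = trip

# After: a dataclass names each field and generates __init__ in declaration
# order, so positional construction still works and access is by attribute.
@dataclass
class Trip:
    tripdate: str
    place: str
    tu: float

trip = Trip("2025-08-01", "some cave", 5.5)
print(trip.place, trip.tu)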


@@ -3,6 +3,7 @@ import re
 import string
 import sys
 import time
+from dataclasses import dataclass
 from datetime import date, datetime
 from pathlib import Path
 from random import randint
@@ -11,6 +12,7 @@ from django.conf import settings
 from django.template.defaultfilters import slugify
 from parsers.people import GetPersonExpeditionNameLookup, known_foreigner, load_people_expos
+from typing import Any, List, Tuple
 from troggle.core.models.caves import GetCaveLookup
 from troggle.core.models.logbooks import LogbookEntry, PersonLogEntry
 from troggle.core.models.troggle import DataIssue, Expedition
@@ -40,6 +42,23 @@ e.g. cave descriptions
     txt = file_in.read().decode("latin1")
 """

+@dataclass
+class LogbookEntryData:
+    # All dataclass fields have a type annotation, by definition.
+    # Fields with no type annotation are not dataclass fields; they're class attributes.
+    tripdate: date
+    place: str
+    tripcave: Any
+    triptitle: str
+    text: str
+    trippersons: List[Tuple[Any, str, float]]  # adjust types as needed
+    author: Any
+    guests: List[str]
+    expedition: Any
+    tu: float  # time underground, not actually used anywhere
+    tid: str
+
 MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
 BLOG_PARSER_SETTINGS = {  # no default, must be explicit
     # "2023": ("ukcavingblog.html", "parser_blog"),  # now folded in to logbooks.html
@@ -283,38 +302,44 @@ def tidy_tid(tid, title, date):
         tid = str(randint(1000, 9999)) + "_" + slugify(title)[:10].replace("-", "_")
     return tid

-def store_entry_into_database(date, place, tripcave, title, text, trippersons, author, guests, expedition, logtime_underground, tid):
+def store_entry_into_database(entry):
     """saves a single logbook entry and related personlogentry items

     We could do a bulk update to save all the entries, but then we would need to do a query on
     each one to get the primary key to assign to the PersonLogEntries. So overall probably not much
     faster ?
     """
-    other_people = ", ".join(guests)  # join list members separated by comma
+    other_people = ", ".join(entry.guests)  # join list members separated by comma
     # if guests:
     #     print(f"  {date} - {guests}")
     otherAttribs = {
-        "place": place,
+        "place": entry.place,
         "other_people": other_people,  # *Ol's Mum, foreigners..
-        "text": text,
-        "expedition": expedition,
-        "time_underground": logtime_underground,
-        "cave": tripcave,
+        "text": entry.text,
+        "expedition": entry.expedition,
+        "time_underground": entry.tu,
+        "cave": entry.tripcave,
     }
-    coUniqueAttribs = {"slug": tid, "date": date, "title": title}
-    if LogbookEntry.objects.filter(slug=tid).exists():
+    coUniqueAttribs = {"slug": entry.tid, "date": entry.tripdate, "title": entry.triptitle}
+    if LogbookEntry.objects.filter(slug=entry.tid).exists():
         # oops. Our code should already have ensured this is unique.
-        message = " ! - DUPLICATE SLUG for logbook entry " + tripdate + " - " + slug
+        message = " ! - DUPLICATE SLUG for logbook entry " + entry.tripdate + " - " + entry.tid
         DataIssue.objects.create(parser="logbooks", message=message)
         slug = slug + "_" + unique_slug(text,2)

     lbo = LogbookEntry.objects.create(**otherAttribs, **coUniqueAttribs)

     pt_list = []
-    for tripperson, nickname_used, time_underground in trippersons:
-        coUniqueAttribs = {"personexpedition": tripperson, "nickname_used": nickname_used, "logbook_entry": lbo}  # lbo is primary key
-        otherAttribs = {"time_underground": time_underground, "is_logbook_entry_author": (tripperson == author)}
+    for tripperson, nickname_used, time_underground in entry.trippersons:
+        coUniqueAttribs = {
+            "personexpedition": tripperson,
+            "nickname_used": nickname_used,
+            "logbook_entry": lbo
+        }  # lbo is primary key
+        otherAttribs = {
+            "time_underground": time_underground,
+            "is_logbook_entry_author": (tripperson == entry.author)}
         pt_list.append(PersonLogEntry(**otherAttribs, **coUniqueAttribs))

     PersonLogEntry.objects.bulk_create(pt_list)
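
The docstring above weighs bulk-saving all the entries against saving them one at a time; the code settles on creating each parent entry individually, so its primary key is known immediately, and reserving bulk_create for the per-person child rows. A rough sketch of that pattern with hypothetical Parent/Child models (not troggle's real ones):

from django.db import models

class Parent(models.Model):
    title = models.CharField(max_length=200)

    class Meta:
        app_label = "demo"  # placeholder app label, only so the sketch is self-contained

class Child(models.Model):
    parent = models.ForeignKey(Parent, on_delete=models.CASCADE)
    name = models.CharField(max_length=100)

    class Meta:
        app_label = "demo"

def save_batch(batch):
    """batch: iterable of (title, [names]) pairs."""
    for title, names in batch:
        # One INSERT per parent; the returned object carries its primary key,
        # so the children can reference it straight away.
        parent = Parent.objects.create(title=title)
        # A single bulk INSERT for all of this parent's children.
        Child.objects.bulk_create([Child(parent=parent, name=n) for n in names])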
@@ -485,8 +510,8 @@ def parser_html(year, expedition, txt, seq=""):
         tripcontent = tidy_trip_image_urls(tripcontent, ldate)
         tid = tidy_tid(tid, triptitle, lgdate)

-        entrytuple = (ldate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
-        logentries.append(entrytuple)
+        entry = LogbookEntryData(ldate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
+        logentries.append(entry)
     return logentries
@@ -592,8 +617,8 @@ def parser_blog(year, expedition, txt, sq=""):
         tripcontent = tidy_trip_image_urls(tripcontent, year)
         tid = tidy_tid(tid, triptitle, datestamp)

-        entrytuple = (tripdate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
-        logentries.append(entrytuple)
+        entry = LogbookEntryData(tripdate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
+        logentries.append(entry)
     return logentries

 def clean_all_logbooks():
@@ -704,13 +729,13 @@ def LoadLogbook(year):
             print(
                 f" - Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}"
             )
-    for entrytuple in logentries:
-        date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
-        if expo == expedition:  # unneeded check, we zeroed it before filling it
+    for entry in logentries:
+        #date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
+        if expo == entry.expedition:  # unneeded check, we zeroed it before filling it
             # print(f"  -- {triptitle}")
-            store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid)
+            store_entry_into_database(entry)
         else:
-            print(f" ! unexpected log entry labelled as '{expedition}' {tid}" )
+            print(f" ! unexpected log entry labelled as '{entry.expedition}' {entry.tid}" )
     expo.save()  # to save logbook name property

 def LoadLogbooks():
@@ -792,9 +817,9 @@ def LoadLogbooks():
     # - Expedition (the 'logbook.html' value)
     # - LogBookEntry (text, who when etc.)
     # - PersonLogEntry (who was on that specific trip mentione din the logbook entry)
-    for entrytuple in allentries:
-        date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
-        store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid)
+    for entry in allentries:
+        # date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
+        store_entry_into_database(entry)
     for expo in expos:
         expo.save()  # to save logbook name property