From 64419ffb7ced8f5b2f5e2ee67088edfccd23ef94 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Sun, 21 Sep 2025 19:33:20 +0300 Subject: [PATCH] refactored to use a dataclass not a tuple --- parsers/logbooks.py | 75 ++++++++++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 25 deletions(-) diff --git a/parsers/logbooks.py b/parsers/logbooks.py index 434364ee1..b609ebfbe 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -3,6 +3,7 @@ import re import string import sys import time +from dataclasses import dataclass from datetime import date, datetime from pathlib import Path from random import randint @@ -11,6 +12,7 @@ from django.conf import settings from django.template.defaultfilters import slugify from parsers.people import GetPersonExpeditionNameLookup, known_foreigner, load_people_expos +from typing import Any, List, Tuple from troggle.core.models.caves import GetCaveLookup from troggle.core.models.logbooks import LogbookEntry, PersonLogEntry from troggle.core.models.troggle import DataIssue, Expedition @@ -40,6 +42,23 @@ e.g. cave descriptions txt = file_in.read().decode("latin1") """ + +@dataclass +class LogbookEntryData: + # All dataclass fields have a type annotation, by definition. + # Fields with no type annotation are not dataclass fields; they're class attributes. + tripdate: date + place: str + tripcave: Any + triptitle: str + text: str + trippersons: List[Tuple[Any, str, float]] # adjust types as needed + author: Any + guests: List[str] + expedition: Any + tu: float # time underground, not actually used anywhere + tid: str + MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200 BLOG_PARSER_SETTINGS = { # no default, must be explicit # "2023": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html @@ -283,38 +302,44 @@ def tidy_tid(tid, title, date): tid = str(randint(1000, 9999)) + "_" + slugify(title)[:10].replace("-", "_") return tid -def store_entry_into_database(date, place, tripcave, title, text, trippersons, author, guests, expedition, logtime_underground, tid): +def store_entry_into_database(entry): """saves a single logbook entry and related personlogentry items We could do a bulk update to save all the entries, but then we would need to do a query on each one to get the primary key to assign to the PersonLogEntries. So overall probably not much faster ? """ - other_people = ", ".join(guests) # join list members separated by comma + other_people = ", ".join(entry.guests) # join list members separated by comma # if guests: # print(f" {date} - {guests}") otherAttribs = { - "place": place, + "place": entry.place, "other_people": other_people, # *Ol's Mum, foreigners.. - "text": text, - "expedition": expedition, - "time_underground": logtime_underground, - "cave": tripcave, + "text": entry.text, + "expedition": entry.expedition, + "time_underground": entry.tu, + "cave": entry.tripcave, } - coUniqueAttribs = {"slug": tid, "date": date, "title": title} - if LogbookEntry.objects.filter(slug=tid).exists(): + coUniqueAttribs = {"slug": entry.tid, "date": entry.tripdate, "title": entry.triptitle} + if LogbookEntry.objects.filter(slug=entry.tid).exists(): # oops. Our code should already have ensured this is unique. - message = " ! - DUPLICATE SLUG for logbook entry " + tripdate + " - " + slug + message = " ! - DUPLICATE SLUG for logbook entry " + entry.tripdate + " - " + entry.tid DataIssue.objects.create(parser="logbooks", message=message) slug = slug + "_" + unique_slug(text,2) lbo = LogbookEntry.objects.create(**otherAttribs, **coUniqueAttribs) pt_list = [] - for tripperson, nickname_used, time_underground in trippersons: - coUniqueAttribs = {"personexpedition": tripperson, "nickname_used": nickname_used, "logbook_entry": lbo} # lbo is primary key - otherAttribs = {"time_underground": time_underground, "is_logbook_entry_author": (tripperson == author)} + for tripperson, nickname_used, time_underground in entry.trippersons: + coUniqueAttribs = { + "personexpedition": tripperson, + "nickname_used": nickname_used, + "logbook_entry": lbo + } # lbo is primary key + otherAttribs = { + "time_underground": time_underground, + "is_logbook_entry_author": (tripperson == entry.author)} pt_list.append(PersonLogEntry(**otherAttribs, **coUniqueAttribs)) PersonLogEntry.objects.bulk_create(pt_list) @@ -485,8 +510,8 @@ def parser_html(year, expedition, txt, seq=""): tripcontent = tidy_trip_image_urls(tripcontent, ldate) tid = tidy_tid(tid, triptitle, lgdate) - entrytuple = (ldate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid) - logentries.append(entrytuple) + entry = LogbookEntryData(ldate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid) + logentries.append(entry) return logentries @@ -592,8 +617,8 @@ def parser_blog(year, expedition, txt, sq=""): tripcontent = tidy_trip_image_urls(tripcontent, year) tid = tidy_tid(tid, triptitle, datestamp) - entrytuple = (tripdate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid) - logentries.append(entrytuple) + entry = LogbookEntryData(tripdate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid) + logentries.append(entry) return logentries def clean_all_logbooks(): @@ -704,13 +729,13 @@ def LoadLogbook(year): print( f" - Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}" ) - for entrytuple in logentries: - date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple - if expo == expedition: # unneeded check, we zeroed it before filling it + for entry in logentries: + #date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple + if expo == entry.expedition: # unneeded check, we zeroed it before filling it # print(f" -- {triptitle}") - store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid) + store_entry_into_database(entry) else: - print(f" ! unexpected log entry labelled as '{expedition}' {tid}" ) + print(f" ! unexpected log entry labelled as '{entry.expedition}' {entry.tid}" ) expo.save() # to save logbook name property def LoadLogbooks(): @@ -792,9 +817,9 @@ def LoadLogbooks(): # - Expedition (the 'logbook.html' value) # - LogBookEntry (text, who when etc.) # - PersonLogEntry (who was on that specific trip mentione din the logbook entry) - for entrytuple in allentries: - date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple - store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid) + for entry in allentries: + # date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple + store_entry_into_database(entry) for expo in expos: expo.save() # to save logbook name property