refactored to use a dataclass not a tuple

2025-12-17 18:27:08 +00:00 · 2025-09-21 19:33:20 +03:00
parent 5b129fee8f
commit 64419ffb7c
1 changed files with 50 additions and 25 deletions
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -3,6 +3,7 @@ import re
 import string
 import sys
 import time
+from dataclasses import dataclass
 from datetime import date, datetime
 from pathlib import Path
 from random import randint
@@ -11,6 +12,7 @@ from django.conf import settings
 from django.template.defaultfilters import slugify

 from parsers.people import GetPersonExpeditionNameLookup, known_foreigner, load_people_expos
+from typing import Any, List, Tuple
 from troggle.core.models.caves import GetCaveLookup
 from troggle.core.models.logbooks import LogbookEntry, PersonLogEntry
 from troggle.core.models.troggle import DataIssue, Expedition
@@ -40,6 +42,23 @@ e.g. cave descriptions
            txt = file_in.read().decode("latin1")
            
 """
+
+@dataclass
+class LogbookEntryData:
+    # One parsed logbook entry: built by parser_html()/parser_blog(), saved by store_entry_into_database().
+    # Only annotated names are dataclass fields; the parsers construct it positionally in this field order.
+    tripdate: date  # stored as the LogbookEntry "date"
+    place: str  # stored as the LogbookEntry "place"
+    tripcave: Any  # stored as the LogbookEntry "cave"
+    triptitle: str  # stored as the LogbookEntry "title"
+    text: str  # stored as the LogbookEntry "text"
+    trippersons: List[Tuple[Any, str, float]]  # unpacked as (tripperson, nickname_used, time_underground)
+    author: Any  # compared with each tripperson to set "is_logbook_entry_author"
+    guests: List[str]  # joined with ", " into the LogbookEntry "other_people"
+    expedition: Any  # stored as the LogbookEntry "expedition"
+    tu: float  # time underground; stored as the LogbookEntry "time_underground"
+    tid: str  # used as the LogbookEntry "slug"
+
 MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
 BLOG_PARSER_SETTINGS = { # no default, must be explicit
    #  "2023": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
@@ -283,38 +302,44 @@ def tidy_tid(tid, title, date):
    tid = str(randint(1000, 9999)) + "_" + slugify(title)[:10].replace("-", "_")    
    return tid
    
-def store_entry_into_database(date, place, tripcave, title, text, trippersons, author, guests, expedition, logtime_underground, tid):
+def store_entry_into_database(entry):
    """saves a single logbook entry and related personlogentry items
    
    We could do a bulk update to save all the entries, but then we would need to do a query on
    each one to get the primary key to assign to the PersonLogEntries. So overall probably not much
    faster ? 
    """
-    other_people = ", ".join(guests) # join list members separated by comma
+    other_people = ", ".join(entry.guests) # join list members separated by comma
    # if guests:
        # print(f" {date} - {guests}")
    
    otherAttribs = {
-        "place": place,
+        "place": entry.place,
        "other_people": other_people, # *Ol's Mum, foreigners..
-        "text": text,
-        "expedition": expedition,
-        "time_underground": logtime_underground,
-        "cave": tripcave,
+        "text": entry.text,
+        "expedition": entry.expedition,
+        "time_underground": entry.tu,
+        "cave": entry.tripcave,
    }
-    coUniqueAttribs = {"slug": tid, "date": date, "title": title}
-    if LogbookEntry.objects.filter(slug=tid).exists():
+    coUniqueAttribs = {"slug": entry.tid, "date": entry.tripdate, "title": entry.triptitle}
+    if LogbookEntry.objects.filter(slug=entry.tid).exists():
        # oops. Our code should already have ensured this is unique.
-        message = " ! - DUPLICATE SLUG for logbook entry " + tripdate + " - " + slug
+        message = f" ! - DUPLICATE SLUG for logbook entry {entry.tripdate} - {entry.tid}"  # tripdate is a date: str + date raises TypeError. NOTE(review): 'slug' and 'text' two lines below look unbound after this refactor - confirm
        DataIssue.objects.create(parser="logbooks", message=message)
        slug = slug + "_" + unique_slug(text,2)

    lbo = LogbookEntry.objects.create(**otherAttribs, **coUniqueAttribs)
    
    pt_list = []
-    for tripperson, nickname_used, time_underground in trippersons:
-        coUniqueAttribs = {"personexpedition": tripperson, "nickname_used": nickname_used, "logbook_entry": lbo} # lbo is primary key
-        otherAttribs = {"time_underground": time_underground, "is_logbook_entry_author": (tripperson == author)}
+    for tripperson, nickname_used, time_underground in entry.trippersons:
+        coUniqueAttribs = {
+            "personexpedition": tripperson, 
+            "nickname_used": nickname_used, 
+            "logbook_entry": lbo
+            } # lbo is primary key
+        otherAttribs = {
+            "time_underground": time_underground, 
+            "is_logbook_entry_author": (tripperson == entry.author)}
        pt_list.append(PersonLogEntry(**otherAttribs, **coUniqueAttribs))
    PersonLogEntry.objects.bulk_create(pt_list)
        
@@ -485,8 +510,8 @@ def parser_html(year, expedition, txt, seq=""):
        tripcontent = tidy_trip_image_urls(tripcontent, ldate)
        tid = tidy_tid(tid, triptitle, lgdate)
   
-        entrytuple = (ldate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
-        logentries.append(entrytuple)
+        entry = LogbookEntryData(ldate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
+        logentries.append(entry)
    return logentries


@@ -592,8 +617,8 @@ def parser_blog(year, expedition, txt, sq=""):
        tripcontent = tidy_trip_image_urls(tripcontent, year)
        tid = tidy_tid(tid, triptitle, datestamp)

-        entrytuple = (tripdate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
-        logentries.append(entrytuple)
+        entry = LogbookEntryData(tripdate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
+        logentries.append(entry)
    return logentries

 def clean_all_logbooks():
@@ -704,13 +729,13 @@ def LoadLogbook(year):
            print(
                f" - Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}"
            )
-        for entrytuple in logentries:
-            date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
-            if expo == expedition: # unneeded check, we zeroed it before filling it
+        for entry in logentries:
+            # entry is a LogbookEntryData; its fields are read by attribute rather than by tuple unpacking
+            if expo == entry.expedition: # unneeded check, we zeroed it before filling it
                # print(f" -- {triptitle}")
-                store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid)
+                store_entry_into_database(entry)
            else:
-                print(f" ! unexpected log entry labelled as '{expedition}' {tid}" ) 
+                print(f" ! unexpected log entry labelled as '{entry.expedition}' {entry.tid}" ) 
        expo.save() # to save logbook name property
    
 def LoadLogbooks():
@@ -792,9 +817,9 @@ def LoadLogbooks():
    # - Expedition (the 'logbook.html' value)
    # - LogBookEntry (text, who when etc.)
    # - PersonLogEntry (who was on that specific trip mentione din the logbook entry)
-    for entrytuple in allentries:
-        date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
-        store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid)
+    for entry in allentries:
+        # each entry is a LogbookEntryData and is passed whole to store_entry_into_database()
+        store_entry_into_database(entry)
 
    for expo in expos: 
        expo.save() # to save logbook name property