part-way through converting to slugs for people

2026-02-08 12:27:35 +00:00 · 2023-10-01 15:55:28 +03:00
parent 16d3ee9f92
commit 7b8703dadc
9 changed files with 84 additions and 43 deletions
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -3,9 +3,9 @@ import os
 import re
 from html import unescape
 from pathlib import Path
+from unidecode import unidecode

 from django.conf import settings
-from unidecode import unidecode

 from troggle.core.models.troggle import DataIssue, Expedition, Person, PersonExpedition

@@ -17,7 +17,9 @@ or they should use the same code by importing a module.


 def parse_blurb(personline, header, person):
-    """create mugshot Photo instance"""
+    """create mugshot Photo instance
+    Would be better if all this was done before the Person object was created in the db, then it would not
+    need re-saving (which is slow)"""
    ms_filename = personline[header["Mugshot"]]
    ms_path = Path(settings.EXPOWEB, "folk", ms_filename)

@@ -60,7 +62,19 @@ def parse_blurb(personline, header, person):

    person.save()

-
+slug_cache = {}  # slug already handed out -> number of times it has been requested; NOTE(review): module-level and not cleared in the code shown here - confirm a repeat import in the same process cannot see stale entries
+def troggle_slugify(longname):
+    """Uniqueness enforcement too. Yes we have had two "Dave Johnson"s
+    """
+    slug = longname.strip().lower().replace(" ","-")  # trim, lower-case, each space becomes a hyphen; no other character is altered
+    if len(slug) > 40: # slugfield is 50 chars; presumably the cut at 40 leaves room for the "_N" suffix added below - TODO confirm
+        slug = slug[:40]
+    if slug in slug_cache:  # this slug has been issued before: bump its counter and append the new count
+        slug_cache[slug] += 1
+        slug = f"{slug}_{slug_cache[slug]}"  # e.g. the second "dave-johnson" is returned as "dave-johnson_2"
+    slug_cache[slug] = 1  # record the slug actually returned (suffixed or not) as issued once
+    return slug
+    
 def load_people_expos():
    """This is where the folk.csv file is parsed to read people's names.
    Which it gets wrong for people like Lydia-Clare Leather and various 'von' and 'de' middle 'names'
@@ -86,8 +100,11 @@ def load_people_expos():

    for personline in personreader:
        name = personline[header["Name"]]
-        name = re.sub(r"<.*?>", "", name)
-        slug = slugify(name)
+        name = re.sub(r"<.*?>", "", name) 
+        
+        match = re.match(r"^([^(]*)(\(([^)]*)\))?", name) # removes nickname in brackets
+        displayname = match.group(1)
+        slug = troggle_slugify(displayname)

        firstname = ""
        nick = ""
@@ -97,34 +114,39 @@ def load_people_expos():
        lastname = matchlastname.group(1).strip()

        splitnick = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", name)
-        fullname = splitnick.group(1)
-
-        nick = splitnick.group(2) or ""
+        fullname = splitnick.group(1) # removes Nickname in brackets, but also cuts hyphenated names
+        nick = splitnick.group(2) or "" 

        fullname = fullname.strip()
-        names = fullname.split(" ")
+        
+        names = fullname.split(" ") # This may have more than one, e.g. "Adeleide de Diesback"
        firstname = names[0]
        if len(names) == 1:
-            lastname = ""
+            lastname = "" # wookey special code
+            
+        #restore fullname to be the whole string
+        fullname = displayname

        if personline[header["VfHO member"]] == "":
            vfho = False
        else:
            vfho = True

-        coUniqueAttribs = {"first_name": firstname, "last_name": (lastname or "")}
-        otherAttribs = {"is_vfho": vfho, "fullname": fullname, "nickname": nick}
+        # would be better to just create the python object, and only commit to db once all done inc blurb
+        # and better to save all the Persons in a bulk update, then do all the PersonExpeditions
+        coUniqueAttribs = {"slug": slug}
+        otherAttribs = {"first_name": firstname, "last_name": (lastname or ""), "is_vfho": vfho, "fullname": fullname, "nickname": nick,"is_guest": (personline[header["Guest"]] == "1")}
        person = Person.objects.create(**otherAttribs, **coUniqueAttribs)

-        parse_blurb(personline=personline, header=header, person=person)
+        parse_blurb(personline=personline, header=header, person=person) # saves to db too

        # make person expedition from table
        for year, attended in list(zip(headers, personline))[5:]:
            expedition = Expedition.objects.get(year=year)
            if attended == "1" or attended == "-1":
                coUniqueAttribs = {"person": person, "expedition": expedition}
-                otherAttribs = {"is_guest": (personline[header["Guest"]] == "1")}
-                pe = PersonExpedition.objects.create(**otherAttribs, **coUniqueAttribs)
+                # otherAttribs = {"is_guest": (personline[header["Guest"]] == "1")} 
+                pe = PersonExpedition.objects.create(**coUniqueAttribs)
    print("", flush=True)