2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-17 03:57:12 +00:00

refactoring people import from folk.csv

This commit is contained in:
2025-07-21 15:41:05 +02:00
parent 1cfcbccf76
commit 2039501672
8 changed files with 141 additions and 34 deletions

View File

@@ -67,7 +67,7 @@ class Expedition(TroggleModel):
logbookfile = models.CharField(max_length=100, blank=True, null=True)
def __str__(self):
return self.year
return str(self.year)
class Meta:
ordering = ("-year",)
@@ -85,11 +85,13 @@ class Person(TroggleModel):
defined in django.contrib.auth.models
"""
first_name = models.CharField(max_length=100)
last_name = models.CharField(max_length=100)
fullname = models.CharField(max_length=200) # display name, but should not be used for lookups
nickname = models.CharField(max_length=200, blank=True)
slug = models.SlugField(max_length=50, blank=True, null=True) # unique, enforced in code not in db
input_name = models.CharField(max_length=100, blank=True, null=True)
input_surname = models.CharField(max_length=100, blank=True, null=True)
first_name = models.CharField(max_length=100, blank=True, null=True)
last_name = models.CharField(max_length=100, blank=True, null=True)
fullname = models.CharField(max_length=200, blank=True, null=True) # display name, but should not be used for lookups
nickname = models.CharField(max_length=200, blank=True, null=True)
slug = models.SlugField(max_length=50) # unique, enforced in code not in db
# no delete cascade. We have users without Persons, and Persons without users
user = models.OneToOneField(User, models.SET_NULL, blank=True, null=True)
@@ -99,6 +101,7 @@ class Person(TroggleModel):
)
is_guest = models.BooleanField(default=False) # This is per-Person, not per-PersonExpedition
mug_shot = models.CharField(max_length=100, blank=True, null=True)
blurbfile = models.CharField(max_length=100, blank=True, null=True)
blurb = models.TextField(blank=True, null=True)
orderref = models.CharField(max_length=200, blank=True) # for alphabetic
@@ -164,6 +167,10 @@ class PersonExpedition(TroggleModel):
expedition = models.ForeignKey(Expedition, on_delete=models.CASCADE, db_index=True)
person = models.ForeignKey(Person, on_delete=models.CASCADE, db_index=True)
slugfield = models.SlugField(max_length=50, blank=True, null=True) # 2022 to be used in future
noncaver = models.BooleanField(
help_text="if flagged as -1 in the folk.csv input file",
default=True,
)
# is_guest = models.BooleanField(default=False) # This is per-Person, not per-PersonExpedition

View File

@@ -316,7 +316,7 @@ def kataster(request, slug=None):
if dir.is_dir():
error += f"STOP: a target directory exists. REMOVE or RENAME it first: {dir}<br>\n"
# 1623 : fixedpts/gps/gps23.svx:26:*fix p2023-mg-03 reference 13.81514 47.69169 1767
# 1623 : fixedpts/gps/gps23.svx:26:*fix p2023-mg-03 13.81514 47.69169 1767
# 1626: fixedpts/1626-no-schoenberg-hs-not-tied-to-caves.svx
return l_script, error

View File

@@ -1,4 +1,5 @@
import re
from datetime import datetime
from pathlib import Path
from django.conf import settings
@@ -10,7 +11,8 @@ from troggle.core.models.caves import Cave
from troggle.core.models.logbooks import LogbookEntry, writelogbook # , PersonLogEntry
# from databaseReset import reinit_db # don't do this. databaseRest runs code *at import time*
from troggle.core.models.troggle import Expedition
from troggle.core.models.troggle import Expedition, Person, PersonExpedition
from troggle.core.utils import current_expo, COOKIE_SHORT_TIMEOUT, PUBLIC_LAPTOP_COOKIE_NAME, PUBLIC_LAPTOP_COOKIE_TEXT
from troggle.parsers.imports import (
import_caves,
@@ -145,6 +147,67 @@ def controlpanel(request):
{"expeditions": Expedition.objects.all(), "year": current_expo()},
)
def folk_export(request):
    """Recreates the folk.csv file from the database contents.

    Writes 'new_folk.csv' into the expoweb 'folk' directory: a header row of
    fixed columns followed by one column per expedition year since 1976, then
    one row per Person recording guest/VfHO status, mugshot path, and yearly
    attendance (1 = attended, -1 = attended as a non-caver, blank = absent).
    Renders the control panel page with a completion message.
    WORK IN PROGRESS JULY 2025
    """

    def deslugify(slug):
        # "fred-bloggs" -> "Fred Bloggs" (only the first hyphen separates names)
        return slug.replace("-", " ", 1).title()

    def real_surname(person):
        # Surname implied by the slug, with the first name stripped out.
        # NOTE(review): currently unused here - kept for the WIP refactor.
        return deslugify(person.slug).replace(person.first_name, "").strip()

    expo = {}
    yearlist = range(1976, int(current_expo()) + 1)
    newfile = settings.EXPOWEB / "folk" / "new_folk.csv"
    with open(newfile, "w") as f:
        # Header row: fixed columns, then one column per year.
        f.write("Name,Lastname,Guest,VfHO member,Mugshot")
        for y in yearlist:
            try:
                expo[y] = Expedition.objects.get(year=y)
            except Expedition.DoesNotExist:
                # Gap years (no expedition ran) still get an empty column.
                expo[y] = None
            f.write(f",{y}")
        f.write("\n")
        for person in Person.objects.all().order_by("last_name"):
            f.write(f"{person.input_name}")
            if person.nickname:
                f.write(f" ({person.nickname})")
            f.write(f",{person.input_surname}")
            f.write(",1" if person.is_guest else ",")
            f.write(",1" if person.is_vfho else ",")
            # Mugshot column holds either an image 'i/...' or a blurb page 'l/...',
            # stored in the db with a '/folk/' prefix that folk.csv omits.
            if person.mug_shot:
                f.write("," + person.mug_shot.replace("/folk/", ""))
            elif person.blurbfile:
                f.write("," + person.blurbfile.replace("/folk/", ""))
            else:
                f.write(",")
            for y in yearlist:
                present = PersonExpedition.objects.filter(expedition=expo[y], person=person)
                if present.count() == 1:
                    # -1 in folk.csv flags a non-caving attendee.
                    f.write(",-1" if present[0].noncaver else ",1")
                else:
                    f.write(",")
            f.write("\n")
    return render(
        request,
        "controlPanel.html",
        {
            "expeditions": Expedition.objects.all(),
            # strftime uses single quotes: same-quote nesting inside an f-string
            # is a syntax error before Python 3.12 (PEP 701).
            "jobs_completed": f"GENERATED {newfile} {datetime.now().strftime('%Y-%m-%d %H:%M')}",
        },
    )
def exportlogbook(request, year=None):
"""Constructs, from the database, a complete HTML formatted logbook

View File

@@ -61,7 +61,7 @@ ENTRIES = {
"2025": 27,
"2024": 127,
"2023": 131,
"2022": 94,
"2022": 93,
"2019": 55,
"2018": 98,
"2017": 74,

View File

@@ -22,38 +22,40 @@ todo = """
- [copy these from paper notes]
"""
def parse_blurb(personline, header, person):
def parse_blurb(personline, header):
"""create mugshot Photo instance
Would be better if all this was done before the Person object was created in the db, then it would not
need re-saving (which is slow)"""
person = {}
ms_filename = personline[header["Mugshot"]]
ms_path = Path(settings.EXPOWEB, "folk", ms_filename)
if ms_filename:
if not ms_path.is_file():
message = f"! INVALID mug_shot field '{ms_filename}' for {person.fullname}"
message = f"! INVALID mug_shot field '{ms_filename}' for {personline[header["Mugshot"]]}"
print(message)
DataIssue.objects.create(parser="people", message=message, url=f"/person/{person.fullname}")
DataIssue.objects.create(parser="people", message=message, url=f"/person/{personline[header["fullname"]]}")
return
if ms_filename.startswith("i/"):
# if person just has an image, add it. It has format 'i/adama2018.jpg'
person.mug_shot = str(Path("/folk", ms_filename))
person.blurb = None
person["mug_shot"] = str(Path("/folk", ms_filename))
person["blurb"] = None
elif ms_filename.startswith("l/"):
# it has the format 'l/ollybetts.htm' the file may contain <img src="../i/mymug.jpg"> images
person["blurbfile"] = str(Path("/folk", ms_filename))
with open(ms_path, "r") as blurbfile:
blrb = blurbfile.read()
pblurb = re.search(r"<body>.*<hr", blrb, re.DOTALL)
if pblurb:
person.mug_shot = None
person["mug_shot"] = None
fragment = re.search("<body>(.*)<hr", blrb, re.DOTALL).group(1)
fragment = fragment.replace('src="../i/', 'src="/folk/i/')
fragment = fragment.replace("src='../i/", "src='/folk/i/")
fragment = re.sub(r"<h.*>[^<]*</h.>", "", fragment)
# replace src="../i/ with src="/folk/i
person.blurb = fragment
person["blurb"] = fragment
else:
message = f"! Blurb parse error in {ms_filename}"
print(message)
@@ -66,7 +68,8 @@ def parse_blurb(personline, header, person):
print(message)
DataIssue.objects.create(parser="people", message=message, url="/folk/")
person.save()
return person
slug_cache = {}
def troggle_slugify(longname):
@@ -104,6 +107,8 @@ def load_people_expos():
those created here, if it does not exist.
"""
DataIssue.objects.filter(parser="people").delete()
Person.objects.all().delete()
PersonExpedition.objects.all().delete()
persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) # should really be EXPOFOLK I guess
personreader = csv.reader(persontab) # this is an iterator
@@ -120,8 +125,9 @@ def load_people_expos():
otherAttribs = {"name": f"CUCC expo {year}"}
e = Expedition.objects.create(**otherAttribs, **coUniqueAttribs)
print(" - Loading personexpeditions")
print(" - Loading persons and personexpeditions")
pe_list = []
for personline in personreader:
# This is all horrible: refactor it.
# CSV: Name,Lastname,Guest,VfHO member,Mugshot,..
@@ -130,13 +136,16 @@ def load_people_expos():
plainname = re.sub(r"<.*?>", "", name) # now in slugify
match = re.match(r"^([^(]*)(\(([^)]*)\))?", name) # removes nickname in brackets
displayname = match.group(1)
displayname = match.group(1).strip()
input_name = displayname
slug = troggle_slugify(displayname)
firstname = ""
nick = ""
rawlastname = personline[header["Lastname"]].strip()
input_surname = rawlastname
if rawlastname == "":
print(f"MISSING SURNAME FIELD for {name} - check against similar names in the list to see what you have done.")
if matchlastname := re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname):
@@ -148,6 +157,8 @@ def load_people_expos():
splitnick = re.match(r"^([\w&;\s\-]+)(?:\(([^)]*)\))?", plainname)
fullname = splitnick.group(1) # removes Nickname in brackets, but also cuts hyphenated names
nick = splitnick.group(2) or ""
if nick:
nick = nick.strip()
fullname = fullname.strip()
@@ -164,21 +175,32 @@ def load_people_expos():
else:
vfho = True
# would be better to just create the python object, and only cmmit to db once all done inc blurb
# would be better to just create the python object, and only commit to db once all done inc blurb
# and better to save all the Persons in a bulk update, then do all the PersonExpeditions
coUniqueAttribs = {"slug": slug}
otherAttribs = {"first_name": firstname, "last_name": (lastname or ""), "is_vfho": vfho, "fullname": fullname, "nickname": nick,"is_guest": (personline[header["Guest"]] == "1")}
person = Person.objects.create(**otherAttribs, **coUniqueAttribs)
blurb_fields = parse_blurb(personline=personline, header=header)
parse_blurb(personline=personline, header=header, person=person) # saves to db too
attribs = {"slug": slug, "first_name": firstname, "last_name": (lastname or ""),
"is_vfho": vfho, "fullname": fullname, "nickname": nick, "input_name": input_name, "input_surname": input_surname,
"is_guest": (personline[header["Guest"]] == "1")}
person = Person.objects.create(**attribs, **blurb_fields)
# make person expedition from table
#person.save()
#print(" - Loading personexpeditions")
# make person expedition
for year, attended in list(zip(headers, personline))[5:]:
expedition = Expedition.objects.get(year=year)
if attended == "1" or attended == "-1":
coUniqueAttribs = {"person": person, "expedition": expedition}
# otherAttribs = {"is_guest": (personline[header["Guest"]] == "1")}
pe = PersonExpedition.objects.create(**coUniqueAttribs)
if attended in ("1", "-1"):
pe_list.append(PersonExpedition(person=person, expedition=expedition, noncaver=(attended == "-1")))
# pe = PersonExpedition.objects.create(
# person=person,
# expedition=expedition,
# noncaver=(attended == "-1")
# )
PersonExpedition.objects.bulk_create(pe_list)
print("", flush=True)
ensure_users_are_persons()
most_recent = Expedition.objects.all().first()
@@ -188,7 +210,7 @@ def check_new_signups(expedition):
signups_clear = read_signups()
# print(signups_clear)
for slug in signups_clear:
print(f"check_new_signups: {slug}")
print(f" - Checking signups {slug}")
p = Person.objects.get(slug=slug)
pe = PersonExpedition.objects.update_or_create(person=p, expedition=expedition)
# print("ADDING ",pe, expedition)
@@ -196,7 +218,7 @@ def check_new_signups(expedition):
def ensure_users_are_persons():
# Just ensure this is up to date.
print(f"# ensure_users_are_persons() - except for expo and expoadmin of course")
print(f" - Ensure_users_are_persons() - except for expo and expoadmin of course")
users = User.objects.all()
for u in users:
ps = Person.objects.filter(slug=u.username)
@@ -204,7 +226,7 @@ def ensure_users_are_persons():
p = ps[0]
p.user = u
p.save()
print(f" - {p.user} {u=}")
print(f" {p.user} {u=}")
def who_is_this(year, possibleid):
expo = Expedition.objects.filter(year=year)

View File

@@ -676,6 +676,18 @@ class LoadingSurvex:
SO we have to recognise the '*fix' too
"""
# *fix|36|reference|36359.40|82216.08|2000.00\n
# *fix|36|36359.40|82216.08|2000.00\n
# Regex explanation:
# (?i) # Case-insensitive matching
# ^\s*[*]fix\s+ # Line starts with optional whitespace, then "*fix" followed by one or more spaces
# ([\w\d_.\-]+) # Capture group 1: an identifier (alphanumeric, underscore, dot, or hyphen)
# \s+ # One or more spaces
# (?:reference)? # Optional literal word "reference" (non-capturing)
# \s*([\d\.]*) # Capture group 2: optional number (digits and periods)
# \s+([\d\.]*) # Capture group 3: another number (digits and periods)
# \s+([\d\.]*) # Capture group 4: yet another number (digits and periods)
# \s*;? # Optional whitespace and optional semicolon
# (.*)$ # Capture group 5: remainder of the line (any characters), a comment
rx_fixline = re.compile(r"(?i)^\s*[*]fix\s+([\w\d_\.\-]+)\s+(?:reference)?\s*([\d\.]*)\s+([\d\.]*)\s+([\d\.]*)\s*;?(.*)$")
line = line.replace("\n","")

View File

@@ -24,5 +24,7 @@
<li><a href="/admin/">Django admin</a> - Deep magic access to all models and data <span style="color:red">{{error}}</span>
</ul>
<p>
{{jobs_completed}}
{% endblock %}

View File

@@ -55,7 +55,7 @@ from troggle.core.views.logbooks import (
person,
personexpedition,
)
from troggle.core.views.other import controlpanel, exportlogbook, frontpage, todos, public_laptop
from troggle.core.views.other import controlpanel, exportlogbook, frontpage, todos, public_laptop, folk_export
from troggle.core.views.prospect import prospecting
from troggle.core.views.user_registration import register, newregister, reset_done, ExpoPasswordResetForm
from troggle.core.views.scans import allscans, cavewallets, scansingle, walletslistperson, walletslistyear
@@ -155,6 +155,7 @@ trogglepatterns = [
re_path(r'^indxal.htm$', caveindex, name="caveindex"), # ~420 hrefs to this url in expoweb files
re_path(r'^people/?$', notablepersons, name="notablepersons"),
path('people_ids', people_ids, name="people_ids"),
path('folk_export', folk_export, name="folk_export"),
path('caveslist', caveslist, name="caveslist"),
path('entrances', entranceindex, name="entranceindex"),