From b5540fd54391e538b01f136842f6258c0ad382f7 Mon Sep 17 00:00:00 2001
From: Philip Sargent
Date: Mon, 21 Jul 2025 18:29:06 +0200
Subject: [PATCH] Use cache instead of .get query, vastly faster

---
 parsers/people.py           | 83 ++++++++++++++++++++++++++-----------
 parsers/survex.py           | 23 +++++-----
 templates/controlPanel.html |  5 ++-
 3 files changed, 74 insertions(+), 37 deletions(-)
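[Annotation, not part of the commit: the main speed-up in parsers/people.py is
replacing a per-row Expedition.objects.get(year=year) query with a dict of
Expedition objects filled once and consulted thereafter. A minimal sketch of
the pattern, assuming Expedition.year values are unique:

    # Build the cache with a single SELECT; every later lookup is then a
    # plain dict access with no database round-trip.
    expos = {e.year: e for e in Expedition.objects.all()}
    expedition = expos[year.strip()]

The patch itself fills the dict year-by-year with .get() when re-running a
folk import, which costs one query per year rather than one per person-year
cell.]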
diff --git a/parsers/people.py b/parsers/people.py
index 31e50bf..e838237 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -1,6 +1,7 @@
 import csv
 import os
 import re
+import time
 
 from html import unescape
 from pathlib import Path
@@ -105,17 +106,33 @@ def load_people_expos():
     Given that we need to do stuff for the coming expo, well before we update the
     folk list, the Expedition object for the coming expo is created elsewhere - in addition
     to those created here, if it does not exist.
+
+    Refactored to separate out the creation of objects in the database to use bulk_create to
+    speed things up. Made little difference, sadly.
     """
+    # import cProfile
+    # import pstats
+    # from pstats import SortKey

+    # pr = cProfile.Profile()
+    # pr.enable()
+    start = time.time()
     DataIssue.objects.filter(parser="people").delete()
     Person.objects.all().delete()
     PersonExpedition.objects.all().delete()
+
+    splitnick_pattern = re.compile(r"^([\w&;\s\-]+)(?:\(([^)]*)\))?")
+    displayname_pattern = re.compile(r"^([^(]*)(\(([^)]*)\))?")  # removes nickname in brackets
+    rawlastname_pattern = re.compile(r"^([\w&;\s]+)(?:\(([^)]*)\))?")
 
-    persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv"))  # should really be EXPOFOLK I guess
-    personreader = csv.reader(persontab)  # this is an iterator
+    with open(settings.EXPOWEB / "folk" / "folk.csv", "r") as folkfile:
+        folklines = folkfile.readlines()  # list of the lines, read all into memory
+    personreader = csv.reader(folklines)  # this is an iterator
     headers = next(personreader)
     header = dict(list(zip(headers, list(range(len(headers))))))
     years = headers[5:]
+    expos = {}
     nexpos = Expedition.objects.count()
     if nexpos <= 0:
         print(" - Creating expeditions")
@@ -124,10 +141,21 @@ def load_people_expos():
             coUniqueAttribs = {"year": year}
             otherAttribs = {"name": f"CUCC expo {year}"}
             e = Expedition.objects.create(**otherAttribs, **coUniqueAttribs)
-
+            expos[year] = e
+    else:  # re-running a folk import without a complete reset
+        print(" - Caching expeditions")
+        for year in years:
+            year = year.strip()
+            e = Expedition.objects.get(year=year)
+            expos[year] = e
+
     print("", flush=True)
     print(" - Loading persons and personexpeditions")
-
+    print(" - Reading folk file")
+    pe_list = []
+    prep_list = []
+
+    p_list = []
     for personline in personreader:
         # This is all horrible: refactor it.
         # CSV: Name,Lastname,Guest,VfHO member,Mugshot,..
@@ -135,8 +163,7 @@ def load_people_expos():
         name = personline[header["Name"]]
         plainname = re.sub(r"<.*?>", "", name)  # now in slugify
 
-        match = re.match(r"^([^(]*)(\(([^)]*)\))?", name)  # removes nickname in brackets
-        displayname = match.group(1).strip()
+        displayname = displayname_pattern.match(name).group(1).strip()
         input_name = displayname
 
         slug = troggle_slugify(displayname)
@@ -148,13 +175,14 @@ def load_people_expos():
         if rawlastname == "":
             print(f"MISSING SURNAME FIELD for {name} - check against similar names in the list to see what you have done.")
 
-        if matchlastname := re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname):
+        if matchlastname := rawlastname_pattern.match(rawlastname):
+            # was: re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname)
            lastname = matchlastname.group(1).strip()
         else:
             print(f"MATCH FAIL {personline=}\n {slug=}\n {name=}\n {rawlastname=}")
             exit(1)
-
-        splitnick = re.match(r"^([\w&;\s\-]+)(?:\(([^)]*)\))?", plainname)
+
+        splitnick = splitnick_pattern.match(plainname)
         fullname = splitnick.group(1)  # removes Nickname in brackets, but also cuts hyphenated names
         nick = splitnick.group(2) or ""
         if nick:
@@ -175,36 +203,41 @@ def load_people_expos():
         else:
             vfho = True
 
-        # would be better to just create the python object, and only commit to db once all done inc blurb
-        # and better to save all the Persons in a bulk update, then do all the PersonExpeditions
         blurb_fields = parse_blurb(personline=personline, header=header)
         attribs = {"slug": slug, "first_name": firstname, "last_name": (lastname or ""), "is_vfho": vfho, "fullname": fullname, "nickname": nick, "input_name": input_name, "input_surname": input_surname, "is_guest": (personline[header["Guest"]] == "1")}
-        person = Person.objects.create(**attribs, **blurb_fields)
-
-        #person.save()
-
-    #print(" - Loading personexpeditions")
+        p_list.append(Person(**attribs, **blurb_fields))
 
         # make person expedition
         for year, attended in list(zip(headers, personline))[5:]:
-            expedition = Expedition.objects.get(year=year)
+            expedition = expos[str(year)]
             if attended in ("1", "-1"):
-                pe_list.append(PersonExpedition(person=person, expedition=expedition, noncaver=(attended == "-1")))
-                # pe = PersonExpedition.objects.create(
-                #     person=person,
-                #     expedition=expedition,
-                #     noncaver=(attended == "-1")
-                # )
+                prep_list.append((slug, expedition, (attended == "-1")))
+                # pe_list.append(PersonExpedition(person=person, expedition=expedition, noncaver=(attended == "-1")))
+
+    duration = time.time() - start
+    print(f" - duration: {duration:5.1f} s")
+    print(" - Loading persons into db")
+    persons = Person.objects.bulk_create(p_list)
+
+    print(" - Loading personexpeditions into db", flush=True)
+    for pe in prep_list:
+        slug, expedition, noncaver = pe
+        p = Person.objects.get(slug=slug)
+        pe_list.append(PersonExpedition(person=p, expedition=expedition, noncaver=noncaver))
     PersonExpedition.objects.bulk_create(pe_list)
-    print("", flush=True)
 
     ensure_users_are_persons()
     most_recent = Expedition.objects.all().first()
     check_new_signups(most_recent)
 
+    # pr.disable()
+    # with open("folk_reader.prof", "w") as f:
+    #     ps = pstats.Stats(pr, stream=f)
+    #     ps.sort_stats(SortKey.CUMULATIVE)
+    #     ps.print_stats()
+
 
 def check_new_signups(expedition):
     signups_clear = read_signups()
@@ -226,7 +259,7 @@ def ensure_users_are_persons():
             p = ps[0]
             p.user = u
             p.save()
-            print(f" {p.user} {u=}")
+            # print(f" {p.user} {u=}")
 
 def who_is_this(year, possibleid):
     expo = Expedition.objects.filter(year=year)
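[Annotation, not part of the commit: the PersonExpedition pass above still
issues one Person.objects.get(slug=slug) query per row. A sketch of a further
step, assuming Person.slug is unique and the backend sets primary keys on
bulk_create (PostgreSQL, or SQLite on Django 3.0+):

    persons = Person.objects.bulk_create(p_list)
    person_by_slug = {p.slug: p for p in persons}  # one pass, no extra queries

    pe_list = [
        PersonExpedition(person=person_by_slug[slug], expedition=expedition, noncaver=noncaver)
        for slug, expedition, noncaver in prep_list
    ]
    PersonExpedition.objects.bulk_create(pe_list)

On backends without that guarantee, Person.objects.in_bulk(field_name="slug")
builds the same mapping in a single query.]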
diff --git a/parsers/survex.py b/parsers/survex.py
index 02e39bc..36861d6 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -2338,22 +2338,22 @@ def FindAndLoadSurvex():
         fcollate.write(f";*include {survexfileroot.path}\n")
         flinear.write(f"{svx_scan.depthinclude:2} {indent} *include {survexfileroot.path}\n")
 
-        import cProfile
-        import pstats
-        from pstats import SortKey
+        # import cProfile
+        # import pstats
+        # from pstats import SortKey
 
-        pr = cProfile.Profile()
-        pr.enable()
+        # pr = cProfile.Profile()
+        # pr.enable()
         svx_scan.svxpass = svx_scan.TREE
         # ----------------------------------------------------------------
         svx_scan.PushdownStackScan(survexblockroot, survexfileroot.path, finrootname, flinear, fcollate)
         # ----------------------------------------------------------------
         svx_scan.svxpass = ""
-        pr.disable()
-        with open("PushdownStackScan.prof", "w") as f:
-            ps = pstats.Stats(pr, stream=f)
-            ps.sort_stats(SortKey.CUMULATIVE)
-            ps.print_stats()
+        # pr.disable()
+        # with open("PushdownStackScan.prof", "w") as f:
+        #     ps = pstats.Stats(pr, stream=f)
+        #     ps.sort_stats(SortKey.CUMULATIVE)
+        #     ps.print_stats()
 
         flinear.write(f"{svx_scan.depthinclude:2} {indent} *edulcni {survexfileroot.path}\n")
         fcollate.write(f";*edulcni {survexfileroot.path}\n")
@@ -2709,7 +2709,8 @@ def parse_one_file(fpath): # --------------------------------------in progress--
             print(f" - Aborting file parsing & import into database.")
             return False
         print(f" - Pre-existing survexfile {svxs}.")
-        existingsvx = SurvexFile.objects.get(path=fpath)
+        existingsvx = svxs[0]
+        # existingsvx = SurvexFile.objects.get(path=fpath)
         existingcave = existingsvx.cave
         print(f" - survexfile id={existingsvx.id} {existingsvx} {existingcave}")
 
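[Annotation, not part of the commit: the svxs[0] change above relies on
Django's queryset result cache. Once svxs (presumably
SurvexFile.objects.filter(path=fpath)) has been evaluated, e.g. by the
preceding print() or a truth test, indexing it is served from the cached rows,
whereas the replaced SurvexFile.objects.get(path=fpath) always issued a
second, identical SELECT. A sketch of the behaviour:

    svxs = SurvexFile.objects.filter(path=fpath)
    if not svxs:           # evaluating the queryset fills its result cache
        return False
    existingsvx = svxs[0]  # served from the cache, no new query
]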
diff --git a/templates/controlPanel.html b/templates/controlPanel.html
index 99e44fc..2bf48d2 100644
--- a/templates/controlPanel.html
+++ b/templates/controlPanel.html
@@ -15,7 +15,10 @@
   • Wild survex files - survex files containing blocks with no related wallet
   • Survex Directories - Every Cave has an associated directory and a Primary survex file
   • Survex import record - indented *include and begin/end tree
   • Survex debug report - warnings and details
-  • Therion Import issues - warnings from the recent data import
+  • Therion Import issues - warnings from the recent data import
+  • List of folk - ordered by first name, registered people in red
+  • Export new_folk.csv - export from data in the database
+
 
   • Kataster renumber - Rename a cave to a new kataster number {{error}}
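[Annotation, not part of the commit: both parser files now carry the same
commented-out cProfile harness. For reference, its uncommented shape, as
presumably used while measuring this change, is:

    import cProfile
    import pstats
    from pstats import SortKey

    pr = cProfile.Profile()
    pr.enable()
    load_people_expos()                    # the code under measurement
    pr.disable()
    with open("folk_reader.prof", "w") as f:
        ps = pstats.Stats(pr, stream=f)
        ps.sort_stats(SortKey.CUMULATIVE)
        ps.print_stats()
]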