From b5540fd54391e538b01f136842f6258c0ad382f7 Mon Sep 17 00:00:00 2001
From: Philip Sargent
Date: Mon, 21 Jul 2025 18:29:06 +0200
Subject: [PATCH] Use cache instead of .get query, vastly faster

---
 parsers/people.py           | 83 ++++++++++++++++++++++++++-----------
 parsers/survex.py           | 23 +++++-----
 templates/controlPanel.html |  5 ++-
 3 files changed, 74 insertions(+), 37 deletions(-)
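[Annotation, not part of the commit: the main speed-up in parsers/people.py is
replacing a per-row Expedition.objects.get(year=year) query with a dict of
Expedition objects filled once and consulted thereafter. A minimal sketch of
the pattern, assuming Expedition.year values are unique:

    # Build the cache with a single SELECT; every later lookup is then a
    # plain dict access with no database round-trip.
    expos = {e.year: e for e in Expedition.objects.all()}
    expedition = expos[year.strip()]

The patch itself fills the dict year-by-year with .get() when re-running a
folk import, which costs one query per year rather than one per person-year
cell.]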
diff --git a/parsers/people.py b/parsers/people.py
index 31e50bf..e838237 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -1,6 +1,7 @@
 import csv
 import os
 import re
+import time
 
 from html import unescape
 from pathlib import Path
@@ -105,17 +106,33 @@ def load_people_expos():
     Given that we need to do stuff for the coming expo, well before we update the
     folk list, the Expedition object for the coming expo is created elsewhere - in addition
     to those created here, if it does not exist.
+
+    Refactored to separate out the creation of objects in the database to use bulk_create to
+    speed things up. Made little difference, sadly.
     """
+    # import cProfile
+    # import pstats
+    # from pstats import SortKey

+    # pr = cProfile.Profile()
+    # pr.enable()
+    start = time.time()
     DataIssue.objects.filter(parser="people").delete()
     Person.objects.all().delete()
     PersonExpedition.objects.all().delete()
+
+    splitnick_pattern = re.compile(r"^([\w&;\s\-]+)(?:\(([^)]*)\))?")
+    displayname_pattern = re.compile(r"^([^(]*)(\(([^)]*)\))?")  # removes nickname in brackets
+    rawlastname_pattern = re.compile(r"^([\w&;\s]+)(?:\(([^)]*)\))?")
 
-    persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv"))  # should really be EXPOFOLK I guess
-    personreader = csv.reader(persontab)  # this is an iterator
+    with open(settings.EXPOWEB / "folk" / "folk.csv", "r") as folkfile:
+        folklines = folkfile.readlines()  # list of the lines, read all into memory
+    personreader = csv.reader(folklines)  # this is an iterator
     headers = next(personreader)
     header = dict(list(zip(headers, list(range(len(headers))))))
     years = headers[5:]
+    expos = {}
     nexpos = Expedition.objects.count()
     if nexpos <= 0:
         print(" - Creating expeditions")
@@ -124,10 +141,21 @@ def load_people_expos():
             coUniqueAttribs = {"year": year}
             otherAttribs = {"name": f"CUCC expo {year}"}
             e = Expedition.objects.create(**otherAttribs, **coUniqueAttribs)
-
+            expos[year] = e
+    else:  # re-running a folk import without a complete reset
+        print(" - Caching expeditions")
+        for year in years:
+            year = year.strip()
+            e = Expedition.objects.get(year=year)
+            expos[year] = e
+
     print("", flush=True)
     print(" - Loading persons and personexpeditions")
-
+    print(" - Reading folk file")
+    pe_list = []
+    prep_list = []
+
+    p_list = []
     for personline in personreader:
         # This is all horrible: refactor it.
         # CSV: Name,Lastname,Guest,VfHO member,Mugshot,..
@@ -135,8 +163,7 @@ def load_people_expos():
         name = personline[header["Name"]]
         plainname = re.sub(r"<.*?>", "", name)  # now in slugify
 
-        match = re.match(r"^([^(]*)(\(([^)]*)\))?", name)  # removes nickname in brackets
-        displayname = match.group(1).strip()
+        displayname = displayname_pattern.match(name).group(1).strip()
         input_name = displayname
 
         slug = troggle_slugify(displayname)
@@ -148,13 +175,14 @@ def load_people_expos():
         if rawlastname == "":
             print(f"MISSING SURNAME FIELD for {name} - check against similar names in the list to see what you have done.")
 
-        if matchlastname := re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname):
+        if matchlastname := rawlastname_pattern.match(rawlastname):
+            # was: re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname)
            lastname = matchlastname.group(1).strip()
         else:
             print(f"MATCH FAIL {personline=}\n {slug=}\n {name=}\n {rawlastname=}")
             exit(1)
-
-        splitnick = re.match(r"^([\w&;\s\-]+)(?:\(([^)]*)\))?", plainname)
+
+        splitnick = splitnick_pattern.match(plainname)
         fullname = splitnick.group(1)  # removes Nickname in brackets, but also cuts hyphenated names
         nick = splitnick.group(2) or ""
         if nick:
@@ -175,36 +203,41 @@ def load_people_expos():
         else:
             vfho = True
 
-        # would be better to just create the python object, and only commit to db once all done inc blurb
-        # and better to save all the Persons in a bulk update, then do all the PersonExpeditions
         blurb_fields = parse_blurb(personline=personline, header=header)
         attribs = {"slug": slug, "first_name": firstname, "last_name": (lastname or ""), "is_vfho": vfho, "fullname": fullname, "nickname": nick, "input_name": input_name, "input_surname": input_surname, "is_guest": (personline[header["Guest"]] == "1")}
-        person = Person.objects.create(**attribs, **blurb_fields)
-
-        #person.save()
-
-    #print(" - Loading personexpeditions")
+        p_list.append(Person(**attribs, **blurb_fields))
 
         # make person expedition
         for year, attended in list(zip(headers, personline))[5:]:
-            expedition = Expedition.objects.get(year=year)
+            expedition = expos[str(year)]
             if attended in ("1", "-1"):
-                pe_list.append(PersonExpedition(person=person, expedition=expedition, noncaver=(attended == "-1")))
-                # pe = PersonExpedition.objects.create(
-                #     person=person,
-                #     expedition=expedition,
-                #     noncaver=(attended == "-1")
-                # )
+                prep_list.append((slug, expedition, (attended == "-1")))
+                # pe_list.append(PersonExpedition(person=person, expedition=expedition, noncaver=(attended == "-1")))
+
+    duration = time.time() - start
+    print(f" - duration: {duration:5.1f} s")
+    print(" - Loading persons into db")
+    persons = Person.objects.bulk_create(p_list)
+
+    print(" - Loading personexpeditions into db", flush=True)
+    for pe in prep_list:
+        slug, expedition, noncaver = pe
+        p = Person.objects.get(slug=slug)
+        pe_list.append(PersonExpedition(person=p, expedition=expedition, noncaver=noncaver))
     PersonExpedition.objects.bulk_create(pe_list)
-    print("", flush=True)
 
     ensure_users_are_persons()
     most_recent = Expedition.objects.all().first()
     check_new_signups(most_recent)
 
+    # pr.disable()
+    # with open("folk_reader.prof", "w") as f:
+    #     ps = pstats.Stats(pr, stream=f)
+    #     ps.sort_stats(SortKey.CUMULATIVE)
+    #     ps.print_stats()
+
 
 def check_new_signups(expedition):
     signups_clear = read_signups()
@@ -226,7 +259,7 @@ def ensure_users_are_persons():
             p = ps[0]
             p.user = u
             p.save()
-            print(f" {p.user} {u=}")
+            # print(f" {p.user} {u=}")
 
 def who_is_this(year, possibleid):
     expo = Expedition.objects.filter(year=year)
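[Annotation, not part of the commit: the PersonExpedition pass above still
issues one Person.objects.get(slug=slug) query per row. A sketch of a further
step, assuming Person.slug is unique and the backend sets primary keys on
bulk_create (PostgreSQL, or SQLite on Django 3.0+):

    persons = Person.objects.bulk_create(p_list)
    person_by_slug = {p.slug: p for p in persons}  # one pass, no extra queries

    pe_list = [
        PersonExpedition(person=person_by_slug[slug], expedition=expedition, noncaver=noncaver)
        for slug, expedition, noncaver in prep_list
    ]
    PersonExpedition.objects.bulk_create(pe_list)

On backends without that guarantee, Person.objects.in_bulk(field_name="slug")
builds the same mapping in a single query.]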
diff --git a/parsers/survex.py b/parsers/survex.py
index 02e39bc..36861d6 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -2338,22 +2338,22 @@ def FindAndLoadSurvex():
         fcollate.write(f";*include {survexfileroot.path}\n")
         flinear.write(f"{svx_scan.depthinclude:2} {indent} *include {survexfileroot.path}\n")
 
-        import cProfile
-        import pstats
-        from pstats import SortKey
+        # import cProfile
+        # import pstats
+        # from pstats import SortKey
 
-        pr = cProfile.Profile()
-        pr.enable()
+        # pr = cProfile.Profile()
+        # pr.enable()
         svx_scan.svxpass = svx_scan.TREE
         # ----------------------------------------------------------------
         svx_scan.PushdownStackScan(survexblockroot, survexfileroot.path, finrootname, flinear, fcollate)
         # ----------------------------------------------------------------
         svx_scan.svxpass = ""
-        pr.disable()
-        with open("PushdownStackScan.prof", "w") as f:
-            ps = pstats.Stats(pr, stream=f)
-            ps.sort_stats(SortKey.CUMULATIVE)
-            ps.print_stats()
+        # pr.disable()
+        # with open("PushdownStackScan.prof", "w") as f:
+        #     ps = pstats.Stats(pr, stream=f)
+        #     ps.sort_stats(SortKey.CUMULATIVE)
+        #     ps.print_stats()
 
         flinear.write(f"{svx_scan.depthinclude:2} {indent} *edulcni {survexfileroot.path}\n")
         fcollate.write(f";*edulcni {survexfileroot.path}\n")
@@ -2709,7 +2709,8 @@ def parse_one_file(fpath): # --------------------------------------in progress--
             print(f" - Aborting file parsing & import into database.")
             return False
         print(f" - Pre-existing survexfile {svxs}.")
-        existingsvx = SurvexFile.objects.get(path=fpath)
+        existingsvx = svxs[0]
+        # existingsvx = SurvexFile.objects.get(path=fpath)
         existingcave = existingsvx.cave
         print(f" - survexfile id={existingsvx.id} {existingsvx} {existingcave}")
 
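[Annotation, not part of the commit: the svxs[0] change above relies on
Django's queryset result cache. Once svxs (presumably
SurvexFile.objects.filter(path=fpath)) has been evaluated, e.g. by the
preceding print() or a truth test, indexing it is served from the cached rows,
whereas the replaced SurvexFile.objects.get(path=fpath) always issued a
second, identical SELECT. A sketch of the behaviour:

    svxs = SurvexFile.objects.filter(path=fpath)
    if not svxs:           # evaluating the queryset fills its result cache
        return False
    existingsvx = svxs[0]  # served from the cache, no new query
]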
diff --git a/templates/controlPanel.html b/templates/controlPanel.html
index 99e44fc..2bf48d2 100644
--- a/templates/controlPanel.html
+++ b/templates/controlPanel.html
@@ -15,7 +15,10 @@
   • Wild survex files - survex files containing blocks with no related wallet
   • Survex Directories - Every Cave has an associated directory and a Primary survex file
   • Survex import record - indented *include and begin/end tree
   • Survex debug report - warnings and details
-  • Therion Import issues - warnings from the recent data import
+  • Therion Import issues - warnings from the recent data import
+  • List of folk - ordered by first name, registered people in red
+  • Export new_folk.csv - export from data in the database
+
 
   • Kataster renumber - Rename a cave to a new kataster number {{error}}
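[Annotation, not part of the commit: both parser files now carry the same
commented-out cProfile harness. For reference, its uncommented shape, as
presumably used while measuring this change, is:

    import cProfile
    import pstats
    from pstats import SortKey

    pr = cProfile.Profile()
    pr.enable()
    load_people_expos()                    # the code under measurement
    pr.disable()
    with open("folk_reader.prof", "w") as f:
        ps = pstats.Stats(pr, stream=f)
        ps.sort_stats(SortKey.CUMULATIVE)
        ps.print_stats()
]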