mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-17 17:27:08 +00:00

used cache instead of .get query, vastly faster

2025-07-21 18:29:06 +02:00
parent 2039501672
commit b5540fd543
3 changed files with 74 additions and 37 deletions
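The speed-up comes from replacing per-row `Expedition.objects.get(year=year)` database queries - one per (person, year) cell of folk.csv - with a dict of Expedition objects built once up front (the `expos` cache in the diff below). A minimal sketch of the pattern, not the commit's exact code:

    # Build the cache with one query (or reuse objects created moments before).
    expos = {e.year: e for e in Expedition.objects.all()}

    # Inside the per-person loop, a dict lookup replaces a database round-trip:
    expedition = expos[year]   # was: Expedition.objects.get(year=year)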

View File

@@ -1,6 +1,7 @@
 import csv
 import os
 import re
+import time
 from html import unescape
 from pathlib import Path
@@ -105,17 +106,33 @@ def load_people_expos():
     Given that we need to do stuff for the coming expo, well before we update the folk list,
     the Expedition object for the coming expo is created elsewhere - in addition to
     those created here, if it does not exist.
+
+    Refactored to separate out the creation of objects in the database, to use bulk_create
+    to speed things up. Made little difference, sadly.
     """
+    # import cProfile
+    # import pstats
+    # from pstats import SortKey
+    # pr = cProfile.Profile()
+    # pr.enable()
+    start = time.time()
     DataIssue.objects.filter(parser="people").delete()
     Person.objects.all().delete()
     PersonExpedition.objects.all().delete()
+    splitnick_pattern = re.compile(r"^([\w&;\s\-]+)(?:\(([^)]*)\))?")
+    displayname_pattern = re.compile(r"^([^(]*)(\(([^)]*)\))?")  # removes nickname in brackets
+    rawlastname_pattern = re.compile(r"^([\w&;\s]+)(?:\(([^)]*)\))?")
-    persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv"))  # should really be EXPOFOLK I guess
-    personreader = csv.reader(persontab)  # this is an iterator
+    with open(settings.EXPOWEB / "folk" / "folk.csv", "r") as folkfile:
+        folklines = folkfile.readlines()  # list of the lines, read all into memory
+    personreader = csv.reader(folklines)  # this is an iterator
     headers = next(personreader)
     header = dict(list(zip(headers, list(range(len(headers))))))
     years = headers[5:]
+    expos = {}
     nexpos = Expedition.objects.count()
     if nexpos <= 0:
         print(" - Creating expeditions")
@@ -124,10 +141,21 @@ def load_people_expos():
             coUniqueAttribs = {"year": year}
             otherAttribs = {"name": f"CUCC expo {year}"}
             e = Expedition.objects.create(**otherAttribs, **coUniqueAttribs)
+            expos[year] = e
+    else:  # re-running a folk import without a complete reset
+        print(" - Caching expeditions")
+        for year in years:
+            year = year.strip()
+            e = Expedition.objects.get(year=year)
+            expos[year] = e
+    print("", flush=True)
     print(" - Loading persons and personexpeditions")
+    print(" - Reading folk file")
     pe_list = []
+    prep_list = []
+    p_list = []
     for personline in personreader:
         # This is all horrible: refactor it.
         # CSV: Name,Lastname,Guest,VfHO member,Mugshot,..
@@ -135,8 +163,7 @@ def load_people_expos():
         name = personline[header["Name"]]
         plainname = re.sub(r"<.*?>", "", name)  # now in slugify
-        match = re.match(r"^([^(]*)(\(([^)]*)\))?", name)  # removes nickname in brackets
-        displayname = match.group(1).strip()
+        displayname = displayname_pattern.match(name).group(1).strip()
         input_name = displayname
         slug = troggle_slugify(displayname)
@@ -148,13 +175,14 @@ def load_people_expos():
         if rawlastname == "":
             print(f"MISSING SURNAME FIELD for {name} - check against similar names in the list to see what you have done.")
-        if matchlastname := re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname):
+        if matchlastname := rawlastname_pattern.match(rawlastname):
+            # re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?",
             lastname = matchlastname.group(1).strip()
         else:
             print(f"MATCH FAIL {personline=}\n {slug=}\n {name=}\n {rawlastname=}")
             exit(1)
-        splitnick = re.match(r"^([\w&;\s\-]+)(?:\(([^)]*)\))?", plainname)
+        splitnick = splitnick_pattern.match(plainname)
         fullname = splitnick.group(1)  # removes Nickname in brackets, but also cuts hyphenated names
         nick = splitnick.group(2) or ""
         if nick:
@@ -175,36 +203,41 @@ def load_people_expos():
         else:
             vfho = True
-        # would be better to just create the python object, and only commit to db once all done inc blurb
-        # and better to save all the Persons in a bulk update, then do all the PersonExpeditions
         blurb_fields = parse_blurb(personline=personline, header=header)
         attribs = {"slug": slug, "first_name": firstname, "last_name": (lastname or ""),
                    "is_vfho": vfho, "fullname": fullname, "nickname": nick, "input_name": input_name, "input_surname": input_surname,
                    "is_guest": (personline[header["Guest"]] == "1")}
-        person = Person.objects.create(**attribs, **blurb_fields)
-        #person.save()
-        #print(" - Loading personexpeditions")
+        p_list.append(Person(**attribs, **blurb_fields))
         # make person expedition
         for year, attended in list(zip(headers, personline))[5:]:
-            expedition = Expedition.objects.get(year=year)
+            expedition = expos[str(year)]
             if attended in ("1", "-1"):
-                pe_list.append(PersonExpedition(person=person, expedition=expedition, noncaver=(attended == "-1")))
-                # pe = PersonExpedition.objects.create(
-                #     person=person,
-                #     expedition=expedition,
-                #     noncaver=(attended == "-1")
-                # )
+                prep_list.append((slug, expedition, (attended == "-1")))
+                # pe_list.append(PersonExpedition(person=person, expedition=expedition, noncaver=(attended == "-1")))
+    duration = time.time() - start
+    print(f" - duration: {duration:5.1f} s")
+    print(" - Loading persons into db")
+    persons = Person.objects.bulk_create(p_list)
+    print(" - Loading personexpeditions into db", flush=True)
+    for pe in prep_list:
+        slug, expedition, noncaver = pe
+        p = Person.objects.get(slug=slug)
+        pe_list.append(PersonExpedition(person=p, expedition=expedition, noncaver=noncaver))
     PersonExpedition.objects.bulk_create(pe_list)
+    print("", flush=True)
     ensure_users_are_persons()
     most_recent = Expedition.objects.all().first()
     check_new_signups(most_recent)
+    # pr.disable()
+    # with open("folk_reader.prof", "w") as f:
+    #     ps = pstats.Stats(pr, stream=f)
+    #     ps.sort_stats(SortKey.CUMULATIVE)
+    #     ps.print_stats()
 
 def check_new_signups(expedition):
     signups_clear = read_signups()
@@ -226,7 +259,7 @@ def ensure_users_are_persons():
             p = ps[0]
             p.user = u
             p.save()
-            print(f" {p.user} {u=}")
+            # print(f" {p.user} {u=}")
 
 def who_is_this(year, possibleid):
     expo = Expedition.objects.filter(year=year)
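Note that even after `Person.objects.bulk_create(p_list)`, the loop over `prep_list` still issues one `Person.objects.get(slug=slug)` query per person-expedition, which may be part of why the new docstring reports that bulk_create "made little difference". Assuming `slug` is a unique field on Person, those lookups could be collapsed into a single query with Django's `in_bulk()` - a sketch, not what this commit does:

    # One query fetches every Person, keyed by slug (requires slug to be unique).
    people_by_slug = Person.objects.in_bulk(field_name="slug")
    for slug, expedition, noncaver in prep_list:
        pe_list.append(PersonExpedition(person=people_by_slug[slug],
                                        expedition=expedition, noncaver=noncaver))
    PersonExpedition.objects.bulk_create(pe_list)

The re-fetch by slug is needed at all because `bulk_create()` does not populate primary keys on the returned objects on every database backend (MySQL, for example).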

View File

@@ -2338,22 +2338,22 @@ def FindAndLoadSurvex():
     fcollate.write(f";*include {survexfileroot.path}\n")
     flinear.write(f"{svx_scan.depthinclude:2} {indent} *include {survexfileroot.path}\n")
 
-    import cProfile
-    import pstats
-    from pstats import SortKey
-    pr = cProfile.Profile()
-    pr.enable()
+    # import cProfile
+    # import pstats
+    # from pstats import SortKey
+    # pr = cProfile.Profile()
+    # pr.enable()
     svx_scan.svxpass = svx_scan.TREE
     # ----------------------------------------------------------------
     svx_scan.PushdownStackScan(survexblockroot, survexfileroot.path, finrootname, flinear, fcollate)
     # ----------------------------------------------------------------
     svx_scan.svxpass = ""
-    pr.disable()
-    with open("PushdownStackScan.prof", "w") as f:
-        ps = pstats.Stats(pr, stream=f)
-        ps.sort_stats(SortKey.CUMULATIVE)
-        ps.print_stats()
+    # pr.disable()
+    # with open("PushdownStackScan.prof", "w") as f:
+    #     ps = pstats.Stats(pr, stream=f)
+    #     ps.sort_stats(SortKey.CUMULATIVE)
+    #     ps.print_stats()
 
     flinear.write(f"{svx_scan.depthinclude:2} {indent} *edulcni {survexfileroot.path}\n")
     fcollate.write(f";*edulcni {survexfileroot.path}\n")
@@ -2709,7 +2709,8 @@ def parse_one_file(fpath):  # --------------------------------------in progress--
             print(f" - Aborting file parsing & import into database.")
             return False
         print(f" - Pre-existing survexfile {svxs}.")
-        existingsvx = SurvexFile.objects.get(path=fpath)
+        existingsvx = svxs[0]
+        # existingsvx = SurvexFile.objects.get(path=fpath)
         existingcave = existingsvx.cave
         print(f" - survexfile id={existingsvx.id} {existingsvx} {existingcave}")

View File

@@ -15,7 +15,10 @@
 <li><a href="/survexfilewild/{{year}}">Wild survex files</a> - survex files containing blocks with no related wallet
 <li><a href="/survexdir">Survex Directories</a> - Every Cave has an associated directory and a Primary survex file
 <li><a href="/surveximport">Survex import record</a> - indented *include and begin/end tree<br /><li><a href="/survexdebug">Survex debug report</a> - warnings and details<br />
-<li><a href="/therionissues">Therion Import issues</a> - warnings from the recent data import<br /><br />
+<li><a href="/therionissues">Therion Import issues</a> - warnings from the recent data import<br />
+<li><a href="/people_ids">List of folk</a> - ordered by first name, registered people in red<br />
+<li><a href="/folk_export">Export new_folk.csv</a> - export from data in the database<br /><br />
 <li><a href="/kataster/1623-2002-08">Kataster renumber</a> - Rename a cave to a new kataster number <span style="color:red">{{error}}</span>