mirror of https://expo.survex.com/repositories/troggle/.git

used cache instead of .get query, vastly faster
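The cache in question is a plain dict of Expedition objects, built once and then consulted inside the per-person loop, replacing one Expedition.objects.get() query per row of folk.csv. A minimal sketch of the pattern (the diff below fills the cache one year at a time; filling it from a single query, as here, is equivalent):

    # Build the cache up front, then look expeditions up in memory.
    expos = {}
    for e in Expedition.objects.all():   # one query for all expeditions
        expos[e.year] = e

    # Inside the per-person loop, the old per-row database round-trip
    #     expedition = Expedition.objects.get(year=year)
    # becomes a dict lookup that touches no database at all:
    #     expedition = expos[str(year)]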

2025-07-21 18:29:06 +02:00
parent 2039501672
commit b5540fd543
3 changed files with 74 additions and 37 deletions

View File

@@ -1,6 +1,7 @@
import csv
import os
import re
+import time
from html import unescape
from pathlib import Path
@@ -105,17 +106,33 @@ def load_people_expos():
Given that we need to do stuff for the coming expo, well before we update the folk list,
the Expedition object for the coming expo is created elsewhere - in addition to
those created here, if it does not exist.
+Refactored to separate out the creation of objects in the database to use bulk_create to
+speed things up. Made little difference sadly.
"""
+# import cProfile
+# import pstats
+# from pstats import SortKey
+# pr = cProfile.Profile()
+# pr.enable()
+start = time.time()
DataIssue.objects.filter(parser="people").delete()
Person.objects.all().delete()
PersonExpedition.objects.all().delete()
-persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) # should really be EXPOFOLK I guess
-personreader = csv.reader(persontab) # this is an iterator
+splitnick_pattern = re.compile(r"^([\w&;\s\-]+)(?:\(([^)]*)\))?")
+displayname_pattern = re.compile(r"^([^(]*)(\(([^)]*)\))?") # removes nickname in brackets
+rawlastname_pattern = re.compile(r"^([\w&;\s]+)(?:\(([^)]*)\))?")
+with open (settings.EXPOWEB / "folk" / "folk.csv", "r") as folkfile:
+folklines = folkfile.readlines() # list of the lines, read all into memory
+personreader = csv.reader(folklines) # this is an iterator
headers = next(personreader)
header = dict(list(zip(headers, list(range(len(headers))))))
years = headers[5:]
+expos = {}
nexpos = Expedition.objects.count()
if nexpos <= 0:
print(" - Creating expeditions")
@@ -124,10 +141,21 @@ def load_people_expos():
coUniqueAttribs = {"year": year}
otherAttribs = {"name": f"CUCC expo {year}"}
e = Expedition.objects.create(**otherAttribs, **coUniqueAttribs)
+expos[year] = e
+else: # re-running a folk import without a complete reset
+print(" - Cacheing expeditions")
+for year in years:
+year = year.strip()
+e = Expedition.objects.get(year=year)
+expos[year] = e
print("", flush=True)
print(" - Loading persons and personexpeditions")
print(" - Reading folk file")
pe_list = []
+prep_list = []
+p_list =[]
for personline in personreader:
# This is all horrible: refactor it.
# CSV: Name,Lastname,Guest,VfHO member,Mugshot,..
@@ -135,8 +163,7 @@ def load_people_expos():
name = personline[header["Name"]]
plainname = re.sub(r"<.*?>", "", name) # now in slugify
-match = re.match(r"^([^(]*)(\(([^)]*)\))?", name) # removes nickname in brackets
-displayname = match.group(1).strip()
+displayname = displayname_pattern.match(name).group(1).strip()
input_name = displayname
slug = troggle_slugify(displayname)
@@ -148,13 +175,14 @@ def load_people_expos():
if rawlastname == "":
print(f"MISSING SURNAME FIELD for {name} - check against similar names in the list to see what you have done.")
-if matchlastname := re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname):
+if matchlastname := rawlastname_pattern.match(rawlastname):
+#re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?",
lastname = matchlastname.group(1).strip()
else:
print(f"MATCH FAIL {personline=}\n {slug=}\n {name=}\n {rawlastname=}")
exit(1)
-splitnick = re.match(r"^([\w&;\s\-]+)(?:\(([^)]*)\))?", plainname)
+splitnick = splitnick_pattern.match(plainname)
fullname = splitnick.group(1) # removes Nickname in brackets, but also cuts hyphenated names
nick = splitnick.group(2) or ""
if nick:
@@ -175,36 +203,41 @@ def load_people_expos():
else:
vfho = True
# would be better to just create the python object, and only commit to db once all done inc blurb
# and better to save all the Persons in a bulk update, then do all the PersonExpeditions
blurb_fields = parse_blurb(personline=personline, header=header)
attribs = {"slug": slug, "first_name": firstname, "last_name": (lastname or ""),
"is_vfho": vfho, "fullname": fullname, "nickname": nick, "input_name": input_name, "input_surname": input_surname,
"is_guest": (personline[header["Guest"]] == "1")}
-person = Person.objects.create(**attribs, **blurb_fields)
#person.save()
#print(" - Loading personexpeditions")
+p_list.append(Person(**attribs, **blurb_fields))
# make person expedition
for year, attended in list(zip(headers, personline))[5:]:
-expedition = Expedition.objects.get(year=year)
+expedition = expos[str(year)]
if attended in ("1", "-1"):
-pe_list.append(PersonExpedition(person=person, expedition=expedition, noncaver=(attended == "-1")))
# pe = PersonExpedition.objects.create(
# person=person,
# expedition=expedition,
# noncaver=(attended == "-1")
# )
+prep_list.append((slug, expedition, (attended == "-1")))
+# pe_list.append(PersonExpedition(person=person, expedition=expedition, noncaver=(attended == "-1")))
+duration = time.time() - start
+print(f" - duration: {duration:5.1f} s")
+print(" - Loading persons into db")
+persons = Person.objects.bulk_create(p_list)
+print(" - Loading personexpeditions into db", flush=True)
+for pe in prep_list:
+slug, expedition, noncaver = pe
+p = Person.objects.get(slug=slug)
+pe_list.append(PersonExpedition(person=p, expedition=expedition, noncaver=noncaver))
PersonExpedition.objects.bulk_create(pe_list)
print("", flush=True)
ensure_users_are_persons()
most_recent = Expedition.objects.all().first()
check_new_signups(most_recent)
+# pr.disable()
+# with open("folk_reader.prof", "w") as f:
+# ps = pstats.Stats(pr, stream=f)
+# ps.sort_stats(SortKey.CUMULATIVE)
+# ps.print_stats()
def check_new_signups(expedition):
signups_clear = read_signups()
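The hunk above replaces per-row Person.objects.create() with a two-phase bulk insert: all Person rows go in with one bulk_create(), and only then are the PersonExpedition rows built and bulk-inserted, because each child needs a saved parent with a primary key. In outline (names from the diff; the re-fetch by slug is presumably there because bulk_create is not guaranteed to populate primary keys on every database backend, e.g. MySQL):

    # Phase 1: insert every Person in one batch.
    Person.objects.bulk_create(p_list)

    # Phase 2: now that parents exist, build and insert the children.
    pe_list = []
    for slug, expedition, noncaver in prep_list:
        p = Person.objects.get(slug=slug)   # fetch the saved parent by its slug
        pe_list.append(PersonExpedition(person=p, expedition=expedition, noncaver=noncaver))
    PersonExpedition.objects.bulk_create(pe_list)

The per-slug .get() in phase 2 is itself one query per person; it could in turn be replaced by a single Person.objects.in_bulk(field_name="slug") lookup (assuming slug is a unique field), the same caching move the commit applies to expeditions.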
@@ -226,7 +259,7 @@ def ensure_users_are_persons():
p = ps[0]
p.user = u
p.save()
print(f" {p.user} {u=}")
# print(f" {p.user} {u=}")
def who_is_this(year, possibleid):
expo = Expedition.objects.filter(year=year)

View File

@@ -2338,22 +2338,22 @@ def FindAndLoadSurvex():
fcollate.write(f";*include {survexfileroot.path}\n")
flinear.write(f"{svx_scan.depthinclude:2} {indent} *include {survexfileroot.path}\n")
-import cProfile
-import pstats
-from pstats import SortKey
+# import cProfile
+# import pstats
+# from pstats import SortKey
-pr = cProfile.Profile()
-pr.enable()
+# pr = cProfile.Profile()
+# pr.enable()
svx_scan.svxpass = svx_scan.TREE
# ----------------------------------------------------------------
svx_scan.PushdownStackScan(survexblockroot, survexfileroot.path, finrootname, flinear, fcollate)
# ----------------------------------------------------------------
svx_scan.svxpass = ""
-pr.disable()
-with open("PushdownStackScan.prof", "w") as f:
-ps = pstats.Stats(pr, stream=f)
-ps.sort_stats(SortKey.CUMULATIVE)
-ps.print_stats()
+# pr.disable()
+# with open("PushdownStackScan.prof", "w") as f:
+# ps = pstats.Stats(pr, stream=f)
+# ps.sort_stats(SortKey.CUMULATIVE)
+# ps.print_stats()
flinear.write(f"{svx_scan.depthinclude:2} {indent} *edulcni {survexfileroot.path}\n")
fcollate.write(f";*edulcni {survexfileroot.path}\n")
@@ -2709,7 +2709,8 @@ def parse_one_file(fpath): # --------------------------------------in progress--
print(f" - Aborting file parsing & import into database.")
return False
print(f" - Pre-existing survexfile {svxs}.")
-existingsvx = SurvexFile.objects.get(path=fpath)
+existingsvx = svxs[0]
+#existingsvx = SurvexFile.objects.get(path=fpath)
existingcave = existingsvx.cave
print(f" - survexfile id={existingsvx.id} {existingsvx} {existingcave}")

View File

@@ -15,7 +15,10 @@
<li><a href="/survexfilewild/{{year}}">Wild survex files</a> - survex files containing blocks with no related wallet
<li><a href="/survexdir">Survex Directories</a> - Every Cave has an associated directory and a Primary survex file
<li><a href="/surveximport">Survex import record</a> - indented *include and begin/end tree<br /><li><a href="/survexdebug">Survex debug report</a> - warnings and details<br />
<li><a href="/therionissues">Therion Import issues</a> - warnings from the recent data import<br /><br />
<li><a href="/therionissues">Therion Import issues</a> - warnings from the recent data import<br />
<li><a href="/people_ids">List of folk</a> - ordered by first name, registered people in red<br />
<li><a href="/folk_export">Export new_folk.csv</a> - export from data in the database<br /><br />
<li><a href="/kataster/1623-2002-08">Kataster renumber</a> - Rename a cave to a new kataster number <span style="color:red">{{error}}</span>