Mirror of https://expo.survex.com/repositories/troggle/.git (synced 2025-12-17 17:27:08 +00:00)
used cache instead of .get query, vastly faster
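The change is to the folk-list importer, load_people_expos(): instead of issuing an Expedition.objects.get(year=...) query for every row of folk.csv, the function now fills a dictionary of Expedition objects keyed by year once, and the per-row work becomes a plain dict lookup. A minimal sketch of the pattern, reusing the names from the diff below (illustrative only, not the committed code; it assumes years, headers and personreader as set up in the function):

    # Sketch: cache the Expedition objects once, keyed by year...
    expos = {}
    for year in years:
        expos[year.strip()] = Expedition.objects.get(year=year.strip())

    # ...so each CSV row costs a dict lookup instead of a database query.
    for personline in personreader:
        for year, attended in list(zip(headers, personline))[5:]:
            expedition = expos[str(year)]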
@@ -1,6 +1,7 @@
 import csv
 import os
 import re
+import time
 from html import unescape
 from pathlib import Path
 
@@ -105,17 +106,33 @@ def load_people_expos():
     Given that we need to do stuff for the coming expo, well before we update the folk list,
     the Expedition object for the coming expo is created elsewhere - in addition to
     those created here, if it does not exist.
+
+    Refactored to separate out the creation of objects in the database to use bulk_create to
+    speed things up. Made little difference sadly.
     """
+    # import cProfile
+    # import pstats
+    # from pstats import SortKey
+
+    # pr = cProfile.Profile()
+    # pr.enable()
+    start = time.time()
     DataIssue.objects.filter(parser="people").delete()
     Person.objects.all().delete()
     PersonExpedition.objects.all().delete()
 
-    persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) # should really be EXPOFOLK I guess
-    personreader = csv.reader(persontab) # this is an iterator
+    splitnick_pattern = re.compile(r"^([\w&;\s\-]+)(?:\(([^)]*)\))?")
+    displayname_pattern = re.compile(r"^([^(]*)(\(([^)]*)\))?") # removes nickname in brackets
+    rawlastname_pattern = re.compile(r"^([\w&;\s]+)(?:\(([^)]*)\))?")
+
+    with open (settings.EXPOWEB / "folk" / "folk.csv", "r") as folkfile:
+        folklines = folkfile.readlines() # list of the lines, read all into memory
+    personreader = csv.reader(folklines) # this is an iterator
     headers = next(personreader)
     header = dict(list(zip(headers, list(range(len(headers))))))
 
     years = headers[5:]
+    expos = {}
     nexpos = Expedition.objects.count()
     if nexpos <= 0:
         print(" - Creating expeditions")
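The docstring added above names the second half of the refactor: Person rows are no longer written one objects.create() call at a time but accumulated in a list and saved with a single bulk_create(). A sketch of that pattern, assuming the attribs and blurb_fields dicts built per row in the hunks below (illustrative, not the committed code):

    p_list = []
    for personline in personreader:
        # ... per-row parsing builds attribs and blurb_fields as in the hunks below ...
        p_list.append(Person(**attribs, **blurb_fields))  # instantiate in memory, no INSERT yet
    persons = Person.objects.bulk_create(p_list)  # one bulk INSERT instead of one per person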
@@ -124,10 +141,21 @@ def load_people_expos():
             coUniqueAttribs = {"year": year}
             otherAttribs = {"name": f"CUCC expo {year}"}
             e = Expedition.objects.create(**otherAttribs, **coUniqueAttribs)
+            expos[year] = e
+    else: # re-running a folk import without a complete reset
+        print(" - Cacheing expeditions")
+        for year in years:
+            year = year.strip()
+            e = Expedition.objects.get(year=year)
+            expos[year] = e
+    print("", flush=True)
     print(" - Loading persons and personexpeditions")
+    print(" - Reading folk file")
 
     pe_list = []
+    prep_list = []
 
+    p_list =[]
     for personline in personreader:
         # This is all horrible: refactor it.
         # CSV: Name,Lastname,Guest,VfHO member,Mugshot,..
@@ -135,8 +163,7 @@ def load_people_expos():
         name = personline[header["Name"]]
         plainname = re.sub(r"<.*?>", "", name) # now in slugify
 
-        match = re.match(r"^([^(]*)(\(([^)]*)\))?", name) # removes nickname in brackets
-        displayname = match.group(1).strip()
+        displayname = displayname_pattern.match(name).group(1).strip()
         input_name = displayname
         slug = troggle_slugify(displayname)
 
@@ -148,13 +175,14 @@ def load_people_expos():
 
         if rawlastname == "":
             print(f"MISSING SURNAME FIELD for {name} - check against similar names in the list to see what you have done.")
-        if matchlastname := re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname):
+        if matchlastname := rawlastname_pattern.match(rawlastname):
+            #re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?",
             lastname = matchlastname.group(1).strip()
         else:
             print(f"MATCH FAIL {personline=}\n {slug=}\n {name=}\n {rawlastname=}")
             exit(1)
 
-        splitnick = re.match(r"^([\w&;\s\-]+)(?:\(([^)]*)\))?", plainname)
+        splitnick = splitnick_pattern.match(plainname)
         fullname = splitnick.group(1) # removes Nickname in brackets, but also cuts hyphenated names
         nick = splitnick.group(2) or ""
         if nick:
@@ -175,36 +203,41 @@ def load_people_expos():
         else:
             vfho = True
 
-        # would be better to just create the python object, and only commit to db once all done inc blurb
-        # and better to save all the Persons in a bulk update, then do all the PersonExpeditions
         blurb_fields = parse_blurb(personline=personline, header=header)
 
         attribs = {"slug": slug, "first_name": firstname, "last_name": (lastname or ""),
             "is_vfho": vfho, "fullname": fullname, "nickname": nick, "input_name": input_name, "input_surname": input_surname,
             "is_guest": (personline[header["Guest"]] == "1")}
-        person = Person.objects.create(**attribs, **blurb_fields)
+        p_list.append(Person(**attribs, **blurb_fields))
 
-        #person.save()
 
-        #print(" - Loading personexpeditions")
 
         # make person expedition
         for year, attended in list(zip(headers, personline))[5:]:
-            expedition = Expedition.objects.get(year=year)
+            expedition = expos[str(year)]
 
             if attended in ("1", "-1"):
-                pe_list.append(PersonExpedition(person=person, expedition=expedition, noncaver=(attended == "-1")))
-                # pe = PersonExpedition.objects.create(
-                # person=person,
-                # expedition=expedition,
-                # noncaver=(attended == "-1")
-                # )
+                prep_list.append((slug, expedition, (attended == "-1")))
+                # pe_list.append(PersonExpedition(person=person, expedition=expedition, noncaver=(attended == "-1")))
+    duration = time.time() - start
+    print(f" - duration: {duration:5.1f} s")
+    print(" - Loading persons into db")
+    persons = Person.objects.bulk_create(p_list)
 
+    print(" - Loading personexpeditions into db", flush=True)
+    for pe in prep_list:
+        slug, expedition, noncaver = pe
+        p = Person.objects.get(slug=slug)
+        pe_list.append(PersonExpedition(person=p, expedition=expedition, noncaver=noncaver))
     PersonExpedition.objects.bulk_create(pe_list)
 
-    print("", flush=True)
     ensure_users_are_persons()
     most_recent = Expedition.objects.all().first()
     check_new_signups(most_recent)
+    # pr.disable()
+    # with open("folk_reader.prof", "w") as f:
+    # ps = pstats.Stats(pr, stream=f)
+    # ps.sort_stats(SortKey.CUMULATIVE)
+    # ps.print_stats()
 
 
 def check_new_signups(expedition):
     signups_clear = read_signups()
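One detail of the hunk above: PersonExpedition rows are not built directly from the in-memory Person instances. The CSV loop records only (slug, expedition, noncaver) tuples in prep_list, and each Person is fetched again by slug after the bulk_create. That is presumably because objects returned by bulk_create are not guaranteed to carry primary keys on every database backend, so a fresh fetch is the safe way to get a usable foreign-key target. A sketch of the two-phase pattern (illustrative, using the names from the diff):

    # Phase 1, inside the CSV loop: remember what to create, keyed by slug.
    prep_list.append((slug, expedition, (attended == "-1")))

    # Phase 2, after Person.objects.bulk_create(p_list): resolve each slug to a saved
    # Person so the PersonExpedition foreign key points at a row with a primary key.
    for slug, expedition, noncaver in prep_list:
        p = Person.objects.get(slug=slug)
        pe_list.append(PersonExpedition(person=p, expedition=expedition, noncaver=noncaver))
    PersonExpedition.objects.bulk_create(pe_list)

A further saving in the same spirit would be a single query building a slug-to-Person dict instead of one Person.objects.get() per tuple, but that is not what this commit does.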
@@ -226,7 +259,7 @@ def ensure_users_are_persons():
             p = ps[0]
             p.user = u
             p.save()
-            print(f" {p.user} {u=}")
+            # print(f" {p.user} {u=}")
 
 def who_is_this(year, possibleid):
     expo = Expedition.objects.filter(year=year)
@@ -2338,22 +2338,22 @@ def FindAndLoadSurvex():
     fcollate.write(f";*include {survexfileroot.path}\n")
     flinear.write(f"{svx_scan.depthinclude:2} {indent} *include {survexfileroot.path}\n")
 
-    import cProfile
-    import pstats
-    from pstats import SortKey
+    # import cProfile
+    # import pstats
+    # from pstats import SortKey
 
-    pr = cProfile.Profile()
-    pr.enable()
+    # pr = cProfile.Profile()
+    # pr.enable()
     svx_scan.svxpass = svx_scan.TREE
     # ----------------------------------------------------------------
     svx_scan.PushdownStackScan(survexblockroot, survexfileroot.path, finrootname, flinear, fcollate)
     # ----------------------------------------------------------------
     svx_scan.svxpass = ""
-    pr.disable()
-    with open("PushdownStackScan.prof", "w") as f:
-        ps = pstats.Stats(pr, stream=f)
-        ps.sort_stats(SortKey.CUMULATIVE)
-        ps.print_stats()
+    # pr.disable()
+    # with open("PushdownStackScan.prof", "w") as f:
+    # ps = pstats.Stats(pr, stream=f)
+    # ps.sort_stats(SortKey.CUMULATIVE)
+    # ps.print_stats()
 
     flinear.write(f"{svx_scan.depthinclude:2} {indent} *edulcni {survexfileroot.path}\n")
     fcollate.write(f";*edulcni {survexfileroot.path}\n")
@@ -2709,7 +2709,8 @@ def parse_one_file(fpath): # --------------------------------------in progress--
            print(f" - Aborting file parsing & import into database.")
            return False
        print(f" - Pre-existing survexfile {svxs}.")
-       existingsvx = SurvexFile.objects.get(path=fpath)
+       existingsvx = svxs[0]
+       #existingsvx = SurvexFile.objects.get(path=fpath)
        existingcave = existingsvx.cave
        print(f" - survexfile id={existingsvx.id} {existingsvx} {existingcave}")
 
@@ -15,7 +15,10 @@
 <li><a href="/survexfilewild/{{year}}">Wild survex files</a> - survex files containing blocks with no related wallet
 <li><a href="/survexdir">Survex Directories</a> - Every Cave has an associated directory and a Primary survex file
 <li><a href="/surveximport">Survex import record</a> - indented *include and begin/end tree<br /><li><a href="/survexdebug">Survex debug report</a> - warnings and details<br />
-<li><a href="/therionissues">Therion Import issues</a> - warnings from the recent data import<br /><br />
+<li><a href="/therionissues">Therion Import issues</a> - warnings from the recent data import<br />
+<li><a href="/people_ids">List of folk</a> - ordered by first name, registered people in red<br />
+
+<li><a href="/folk_export">Export new_folk.csv</a> - export from data in the database<br /><br />
 
 <li><a href="/kataster/1623-2002-08">Kataster renumber</a> - Rename a cave to a new kataster number <span style="color:red">{{error}}</span>
 