mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2025-12-15 13:57:12 +00:00
533 lines
20 KiB
Python
533 lines
20 KiB
Python
import csv
|
|
import os
|
|
import re
|
|
import time
|
|
from html import unescape
|
|
from pathlib import Path
|
|
|
|
from django.conf import settings
|
|
from django.contrib.auth.models import User
|
|
from django.db import models
|
|
from unidecode import unidecode
|
|
|
|
from troggle.core.models.troggle import DataIssue, Expedition, Person, PersonExpedition
|
|
from troggle.core.views.signup import read_signups
|
|
|
|
"""These functions do not match how the stand-alone folk script works, so that script produces an HTML file which has
href links to pages in troggle which troggle does not think are right.
The standalone script needs to be rendered defunct, and all the parsing needs to be done in troggle. Either that,
or they should use the same code by importing a shared module.
"""
|
|
|
|
todo = """
|
|
- [copy these from paper notes]
|
|
"""
|
|
|
|
def parse_blurb(personline, header):
    """Read the mugshot and blurbfile fields for one folk.csv row and check that
    they correspond to real files under EXPOWEB/folk/.

    Fixes the format of <img> tags inside a blurbfile so that they resolve
    from troggle pages, and returns a dict of Person constructor fields
    ('mugshot', 'blurbfile', 'blurb' - all optional).

    Always returns a dict (possibly empty), never None, so the caller can
    safely splat the result with **.
    """
    person = {}

    for folk_field in ["Mugshot", "Blurbfile"]:
        ms_filename = personline[header[folk_field]]
        ms_path = settings.EXPOWEB / "folk" / ms_filename

        if ms_filename:
            if not ms_path.is_file():
                message = f"! INVALID {folk_field} field '{ms_filename}' for {personline[header[folk_field]]}"
                print(message)
                DataIssue.objects.create(parser="people", message=message, url=f"/person/{personline[header['Name']]}")
                # BUG FIX: this was a bare 'return' (i.e. None), which made the
                # caller's Person(**attribs, **blurb_fields) raise TypeError.
                # Skip just this bad field and carry on with the rest of the row.
                continue

            person[folk_field.lower()] = str(Path("/folk", ms_filename))

        if (folk_field == "Blurbfile") and ms_filename:
            # it has the format 'l/ollybetts.htm'; the file may contain <img src="../i/mymug.jpg"> images
            with open(ms_path, "r") as blurbfile:
                blrb = blurbfile.read()
            # Capture everything between <body> and the first <hr in one pass
            # (previously the same pattern was searched twice).
            pblurb = re.search(r"<body>(.*)<hr", blrb, re.DOTALL)
            if pblurb:
                fragment = pblurb.group(1)
                # rewrite relative image paths so they work from /person/ pages
                fragment = fragment.replace('src="../i/', 'src="/folk/i/')
                fragment = fragment.replace("src='../i/", "src='/folk/i/")
                # drop any heading elements - the page template supplies its own
                fragment = re.sub(r"<h.*>[^<]*</h.>", "", fragment)
                person["blurb"] = fragment
            else:
                message = f"! Blurb parse error in {ms_filename}"
                print(message)
                DataIssue.objects.create(parser="people", message=message, url=f"/person/{personline[header['Name']]}")

    return person
|
|
|
|
|
|
# Remembers every slug handed out so far, so that duplicate names get
# numbered suffixes (dave-johnson, dave-johnson_2, ...).
slug_cache = {}

def troggle_slugify(longname):
    """Build a URL-safe slug from a person's display name, with uniqueness
    enforcement. Yes we have had two "Dave Johnson"s.
    This function copied intact to expoweb/scripts/make-folklist.py
    """
    slug = longname.strip().lower().replace(" ", "-")
    slug = re.sub(r'\([^\)]*\)', '', slug)  # drop any bracketed nickname

    # Fold the handful of accented letters we actually see, and strip
    # punctuation that would be illegal or ugly in a slug.
    for unwanted, substitute in (
        ("é", "e"),
        ("á", "a"),
        ("ä", "a"),
        ("&", ""),   # otherwise just remove the &
        (";", ""),   # otherwise just remove the ;
        ("'", ""),   # otherwise just remove the ', no O'Reilly problem
    ):
        slug = slug.replace(unwanted, substitute)

    slug = re.sub(r'<[^>]*>', '', slug)  # remove <span lang="hu"> and any HTML tags
    slug = slug.strip("-")  # remove spare hyphens at either end

    if len(slug) > 40:  # slugfield is 50 chars; leave room for a _N suffix
        slug = slug[:40]

    if slug in slug_cache:
        # Seen before: bump the counter and append it to make this one unique.
        slug_cache[slug] += 1
        slug = f"{slug}_{slug_cache[slug]}"
    slug_cache[slug] = 1

    return slug
|
|
|
|
def load_people_expos():
    """This is where the folk.csv file is parsed to read people's names.

    This is ALSO where all the Expedition objects get created. So this is the point at which troggle
    gets told what expeditions exist.

    Given that we need to do stuff for the coming expo, well before we update the folk list,
    the Expedition object for the coming expo is created elsewhere - in addition to
    those created here, if it does not exist.

    Refactored to separate out the creation of objects in the database to use bulk_create to
    speed things up. Made little difference sadly.
    """
    # import cProfile
    # import pstats
    # from pstats import SortKey

    # pr = cProfile.Profile()
    # pr.enable()
    start = time.time()
    # Full reset: wipe everything this parser owns before re-importing.
    DataIssue.objects.filter(parser="people").delete()
    Person.objects.all().delete()
    PersonExpedition.objects.all().delete()

    # folk.csv layout: the first 6 columns are per-person fields
    # (Name, Lastname, Guest, VfHO member, Mugshot, Blurbfile); every column
    # from index 6 onward is an expedition year.
    years_begin = 6

    splitnick_pattern = re.compile(r"^([\w&;\s\-]+)(?:\(([^)]*)\))?")
    displayname_pattern = re.compile(r"^([^(]*)(\(([^)]*)\))?") # removes nickname in brackets
    rawlastname_pattern = re.compile(r"^([\w&;\s]+)(?:\(([^)]*)\))?")

    with open (settings.EXPOWEB / "folk" / "folk.csv", "r") as folkfile:
        folklines = folkfile.readlines() # list of the lines, read all into memory
    personreader = csv.reader(folklines) # this is an iterator
    headers = next(personreader)
    # header: column name -> column index
    header = dict(list(zip(headers, list(range(len(headers))))))

    years = headers[years_begin:]
    Expedition.objects.all().delete()
    expos = {}
    nexpos = Expedition.objects.count()
    # NOTE(review): because of the unconditional delete() just above, nexpos is
    # always 0 here, so the else branch below appears to be dead code - confirm.
    if nexpos <= 0:
        print(" - Creating expeditions")
        for year in years:
            iy = int(year)  # NOTE(review): unused; presumably a validity check that the column is numeric
            year = year.strip()
            coUniqueAttribs = {"year": year}
            otherAttribs = {"name": f"CUCC expo {year}"}
            e = Expedition.objects.create(**otherAttribs, **coUniqueAttribs)
            expos[year] = e
    else: # re-running a folk import without a complete reset
        print(" - Cacheing expeditions")
        for year in years:
            iy = int(year)  # NOTE(review): unused here too
            year = year.strip()
            e = Expedition.objects.get(year=year)
            expos[year] = e
    print("", flush=True)

    print(" - Loading persons and personexpeditions")
    print(" - Reading folk file")

    pe_list = []     # PersonExpedition objects for bulk_create
    prep_list = []   # (slug, expedition, noncaver) tuples collected during parse

    p_list =[]       # Person objects for bulk_create
    for personline in personreader:
        # This is all horrible: refactor it.
        # CSV: Name,Lastname,Guest,VfHO member,Mugshot,..
        # e.g: Olly Betts (Ol),Betts,,,l/ollybetts.htm,
        name = personline[header["Name"]]
        plainname = re.sub(r"<.*?>", "", name) # now in slugify

        # display name = name with any "(Nickname)" part removed
        displayname = displayname_pattern.match(name).group(1).strip()
        input_name = displayname
        slug = troggle_slugify(displayname)

        firstname = ""
        nick = ""

        rawlastname = personline[header["Lastname"]].strip()
        input_surname = rawlastname

        if rawlastname == "":
            print(f"MISSING SURNAME FIELD for {name} - check against similar names in the list to see what you have done.")
        if matchlastname := rawlastname_pattern.match(rawlastname):
            #re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?",
            lastname = matchlastname.group(1).strip()
        else:
            print(f"MATCH FAIL {personline=}\n {slug=}\n {name=}\n {rawlastname=}")
            # NOTE(review): exit(1) aborts the whole import process on one bad
            # row - harsh for library code; consider DataIssue + skip instead.
            exit(1)

        splitnick = splitnick_pattern.match(plainname)
        fullname = splitnick.group(1) # removes Nickname in brackets, but also cuts hyphenated names
        nick = splitnick.group(2) or ""
        if nick:
            nick = nick.strip()

        fullname = fullname.strip()

        names = fullname.split(" ") # This may have more than one, e.g. "Adeleide de Diesback"
        firstname = names[0]
        if len(names) == 1:
            lastname = "" # wookey special code

        #restore fullname to be the whole string
        fullname = displayname

        # any non-empty value in the "VfHO member" column counts as membership
        if personline[header["VfHO member"]] == "":
            vfho = False
        else:
            vfho = True

        blurb_fields = parse_blurb(personline=personline, header=header)

        attribs = {"slug": slug, "first_name": firstname, "last_name": (lastname or ""),
            "is_vfho": vfho, "fullname": fullname, "nickname": nick, "input_name": input_name, "input_surname": input_surname,
            "is_guest": (personline[header["Guest"]] == "1")}
        p_list.append(Person(**attribs, **blurb_fields))

        # make person expedition
        # Year columns: "1" = attended, "-1" = attended as a non-caver.
        for year, attended in list(zip(headers, personline))[years_begin:]:
            expedition = expos[str(year)]

            if attended in ("1", "-1"):
                prep_list.append((slug, expedition, (attended == "-1")))
                # pe_list.append(PersonExpedition(person=person, expedition=expedition, noncaver=(attended == "-1")))
    duration = time.time() - start
    print(f" - duration: {duration:5.1f} s")
    print(" - Loading persons into db")
    persons = Person.objects.bulk_create(p_list)

    print(" - Loading personexpeditions into db", flush=True)
    # NOTE(review): this is an N+1 query pattern - one Person.objects.get()
    # per attendance record; a {slug: person} dict built from p_list would
    # avoid the repeated lookups.
    for pe in prep_list:
        slug, expedition, noncaver = pe
        p = Person.objects.get(slug=slug)
        pe_list.append(PersonExpedition(person=p, expedition=expedition, noncaver=noncaver))
    PersonExpedition.objects.bulk_create(pe_list)

    ensure_users_are_persons()
    # assumes default Expedition ordering puts the most recent year first - TODO confirm
    most_recent = Expedition.objects.all().first()
    print(most_recent)
    check_new_signups(most_recent)
    # pr.disable()
    # with open("folk_reader.prof", "w") as f:
    #     ps = pstats.Stats(pr, stream=f)
    #     ps.sort_stats(SortKey.CUMULATIVE)
    #     ps.print_stats()
|
|
|
|
|
|
def check_new_signups(expedition):
    """Ensure everyone who has signed up for the given (most recent) expedition
    has a PersonExpedition record for it.

    Signups come from the signup system (read_signups()) keyed by person slug.
    """
    signups_clear = read_signups()
    # print(signups_clear)
    for slug in signups_clear:
        print(f" - Checking signups {slug}")
        try:
            p = Person.objects.get(slug=slug)
        except Person.DoesNotExist:
            # ROBUSTNESS FIX: a signup for someone not (yet) in folk.csv used
            # to crash the whole import; record it and carry on instead.
            message = f"! Signup for unknown person '{slug}' - not in folk.csv?"
            print(message)
            DataIssue.objects.create(parser="people", message=message, url=f"/person/{slug}")
            continue
        # update_or_create returns an (object, created) tuple; the single-name
        # assignment in the old code silently held the tuple, not the object.
        pe, created = PersonExpedition.objects.update_or_create(person=p, expedition=expedition)
        # print("ADDING ", pe, expedition)
|
|
|
|
|
|
def ensure_users_are_persons():
    """Link each Django User account to the Person with the same slug, if any.

    Users without a matching Person (e.g. the 'expo' and 'expoadmin' accounts)
    are simply left unlinked. Just ensures this is up to date.
    """
    # (was an f-string with no placeholders)
    print(" - Ensure_users_are_persons() - except for expo and expoadmin of course")
    for u in User.objects.all():
        # .first() returns None when there is no match - one query, and no
        # need to materialise the whole filtered list as before.
        p = Person.objects.filter(slug=u.username).first()
        if p:
            p.user = u
            p.save()
            # print(f"   {p.user} {u=}")
|
|
|
|
def who_is_this(year, possibleid):
    """Return the Person known by 'possibleid' on the given year's expedition,
    or None if the name is not recognised.
    """
    # BUG FIX: this was Expedition.objects.filter(year=year), which returns a
    # QuerySet - but GetPersonExpeditionNameLookup() needs a single Expedition
    # object (it reads expedition.name).
    expo = Expedition.objects.get(year=year)
    # BUG FIX: use dict.get() - plain indexing raised KeyError for an unknown
    # name, which made the 'return None' branch below unreachable.
    personexpedition = GetPersonExpeditionNameLookup(expo).get(possibleid.lower())
    if personexpedition:
        return personexpedition.person
    else:
        return None
|
|
|
|
def when_on_expo(name):
    """Returns a list of PersonExpedition objects for the string, if recognised as a name
    """
    matches = []
    for expedition in Expedition.objects.all():
        # alias -> PersonExpedition lookup table for this expedition
        lookup = GetPersonExpeditionNameLookup(expedition)
        if name in lookup:
            matches.append(lookup[name])
            print(f"{name} => {lookup[name]}")

    return matches
|
|
|
|
|
|
# ARGE members and other known Austrian/German cavers who appear in survex
# files and logbooks but are not CUCC expo members. Matching is exact (see
# known_foreigner()). Some entries are deliberate spelling variants of the
# same person, e.g. "Marcus Scheuermann" / "Marcus Scheuerman".
# (A module-level 'global foreign_friends' statement was removed here:
# 'global' is a no-op at module scope.)
foreign_friends = [
    "Aiko",
    "Arndt Karger",
    "Dominik Jauch",
    "Florian Gruner",
    "Fritz Mammel",
    "Gunter Graf",
    "Helmut Stopka-Ebeler",
    "K. Jäger",
    "Kai Schwekend",
    "Karl Gaisberger",
    "Marcus Scheuermann",
    "Marcus Scheuerman",
    "Mark Morgan",
    "P. Jeutter",
    "R. Seebacher",
    "Regina Kaiser",
    "Robert Seebacher",
    "S. Steinberger",
    "Sepp Steinberger",
    "Thilo Müller",
    "Uli Schütz",
    "Wieland Scheuerle",
]
|
|
|
|
def known_foreigner(id):
    """Is this someone from ARGE or a known Austrian/German caver?
    The name has to match exactly - no soft matching.
    """
    # NOTE: 'id' shadows the builtin, but the parameter name is kept for
    # backward compatibility with any keyword callers.
    # Read-only access needs no 'global' declaration; and a membership test
    # replaces the if/return True/else/return False boilerplate.
    return id in foreign_friends
|
|
|
|
|
|
# Refactor. The dict GetPersonExpeditionNameLookup(expo) indexes by name and has values of personexpedition
# This is convoluted, the personexpedition concept is unnecessary, should it just return person??
# Or better, query with a string and return a list of personexpeditions

# Module-level cache: expedition.name -> {alias: PersonExpedition}
Gpersonexpeditionnamelookup = {}


def GetPersonExpeditionNameLookup(expedition):
    """Yes this should all be in an editable text file, not in the body of the code. Sorry.

    This uses the existing database records of everyone on an expedition to construct a dictionary
    indexed by every possible pseudonym or alias that the person might be known by.

    This dictionary is used when parsing logbooks and survex files to identify who is being
    referred to, when the name written in the logbook is e.g. "Mike TA" == "Mike The Animal"
    == "Mike Rickardson".
    """
    global Gpersonexpeditionnamelookup

    def apply_initials(variations, a, l):
        # Append the common "first + last" abbreviation patterns for the
        # pair (a, l), e.g. "fredsmith", "fred smith", "fred s", "f smith", "fs".
        variations.append(a + l)
        variations.append(a + " " + l)
        variations.append(a + " " + l[0])
        variations.append(a + l[0])
        variations.append(a + " " + l[0] + ".")
        variations.append(a[0] + " " + l)
        variations.append(a[0] + ". " + l)
        variations.append(a[0] + l)
        variations.append(a[0] + l[0]) # initials e.g. gb or bl
        return variations

    def apply_variations(f, l, n=""):
        """Be generous in guessing possible matches. Any duplicates will be ruled as invalid."""
        f = f.lower()
        l = l.lower()
        variations = []
        variations.append(f)
        variations.append(l)
        variations = apply_initials(variations, f, l)
        if n:
            # also generate combinations using the nickname
            variations.append(n)
            variations = apply_initials(variations, n, f)
            variations = apply_initials(variations, n, l)
        return variations

    # Return the memoised table if we have already built it for this expedition.
    res = Gpersonexpeditionnamelookup.get(expedition.name)

    if res:
        return res

    res = {}
    duplicates = set()

    # print("Calculating GetPersonExpeditionNameLookup for " + expedition.year)
    personexpeditions = PersonExpedition.objects.filter(expedition=expedition)
    short = {}     # short-form prefixes (e.g. "dan" for "daniel") -> PersonExpedition
    dellist = []   # short forms claimed by more than one person; to be discarded
    for personexpedition in personexpeditions:
        possnames = []
        # normalise to lowercase ASCII with HTML entities resolved
        f = unidecode(unescape(personexpedition.person.first_name.lower().strip()))
        l = unidecode(unescape(personexpedition.person.last_name.lower().strip()))
        full = unidecode(unescape(personexpedition.person.fullname.lower().strip()))
        n = unidecode(unescape(personexpedition.person.nickname.lower().strip()))
        if full not in possnames:
            possnames.append(full)
        if n not in possnames:
            possnames.append(n)

        if l:
            possnames += apply_variations(f, l, n)

        # Hand-maintained first-name aliases, both directions of each pair:
        if f == "Adeleide".lower():
            possnames += apply_variations("Adelaide", l)
        if f == "Adelaide".lower():
            possnames += apply_variations("Adeleide", l)

        if f == "Robert".lower():
            possnames += apply_variations("Bob", l)
        if f == "Rob".lower():
            possnames += apply_variations("Robert", l)

        if f == "Thomas".lower():
            possnames += apply_variations("Tom", l)
        if f == "Tom".lower():
            possnames += apply_variations("Thomas", l)

        if f == "Lizzy".lower():
            possnames += apply_variations("Lizzie", l)
        if f == "Lizzie".lower():
            possnames += apply_variations("Lizzy", l)

        if f == "Phil".lower(): # needed when Phil is used with a surname initial, so default short-form does not work.
            possnames += apply_variations("Philip", l)
        if f == "Philip".lower():
            possnames += apply_variations("Phil", l)

        if f == "Andrew".lower():
            possnames += apply_variations("Andy", l)
        if f == "Andy".lower():
            possnames += apply_variations("Andrew", l)

        if f == "Michael".lower():
            possnames += apply_variations("Mike", l)
        if f == "Mike".lower():
            possnames += apply_variations("Michael", l)

        if f == "David".lower():
            possnames += apply_variations("Dave", l)
        if f == "Dave".lower():
            possnames += apply_variations("David", l)

        if f == "Peter".lower():
            possnames += apply_variations("Pete", l)
        if f == "Pete".lower():
            possnames += apply_variations("Peter", l)

        if f == "Tobias".lower():
            possnames += apply_variations("Toby", l)
        if f == "Toby".lower():
            possnames += apply_variations("Tobias", l)

        if f == "Olly".lower():
            possnames += apply_variations("Oliver", l)
        if f == "Oliver".lower():
            possnames += apply_variations("Olly", l)

        if f == "Ollie".lower():
            possnames += apply_variations("Oliver", l)
        if f == "Oliver".lower():
            possnames += apply_variations("Ollie", l)

        if f == "Becka".lower():
            possnames += apply_variations("Rebecca", l)

        # Per-person special cases: alternative surnames, married names, handles.
        if f"{f} {l}" == "Andy Waddington".lower():
            possnames += apply_variations("aer", "waddington")
        if f"{f} {l}" == "Phil Underwood".lower():
            possnames += apply_variations("phil", "underpants")
        if f"{f} {l}" == "Naomi Griffiths".lower():
            possnames += apply_variations("naomi", "makins")
        if f"{f} {l}" == "Tina White".lower():
            possnames += apply_variations("tina", "richardson")
        if f"{f} {l}" == "Cat Hulse".lower():
            possnames += apply_variations("catherine", "hulse")
            possnames += apply_variations("cat", "henry")
        if f"{f} {l}" == "Jess Stirrups".lower():
            possnames += apply_variations("jessica", "stirrups")
        if f"{f} {l}" == "Nat Dalton".lower():
            possnames += apply_variations("nathanael", "dalton") # correct. He has a weird spelling.
        if f"{f} {l}" == "Mike Richardson".lower():
            possnames.append("mta")
            possnames.append("miketa")
            possnames.append("mike the animal")
            possnames.append("animal")
        if f"{f} {l}" == "Eric Landgraf".lower():
            possnames.append("eric c.landgraf")
            possnames.append("eric c. landgraf")
            possnames.append("eric c landgraf")
        if f"{f} {l}" == "Nadia Raeburn".lower():
            possnames.append("tinywoman")
            possnames.append("nadia rc")
            possnames.append("nadia raeburn-cherradi")

        if f"{f} {l}" == "Phil Wigglesworth".lower():
            possnames.append("wiggy")
        if f"{f} {l}" == "Philip Banister".lower():
            possnames.append("crofton")
        if f"{f} {l}" == "Elaine Oliver".lower():
            possnames.append("cavingpig")
        if f"{f} {l}" == "Tom Crossley".lower():
            possnames.append("tcacrossley")
        if f"{f} {l}" == "Rob Watson".lower():
            possnames.append("nobrotson")
        if f"{f} {l}" == "Todd Rye".lower():
            possnames.append("samouse1")
        if f"{f} {l}" == "Jono Lester".lower():
            possnames.append("ILoveCaves")
        if f"{f} {l}" == "Joel Stobbart".lower():
            possnames.append("El Stobbarto")
        # NOTE(review): duplicate of the "Rob Watson" block above - harmless
        # (possnames is de-duplicated below) but could be removed.
        if f"{f} {l}" == "Rob Watson".lower():
            possnames.append("nobrotson")
        if f"{f} {l}" == "hannah urquhart".lower():
            possnames.append("hannah ug")

        # Collect 3-6 character prefixes of the first name as candidate
        # short forms; any prefix claimed by two people goes on dellist.
        for i in [3, 4, 5, 6]:
            lim = min(i, len(f) + 1) # short form, e.g. Dan for Daniel.
            if f[:lim] not in short:
                short[f[:lim]] = personexpedition
            else:
                dellist.append(f[:lim])

        possnames = set(possnames) # remove duplicates
        for possname in possnames:
            if possname in res:
                # alias claimed by two different people on this expo: ambiguous
                duplicates.add(possname)
            else:
                res[possname] = personexpedition

    # Ambiguous aliases are removed entirely rather than guessed at.
    for possname in duplicates:
        del res[possname]

    # Remove contested short forms, then merge the unambiguous ones in.
    for possname in dellist:
        if possname in short: # always true ?
            del short[possname]
    for shortname in short:
        res[shortname] = short[shortname]

    Gpersonexpeditionnamelookup[expedition.name] = res
    return res
|