mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2024-11-21 23:01:52 +00:00
426 lines
16 KiB
Python
426 lines
16 KiB
Python
import csv
|
|
import os
|
|
import re
|
|
from html import unescape
|
|
from pathlib import Path
|
|
from unidecode import unidecode
|
|
|
|
from django.conf import settings
|
|
|
|
from troggle.core.models.troggle import DataIssue, Expedition, Person, PersonExpedition
|
|
|
|
"""These functions do not match how the stand-alone folk script works. So the script produces an HTML file which has
|
|
href links to pages in troggle which troggle does not think are right.
|
|
The standalone script needs to be rendered defunct, and all the parsing needs to be in troggle. Either that,
|
|
or they should use the same code by importing a module.
|
|
"""
|
|
|
|
|
|
def parse_blurb(personline, header, person):
    """Set the mugshot image or the HTML blurb on `person` from the "Mugshot" column, then save it.

    personline -- one row of folk.csv, as a list of strings
    header     -- dict mapping column title to its index in `personline`
    person     -- the already-created Person object; it is modified and re-saved here

    The "Mugshot" field is a path relative to EXPOWEB/folk and is handled by its prefix:
      'i/...'  an image: person.mug_shot is set to '/folk/i/...' and person.blurb to None
      'l/...'  an HTML page: the text between <body> and the last '<hr' becomes person.blurb
               and person.mug_shot is set to None
      ''       nothing to do
    Anything else, or an 'l/' page without the expected <body>...<hr structure, is
    recorded as a DataIssue.

    If the field names a file which does not exist, a DataIssue is recorded and the
    function returns immediately WITHOUT calling person.save().

    Would be better if all this was done before the Person object was created in the db, then it would not
    need re-saving (which is slow)"""
    ms_filename = personline[header["Mugshot"]]

    if ms_filename:
        ms_path = Path(settings.EXPOWEB, "folk", ms_filename)
        if not ms_path.is_file():
            message = f"! INVALID mug_shot field '{ms_filename}' for {person.fullname}"
            print(message)
            DataIssue.objects.create(parser="people", message=message, url=f"/person/{person.fullname}")
            return

        if ms_filename.startswith("i/"):
            # if person just has an image, add it. It has format 'i/adama2018.jpg'
            person.mug_shot = str(Path("/folk", ms_filename))
            person.blurb = None

        elif ms_filename.startswith("l/"):
            # it has the format 'l/ollybetts.htm' the file may contain <img src="../i/mymug.jpg"> images
            with open(ms_path, "r") as blurbfile:
                blrb = blurbfile.read()
            # One search only: the capture group is the blurb itself.
            # The greedy .* means the fragment runs up to the LAST '<hr' in the file.
            pblurb = re.search(r"<body>(.*)<hr", blrb, re.DOTALL)
            if pblurb:
                person.mug_shot = None
                fragment = pblurb.group(1)
                # the page lives in folk/l/, so rewrite relative image links: src="../i/ becomes src="/folk/i/
                fragment = fragment.replace('src="../i/', 'src="/folk/i/')
                fragment = fragment.replace("src='../i/", "src='/folk/i/")
                # Strip headings. Non-greedy, so that two headings on the same line do not
                # also take everything between them.
                fragment = re.sub(r"<h.*?>[^<]*</h.>", "", fragment)
                person.blurb = fragment
            else:
                message = f"! Blurb parse error in {ms_filename}"
                print(message)
                DataIssue.objects.create(parser="people", message=message, url="/folk/")

        else:
            message = f"! Unrecognised type of file at mug_shot field '{ms_filename}' for {person.fullname}"
            print(message)
            DataIssue.objects.create(parser="people", message=message, url="/folk/")

    person.save()
|
|
|
|
# Every slug handed out so far by troggle_slugify(), mapped to a counter.
# For a base slug the counter is the number of times it has been requested;
# a suffixed slug ("name_2") is also registered here, with the value 1.
# It is never cleared in this file, so it lasts for the life of the process.
slug_cache = {}

# Single-character substitutions applied to a slug: a few accented letters are
# flattened, '&' and ';' (the remains of HTML entities) are simply dropped.
_SLUG_CHAR_MAP = str.maketrans({"é": "e", "á": "a", "ä": "a", "&": None, ";": None})


def troggle_slugify(longname):
    """Return a unique, URL-friendly slug for a person's name.

    The name is stripped, lower-cased and has its spaces turned into hyphens; any
    nickname in round brackets and any HTML tag is removed; the result is cut to
    40 characters (the slugfield is 50 chars, which leaves room for the suffix).

    Uniqueness enforcement too. Yes we have had two "Dave Johnson"s: the second
    and later requests for the same slug get "_2", "_3", ... appended, counted in
    the module-level slug_cache.

    This function copied intact to expoweb/scripts/make-folklist.py
    so the slugs it produces must not change.
    """
    slug = longname.strip().lower().replace(" ", "-")
    slug = re.sub(r"\([^\)]*\)", "", slug)  # remove nickname in brackets
    slug = slug.translate(_SLUG_CHAR_MAP)
    slug = re.sub(r"<[^>]*>", "", slug)  # remove <span-lang = "hu">

    slug = slug[:40]  # slugfield is 50 chars
    if slug in slug_cache:
        slug_cache[slug] += 1
        slug = f"{slug}_{slug_cache[slug]}"
    slug_cache[slug] = 1

    return slug
|
|
|
|
def load_people_expos():
    """This is where the folk.csv file is parsed to read people's names.
    Which it gets wrong for people like Lydia-Clare Leather and various 'von' and 'de' middle 'names'
    and McLean and Mclean and McAdam - interaction with the url parser in urls.py too

    This is ALSO where all the Expedition objects get created. So this is the point at which troggle
    gets told what expeditions exist.

    Given that we need to do stuff for the coming expo, well before we update the folk list,
    the Expedition object for the coming expo is created elsewhere - in addition to
    those created here, if it does not exist.

    What it does, in order:
      1. deletes all DataIssues with parser="people"
      2. reads EXPOWEB/folk/folk.csv; the first row is the header and every column
         from the sixth onwards is taken to be an expedition year
      3. if there are no Expedition objects at all, creates one per year column
      4. for each row creates a Person, calls parse_blurb() on it, and creates a
         PersonExpedition for every year column holding "1" or "-1"

    Raises AttributeError (from calling .group on None) if a row's Lastname, or its
    Name with the tags removed, does not start with a word character, '&', ';' or whitespace.
    """
    first_year_column = 5  # the columns before this are Name, Lastname, etc., not years

    DataIssue.objects.filter(parser="people").delete()

    # should really be EXPOFOLK I guess
    # Read everything up front so that the file is closed again before the slow db work starts.
    with open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) as persontab:
        personreader = csv.reader(persontab)  # this is an iterator
        headers = next(personreader)
        personlines = list(personreader)
    header = {title: column for column, title in enumerate(headers)}

    years = headers[first_year_column:]
    if Expedition.objects.count() <= 0:
        print(" - Creating expeditions")
        for year in years:
            Expedition.objects.create(name=f"CUCC expo {year}", year=year)

    print(" - Loading personexpeditions")

    # year -> Expedition. Each year is fetched from the db once, not once per person.
    expeditions = {}

    for personline in personlines:
        # This is all horrible: refactor it.
        name = personline[header["Name"]]
        plainname = re.sub(r"<.*?>", "", name)  # now in slugify

        match = re.match(r"^([^(]*)(\(([^)]*)\))?", name)  # removes nickname in brackets
        displayname = match.group(1)
        slug = troggle_slugify(displayname)

        rawlastname = personline[header["Lastname"]].strip()
        matchlastname = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname)
        lastname = matchlastname.group(1).strip()

        splitnick = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", plainname)
        # removes Nickname in brackets, but also cuts hyphenated names
        names = splitnick.group(1).strip().split(" ")  # This may have more than one, e.g. "Adeleide de Diesback"
        nick = splitnick.group(2) or ""
        firstname = names[0]
        if len(names) == 1:
            lastname = ""  # wookey special code

        # the stored fullname is the whole string in front of any bracketed nickname
        fullname = displayname

        vfho = personline[header["VfHO member"]] != ""

        # would be better to just create the python object, and only commit to db once all done inc blurb
        # and better to save all the Persons in a bulk update, then do all the PersonExpeditions
        person = Person.objects.create(
            first_name=firstname,
            last_name=(lastname or ""),
            is_vfho=vfho,
            fullname=fullname,
            nickname=nick,
            is_guest=(personline[header["Guest"]] == "1"),
            slug=slug,
        )

        parse_blurb(personline=personline, header=header, person=person)  # saves to db too

        # make person expedition from table
        for year, attended in zip(years, personline[first_year_column:]):
            # Looked up whether or not this person attended, so that a year column
            # with no Expedition still fails on the first row, as it always has.
            if year not in expeditions:
                expeditions[year] = Expedition.objects.get(year=year)
            if attended in ("1", "-1"):
                PersonExpedition.objects.create(person=person, expedition=expeditions[year])
    print("", flush=True)
|
|
|
|
|
|
def who_is_this(year, possibleid):
    """Return the Person known as `possibleid` on the expedition of `year`, or None.

    year       -- value matched against Expedition.year
    possibleid -- a name, nickname or initials; it is lower-cased before the lookup

    None is returned when there is no expedition for that year, or when the
    string is not a (unique) key of GetPersonExpeditionNameLookup() for it.
    """
    # GetPersonExpeditionNameLookup() needs one Expedition object, not a QuerySet
    expo = Expedition.objects.filter(year=year).first()
    if expo is None:
        return None

    # .get() so that an unknown name gives None rather than a KeyError
    personexpedition = GetPersonExpeditionNameLookup(expo).get(possibleid.lower())
    if personexpedition:
        return personexpedition.person
    return None
|
|
|
|
|
|
def when_on_expo(name):
    """Collect the PersonExpedition objects which `name` identifies, across every expedition.

    The string is used exactly as given as a key of each expedition's
    GetPersonExpeditionNameLookup() dict. Each hit is also printed.
    An unrecognised name gives an empty list.
    """
    matches = []
    for expedition in Expedition.objects.all():
        lookup = GetPersonExpeditionNameLookup(expedition)
        if name not in lookup:
            continue
        matches.append(lookup[name])
        print(f"{name} => {lookup[name]}")

    return matches
|
|
|
|
|
|
# Names accepted by known_foreigner(). Several people appear under more than
# one spelling or form of their name, because the match is exact.
foreign_friends = [
    "Aiko",
    "Arndt Karger",
    "Dominik Jauch",
    "Florian Gruner",
    "Fritz Mammel",
    "Gunter Graf",
    "Helmut Stopka-Ebeler",
    "K. Jäger",
    "Kai Schwekend",
    "Karl Gaisberger",
    "Marcus Scheuermann",
    "Marcus Scheuerman",
    "Mark Morgan",
    "P. Jeutter",
    "R. Seebacher",
    "Regina Kaiser",
    "Robert Seebacher",
    "S. Steinberger",
    "Sepp Steinberger",
    "Thilo Müller",
    "Uli Schütz",
    "Wieland Scheuerle",
]


def known_foreigner(id):
    """Is this someone from ARGE or a known Austrian? Name has to be exact, no soft matching

    Returns True if `id` is one of the strings in foreign_friends, otherwise False.
    The comparison is case-sensitive.
    """
    return id in foreign_friends
|
|
|
|
|
|
# Refactor. The dict GetPersonExpeditionNameLookup(expo) indexes by name and has values of personexpedition
|
|
# This is convoluted, the personexpedition concept is unnecessary, should it just return person??
|
|
# Or better, query with a string and return a list of personexpeditions
|
|
|
|
# Cache for GetPersonExpeditionNameLookup(): expedition.name -> its lookup dict
Gpersonexpeditionnamelookup = {}

# Yes this should all be in an editable text file, not in the body of the code. Sorry.

# Normalised first name -> other first names the same person may be written with.
# Each alternative is expanded with the person's own last name.
# The pairs are deliberately kept as they were, including the one-way ones.
_FIRST_NAME_ALIASES = {
    "adeleide": ("adelaide",),
    "adelaide": ("adeleide",),
    "robert": ("bob",),
    "rob": ("robert",),
    "thomas": ("tom",),
    "tom": ("thomas",),
    "lizzy": ("lizzie",),
    "lizzie": ("lizzy",),
    # needed when Phil is used with a surname initial, so default short-form does not work.
    "phil": ("philip",),
    "philip": ("phil",),
    "andrew": ("andy",),
    "andy": ("andrew",),
    "michael": ("mike",),
    "mike": ("michael",),
    "david": ("dave",),
    "dave": ("david",),
    "peter": ("pete",),
    "pete": ("peter",),
    "tobias": ("toby",),
    "toby": ("tobias",),
    "olly": ("oliver",),
    "oliver": ("olly", "ollie"),
    "ollie": ("oliver",),
    "becka": ("rebecca",),
}

# "first last" -> other (first, last) pairs for that one person, each expanded
# into the full set of variations.
_FULL_NAME_ALIASES = {
    "andy waddington": (("aer", "waddington"),),
    "phil underwood": (("phil", "underpants"),),
    "naomi griffiths": (("naomi", "makins"),),
    "tina white": (("tina", "richardson"),),
    "cat hulse": (("catherine", "hulse"), ("cat", "henry")),
    "jess stirrups": (("jessica", "stirrups"),),
    "nat dalton": (("nathanael", "dalton"),),  # correct. He has a weird spelling.
}

# "first last" -> extra identifiers for that one person, used exactly as written.
_FULL_NAME_HANDLES = {
    "mike richardson": ("mta", "miketa", "mike the animal", "animal"),
    "eric landgraf": ("eric c.landgraf", "eric c. landgraf", "eric c landgraf"),
    "nadia raeburn": ("tinywoman", "nadia rc", "nadia raeburn-cherradi"),
    "phil wigglesworth": ("wiggy",),
    "philip banister": ("crofton",),
    "elaine oliver": ("cavingpig",),
    "tom crossley": ("tcacrossley",),
    "rob watson": ("nobrotson",),
    "todd rye": ("samouse1",),
    # Every generated key is lower case, so a lower-cased query could never find
    # the mixed-case forms. The original spellings are kept alongside the lower-case ones.
    "jono lester": ("ILoveCaves", "ilovecaves"),
    "joel stobbart": ("El Stobbarto", "el stobbarto"),
}


def _normalise_name(text):
    """Lower-case and strip `text`, then pass it through unescape() and unidecode()."""
    return unidecode(unescape(text.lower().strip()))


def _name_variations(first, last):
    """Be generous in guessing possible matches. Any duplicates will be ruled as invalid.

    Returns the list of lower-case strings by which someone called `first` `last`
    might be referred to: each name alone, both together, and combinations with initials.
    If either name is empty only the non-empty one (if any) is returned, as there
    is no initial to take.
    """
    first = first.lower()
    last = last.lower()
    if not first or not last:
        return [part for part in (first, last) if part]

    first_initial = first[0]
    last_initial = last[0]
    return [
        first,
        last,
        first + last,
        first + " " + last,
        first + " " + last_initial,
        first + last_initial,
        first + " " + last_initial + ".",
        first_initial + " " + last,
        first_initial + ". " + last,
        first_initial + last,
        first_initial + last_initial,  # initials e.g. gb or bl
    ]


def GetPersonExpeditionNameLookup(expedition):
    """Return a dict mapping each recognised name string to a PersonExpedition of `expedition`.

    The keys for one person are: the normalised full name and nickname, the
    variations of first name (and nickname) with last name, the variations for
    the alternatives in _FIRST_NAME_ALIASES and _FULL_NAME_ALIASES, and the
    handles in _FULL_NAME_HANDLES. A key claimed by more than one person on the
    expedition is ambiguous and is removed.

    The first 3, 4, 5 and 6 characters of each first name are then added as
    short forms (e.g. Dan for Daniel), unless that prefix was produced more than
    once. Short forms are written last, so they replace any key of the same
    spelling, including one that had been removed as ambiguous.

    The result is cached in Gpersonexpeditionnamelookup under expedition.name.
    An empty result is falsy, so it is rebuilt on every call.
    """
    cached = Gpersonexpeditionnamelookup.get(expedition.name)
    if cached:
        return cached

    res = {}
    duplicates = set()
    short = {}
    dellist = []

    # print("Calculating GetPersonExpeditionNameLookup for " + expedition.year)
    for personexpedition in PersonExpedition.objects.filter(expedition=expedition):
        person = personexpedition.person
        f = _normalise_name(person.first_name)
        l = _normalise_name(person.last_name)
        full = _normalise_name(person.fullname)
        n = _normalise_name(person.nickname)

        possnames = {full}  # a set, to remove duplicates
        if n:
            # an empty nickname must not become the key ""
            possnames.add(n)

        if l:
            possnames.update(_name_variations(f, l))
        if n:
            possnames.update(_name_variations(n, l))

        for alias in _FIRST_NAME_ALIASES.get(f, ()):
            possnames.update(_name_variations(alias, l))

        whole = f"{f} {l}"
        for alias_first, alias_last in _FULL_NAME_ALIASES.get(whole, ()):
            possnames.update(_name_variations(alias_first, alias_last))
        possnames.update(_FULL_NAME_HANDLES.get(whole, ()))

        for i in [3, 4, 5, 6]:
            lim = min(i, len(f) + 1)  # short form, e.g. Dan for Daniel.
            # NOTE(review): for a first name of 5 characters or fewer the same prefix
            # comes round again here and so lands in dellist - confirm this is intended.
            if f[:lim] not in short:
                short[f[:lim]] = personexpedition
            else:
                dellist.append(f[:lim])

        for possname in possnames:
            if possname in res:
                duplicates.add(possname)
            else:
                res[possname] = personexpedition

    for possname in duplicates:
        del res[possname]

    for possname in dellist:
        # dellist can hold the same prefix several times
        short.pop(possname, None)
    for shortname in short:
        res[shortname] = short[shortname]

    Gpersonexpeditionnamelookup[expedition.name] = res
    return res
|