2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-15 07:47:13 +00:00

fixing accented names

This commit is contained in:
2023-10-01 17:53:25 +03:00
parent d8aad0ba2b
commit f161ed3cf4
2 changed files with 14 additions and 3 deletions

View File

@@ -65,14 +65,24 @@ def parse_blurb(personline, header, person):
# Counts how many times each slug has been handed out, so that repeated
# names can be given a numeric suffix. Module-level on purpose: it must
# persist across calls to troggle_slugify().
slug_cache = {}

# Patterns are raw strings: the previous non-raw '\(' / '\)' literals are
# invalid escape sequences (SyntaxWarning from Python 3.12 onwards).
_SLUG_NICKNAME_RE = re.compile(r"\([^\)]*\)")  # nickname in brackets
_SLUG_TAG_RE = re.compile(r"<[^>]*>")  # e.g. <span-lang = "hu">

# Single-character clean-ups applied in one pass: fold the accented letters
# we currently know about to ASCII, and drop the '&' and ';' left over from
# HTML entities (e.g. "&aacute;" becomes "aacute").
_SLUG_CHAR_MAP = str.maketrans(
    {
        "é": "e",
        "á": "a",
        "ä": "a",
        "&": None,
        ";": None,
    }
)


def troggle_slugify(longname: str) -> str:
    """Return a unique, lower-case, hyphenated slug for a person's name.

    Uniqueness enforcement too. Yes we have had two "Dave Johnson"s
    This function copied intact to expoweb/scripts/make-folklist.py
    (keep the two copies in step when changing the slug rules).

    Steps, in order:
      1. strip surrounding whitespace, lower-case, spaces become hyphens;
      2. remove any "(nickname)" section;
      3. map 'é', 'á', 'ä' to plain letters and delete '&' and ';';
      4. remove anything that looks like an HTML tag ("<...>");
      5. truncate to 40 characters;
      6. if the slug has been issued before, append "_<n>" where n is the
         number of times the base slug has now been requested.

    The result is recorded in the module-level ``slug_cache``.
    """
    slug = longname.strip().lower().replace(" ", "-")
    slug = _SLUG_NICKNAME_RE.sub("", slug)
    slug = slug.translate(_SLUG_CHAR_MAP)
    slug = _SLUG_TAG_RE.sub("", slug)

    # slugfield is 50 chars; leave room for the "_<n>" uniqueness suffix
    slug = slug[:40]

    if slug in slug_cache:
        slug_cache[slug] += 1
        slug = f"{slug}_{slug_cache[slug]}"
    # Register the slug actually returned (suffixed or not) as issued once.
    slug_cache[slug] = 1
    return slug
def load_people_expos():
@@ -99,8 +109,9 @@ def load_people_expos():
print(" - Loading personexpeditions")
for personline in personreader:
# This is all horrible: refactor it.
name = personline[header["Name"]]
name = re.sub(r"<.*?>", "", name)
plainname = re.sub(r"<.*?>", "", name) # now in slugify
match = re.match(r"^([^(]*)(\(([^)]*)\))?", name) # removes nickname in brackets
displayname = match.group(1)
@@ -113,7 +124,7 @@ def load_people_expos():
matchlastname = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname)
lastname = matchlastname.group(1).strip()
splitnick = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", name)
splitnick = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", plainname)
fullname = splitnick.group(1) # removes Nickname in brackets, but also cuts hyphenated names
nick = splitnick.group(2) or ""