fixing accented names

This commit is contained in:
Philip Sargent 2023-10-01 17:54:11 +03:00
parent 99566fe519
commit 3525095691
2 changed files with 17 additions and 8 deletions

View File

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 26 KiB

View File

@ -28,12 +28,25 @@ def troggle_slugify(longname):
This function was copied intact from troggle/parsers/people.py
"""
slug = longname.strip().lower().replace(" ","-")
slug = re.sub('\([^\)]*\)','',slug) # remove nickname in brackets
slug = slug.replace('é', 'e')
slug = slug.replace('á', 'a')
slug = slug.replace('ä', 'a')
slug = slug.replace('&', '') # otherwise just remove the &
slug = slug.replace(';', '') # otherwise just remove the ;
slug = re.sub('<[^>]*>','',slug) # remove <span-lang = "hu">
if len(slug) > 40: # slugfield is 50 chars
slug = slug[:40]
if slug in slug_cache:
slug_cache[slug] += 1
slug = f"{slug}_{slug_cache[slug]}"
slug_cache[slug] = 1
# t = longname.find("&")
# s = longname.find(">")
# if t > -1 or s > -1:
# sys.stderr.write(f"{t}-{s} '{longname}' {slug}\n")
return slug
for r in lines[1:]:
@ -57,20 +70,16 @@ for r in lines[1:]:
output += '<tr><td>'
if expos_caving == 0: output += '<span class="didntcave">'
# Middle names still not working: MorvenBeranek-Stanley , Sarah Livia Zerbes
# Need to manage names e.g. <span lang=""sv"">Leif Hornsved (Nickname)</span> => LeifHornsved
namevis = name.replace('""', '"').strip('"')
# Need to manage e.g. Olaf K&auml;hler
# Need to manage e.g. Bal%26aacute%3BzsIzapy
nameurl = namevis.replace(' ', '')
nameurl = nameurl.replace('&', '%26')
nameurl = nameurl.replace(';', '%3B')
nameurl = re.sub('<[^>]*>','',nameurl)
nameurl = re.sub('\([^\)]*\)','',nameurl) # mostly not needed, but is for Wookey
# nameurl = namevis.replace(' ', '')
# nameurl = nameurl.replace('&', '%26')
# nameurl = nameurl.replace(';', '%3B')
# new url using slug
nameurl = re.sub('\([^\)]*\)','',name)
nameurl = troggle_slugify(nameurl)
nameurl = troggle_slugify(namevis)
namelink= "<a href='/person/" + nameurl + "'>" + namevis + "</a>"
output += namelink
#output += name.replace('""', '"')