troggle/parsers/people.py

import csv, re, datetime, os, shutil
from html import unescape
from unidecode import unidecode
from pathlib import Path

from django.conf import settings

from troggle.core.models.troggle import Expedition, Person, PersonExpedition
from troggle.core.models.troggle import DataIssue
from troggle.core.utils import save_carefully, TROG

'''These functions do not match how the stand-alone folk script works, so that script produces an HTML file whose
href links point to pages in troggle which troggle does not think are right.
The standalone script needs to be rendered defunct, and all the parsing needs to be in troggle. Either that,
or they should use the same code by importing a module.
'''

def parse_blurb(personline, header, person):
    """Set person.mug_shot or person.blurb from the "Mugshot" column of a folk.csv row.

    The column holds a path relative to the "folk" directory of settings.EXPOWEB:

    * 'i/<image>'    - a bare image; stored in person.mug_shot as '/folk/i/<image>'
                       and person.blurb is cleared.
    * 'l/<page>.htm' - an HTML page; the text between '<body>' and the last '<hr'
                       is stored in person.blurb (relative image links rewritten to
                       '/folk/i/', simple headings removed) and person.mug_shot is cleared.
    * ''             - nothing to record.

    Problems are printed and recorded as DataIssue rows with parser='people'.

    Args:
        personline: one row of folk.csv as a list of strings.
        header: dict mapping column title to its index in personline.
        person: the Person object to update. It is saved before returning,
            except when the named file does not exist.
    """
    ms_filename = personline[header["Mugshot"]]

    if not ms_filename:
        # Nothing recorded for this person; still save, as before.
        person.save()
        return

    ms_path = Path(settings.EXPOWEB, "folk", ms_filename)
    if not ms_path.is_file():
        message = f"! INVALID mug_shot field '{ms_filename}' for {person.fullname}"
        print(message)
        DataIssue.objects.create(parser='people', message=message, url=f"/person/{person.fullname}")
        return

    if ms_filename.startswith('i/'):
        # The person just has an image. It has format 'i/adama2018.jpg'
        person.mug_shot = str(Path("/folk", ms_filename))
        person.blurb = None

    elif ms_filename.startswith('l/'):
        # It has the format 'l/ollybetts.htm'; the file may contain <img src="../i/mymug.jpg"> images
        with open(ms_path, 'r') as blurbfile:
            blrb = blurbfile.read()
        # Search once and reuse the match (greedy: runs up to the LAST '<hr' in the file).
        pblurb = re.search(r'<body>(.*)<hr', blrb, re.DOTALL)
        if pblurb:
            person.mug_shot = None
            fragment = pblurb.group(1)
            # The page lives in folk/l/, so '../i/' must become the absolute '/folk/i/'
            fragment = fragment.replace('src="../i/', 'src="/folk/i/')
            fragment = fragment.replace("src='../i/", "src='/folk/i/")
            # Drop simple headings. The tag is matched with [^>]* rather than a greedy '.*'
            # so that two headings on one line do not swallow the text between them.
            fragment = re.sub(r'<h[1-6][^>]*>[^<]*</h[1-6]>', '', fragment)
            person.blurb = fragment
        else:
            message = f"! Blurb parse error in {ms_filename}"
            print(message)
            DataIssue.objects.create(parser='people', message=message, url="/folk/")

    else:
        message = f"! Unrecognised type of file at mug_shot field '{ms_filename}' for {person.fullname}"
        print(message)
        DataIssue.objects.create(parser='people', message=message, url="/folk/")

    person.save()

def load_people_expos():
    '''Parse folk.csv and create the Expedition, Person and PersonExpedition records.

    The first row of the file is the header; every column from the sixth onwards is
    taken to be an expedition year. Each later row is one person: a "1" or "-1" in a
    year column means they attended that expedition.

    Name parsing is known to be wrong for people like Lydia-Clare Leather and various
    'von' and 'de' middle 'names' and McLean and Mclean and McAdam - there is an
    interaction with the url parser in urls.py too.

    All existing DataIssue rows for parser='people' are deleted first. Rows whose
    name cannot be parsed are reported as a DataIssue and skipped.
    '''
    DataIssue.objects.filter(parser='people').delete()

    # "name (nickname)": group 1 is the name, optional group 2 is the bracketed nickname
    name_re = re.compile(r"^([\w&;\s]+)(?:\(([^)]*)\))?")

    folkfile = os.path.join(settings.EXPOWEB, "folk", "folk.csv") # should really be EXPOFOLK I guess
    # newline="" is what the csv module asks for; 'with' makes sure the file gets closed.
    with open(folkfile, newline="") as persontab:
        personreader = csv.reader(persontab) # this is an iterator
        headers = next(personreader)
        header = {title: index for index, title in enumerate(headers)}

        # make expeditions
        print(" - Loading expeditions")
        years = headers[5:]

        expeditions = {}
        for year in years:
            lookupAttribs = {'year':year}
            nonLookupAttribs = {'name':"CUCC expo %s" % year}

            save_carefully(Expedition, lookupAttribs, nonLookupAttribs)
            # Fetch each expedition once here, not once per person per year.
            expeditions[year] = Expedition.objects.get(year=year)

        # make persons
        print(" - Loading personexpeditions")

        for personline in personreader:
            if not personline:
                continue # csv.reader yields an empty list for a blank line

            name = re.sub(r"<.*?>", "", personline[header["Name"]])
            splitnick = name_re.match(name)
            if not splitnick:
                message = f"! Cannot parse name '{name}' in folk.csv"
                print(message)
                DataIssue.objects.create(parser='people', message=message, url="/folk/")
                continue

            fullname = splitnick.group(1).strip()
            nickname = splitnick.group(2) or ""

            names = fullname.split(' ')
            firstname = names[0]
            if len(names) == 1:
                # single-name people have no last name, whatever the Lastname column says
                lastname = ""
            else:
                matchlastname = name_re.match(personline[header["Lastname"]].strip())
                if matchlastname:
                    lastname = matchlastname.group(1).strip()
                else:
                    lastname = ""
                    message = f"! Cannot parse Lastname field for '{fullname}' in folk.csv"
                    print(message)
                    DataIssue.objects.create(parser='people', message=message, url="/folk/")

            vfho = personline[header["VfHO member"]] != ''

            lookupAttribs={'first_name':firstname, 'last_name':(lastname or "")}
            nonLookupAttribs={'is_vfho':vfho, 'fullname':fullname, 'nickname':nickname}
            person, _ = save_carefully(Person, lookupAttribs, nonLookupAttribs)

            parse_blurb(personline=personline, header=header, person=person)

            # make person expedition from table
            for year, attended in zip(years, personline[5:]):
                if attended in ("1", "-1"):
                    lookupAttribs = {'person':person, 'expedition':expeditions[year]}
                    nonLookupAttribs = {'nickname':nickname, 'is_guest':(personline[header["Guest"]] == "1")}
                    save_carefully(PersonExpedition, lookupAttribs, nonLookupAttribs)
    print("", flush=True)

def who_is_this(year,possibleid):
    """Return the Person known as possibleid on the expedition of the given year.

    possibleid is lower-cased and looked up in the table built by
    GetPersonExpeditionNameLookup(). Returns None if there is no expedition
    for that year or the name is not recognised.
    """
    # GetPersonExpeditionNameLookup() needs a single Expedition (it reads .name),
    # not the QuerySet that filter() returns.
    expo = Expedition.objects.filter(year=year).first()
    if expo is None:
        return None
    # .get() so that an unknown name gives None instead of raising KeyError
    personexpedition = GetPersonExpeditionNameLookup(expo).get(possibleid.lower())
    if personexpedition:
        return personexpedition.person
    else:
        return None
    
# Refactor. The dict GetPersonExpeditionNameLookup(expo) indexes by name and has values of personexpedition
# This is convoluted, the whole personexpedition concept is unnecessary.

# Cache of the tables built by GetPersonExpeditionNameLookup(), keyed by expedition name.
Gpersonexpeditionnamelookup = { }

# A person whose (lower-case) first name is the key is also recognised under
# the value, combined with their own last name.
_FIRST_NAME_ALIASES = {
    "robert": "bob",
    "andrew": "andy",
    "andy": "andrew",
    "michael": "mike",
    "david": "dave",
    "dave": "david",
    "peter": "pete",
    "pete": "peter",
    "olly": "oliver",
    "oliver": "olly",
    "becka": "rebecca",
}

# Individuals ("first last", lower-case) who are also known under completely
# different (first, last) pairs; every variation of each pair is accepted.
_FULL_NAME_ALIASES = {
    "andy waddington": (("aer", "waddington"),),
    "phil underwood": (("phil", "underpants"),),
    "naomi griffiths": (("naomi", "makins"),),
    "tina white": (("tina", "richardson"),),
    "cat hulse": (("catherine", "hulse"), ("cat", "henry")),
    "jess stirrups": (("jessica", "stirrups"),),
    "nat dalton": (("nathanael", "dalton"),), # correct. He has a weird spelling.
}

# Individuals ("first last", lower-case) with extra names that are accepted exactly as written.
_FULL_NAME_EXTRAS = {
    "mike richardson": ("mta", "miketa", "mike the animal", "animal"),
    "eric landgraf": ("eric c.landgraf", "eric c. landgraf", "eric c landgraf"),
    "nadia raeburn": ("nadia rc", "nadia raeburn-cherradi"),
}

def _name_variations(first, last):
    """Return the lower-case ways of writing a first name / last name pair.

    Both arguments must be non-empty, as their first letters are used.
    """
    first = first.lower()
    last = last.lower()
    return [
        first,
        last,
        first + last,
        first + " " + last,
        first + " " + last[0],
        first + last[0],
        first[0] + " " + last,
        first[0] + last,
        first[0] + last[0], # initials e.g. gb or bl
    ]

def GetPersonExpeditionNameLookup(expedition):
    """Return a dict mapping lower-case names to PersonExpedition for one expedition.

    For every PersonExpedition of the expedition the keys are: the full name, the
    nickname (if any), and - when the person has a last name - the variations of
    first name / nickname / known aliases with that last name. Names are HTML-unescaped
    and passed through unidecode first. A name that would refer to more than one
    person is left out of the table.

    The 3 to 6 letter beginnings of each first name (e.g. Dan for Daniel) are also
    keys when no other person on the expedition produces the same prefix.

    The result is cached in Gpersonexpeditionnamelookup by expedition.name; nothing
    in this module clears that cache, and the cached dict itself is returned.
    """
    res = Gpersonexpeditionnamelookup.get(expedition.name)

    # 'is not None' so that an expedition with nobody on it is not recalculated every call
    if res is not None:
        return res

    res = { }
    duplicates = set()

    #print("Calculating GetPersonExpeditionNameLookup for " + expedition.year)
    short = {}
    dellist = []
    for personexpedition in PersonExpedition.objects.filter(expedition=expedition):
        first = unidecode(unescape(personexpedition.person.first_name.lower()))
        last = unidecode(unescape(personexpedition.person.last_name.lower()))
        full = unidecode(unescape(personexpedition.person.fullname.lower()))
        nick = unidecode(unescape(personexpedition.nickname.lower()))

        possnames = {full} # a set, so repeated names are only counted once per person
        if nick:
            # an empty nickname must not make "" a valid name
            possnames.add(nick)

        if last:
            possnames.update(_name_variations(first, last))

            if nick:
                possnames.update(_name_variations(nick, last))

            alias = _FIRST_NAME_ALIASES.get(first)
            if alias:
                possnames.update(_name_variations(alias, last))

            firstlast = f'{first} {last}'
            for alias_first, alias_last in _FULL_NAME_ALIASES.get(firstlast, ()):
                possnames.update(_name_variations(alias_first, alias_last))
            possnames.update(_FULL_NAME_EXTRAS.get(firstlast, ()))

        # short form, e.g. Dan for Daniel. Slicing past the end just gives the whole name.
        # NOTE(review): a first name shorter than 6 letters therefore produces the same
        # prefix more than once and puts itself on dellist - confirm this is intended.
        for i in (3, 4, 5, 6):
            prefix = first[:i]
            if prefix not in short:
                short[prefix] = personexpedition
            else:
                dellist.append(prefix)

        for possname in possnames:
            if possname in res:
                duplicates.add(possname)
            else:
                res[possname] = personexpedition

    for possname in duplicates:
        del res[possname]

    # dellist can hold the same prefix several times, so it may already be gone
    for possname in dellist:
        short.pop(possname, None)
    # NOTE(review): the surviving short forms overwrite any entry already in res
    res.update(short)

    Gpersonexpeditionnamelookup[expedition.name] = res
    return res
[svn] Fix leftover from expo -> core rename, and add databaseReset.py to README.txt 2009-07-02 22:31:28 +01:00			`import csv, re, datetime, os, shutil`
WORKING both py3.9.10 & 3.8.10 (dj2.2.25) 2022-03-02 23:19:48 +00:00			`from html import unescape`
Allow comments against names in logbooks in brackets Convert accent chars in names into simple chars as this is what people enter in the logbook 2019-07-11 12:29:38 +01:00			`from unidecode import unidecode`
enabled mugshots & blurb in people pages 2021-04-15 17:51:01 +01:00			`from pathlib import Path`
[svn] Photo file handling and mugshots parsing sorted. Made URL settings more relative, less redundant. Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8246 by aaron @ 2/18/2009 6:45 AM 2009-05-13 05:53:37 +01:00
moving save_carefully() 2021-04-13 00:11:08 +01:00			`from django.conf import settings`

fixing typos and changes in importing 2021-04-13 01:13:08 +01:00			`from troggle.core.models.troggle import Expedition, Person, PersonExpedition`
enabled mugshots & blurb in people pages 2021-04-15 17:51:01 +01:00			`from troggle.core.models.troggle import DataIssue`
			`from troggle.core.utils import save_carefully, TROG`
moving save_carefully() 2021-04-13 00:11:08 +01:00
			`'''These functions do not match how the stand-alone folk script works. So the script produces an HTML file which has`
Importing old logbooks 2021-02-06 00:18:48 +00:00			`href links to pages in troggle which troggle does not think are right.`
			`The standalone script needs to be renedred defucnt, and all the parsing needs to be in troggle. Either that,`
			`or they should use the same code by importing a module.`
			`'''`

enabled mugshots & blurb in people pages 2021-04-15 17:51:01 +01:00			`def parse_blurb(personline, header, person):`
[svn] Brief code cleanup. 2009-07-03 05:31:49 +01:00			`"""create mugshot Photo instance"""`
enabled mugshots & blurb in people pages 2021-04-15 17:51:01 +01:00			`ms_filename = personline[header["Mugshot"]]`
			`ms_path = Path(settings.EXPOWEB, "folk", ms_filename)`

			`if ms_filename:`
			`if not ms_path.is_file():`
			`message = f"! INVALID mug_shot field '{ms_filename}' for {person.fullname}"`
			`print(message)`
			`DataIssue.objects.create(parser='people', message=message, url=f"/person/{person.fullname}")`
			`return`

			`if ms_filename.startswith('i/'):`
			`#if person just has an image, add it. It has format 'i/adama2018.jpg'`
			`person.mug_shot = str(Path("/folk", ms_filename))`
			`person.blurb = None`

			`elif ms_filename.startswith('l/'):`
			`# it has the format 'l/ollybetts.htm' the file may contain <img src="../i/mymug.jpg"> images`
			`with open(ms_path,'r') as blurbfile:`
			`blrb = blurbfile.read()`
			`pblurb=re.search(r'<body>.*<hr',blrb,re.DOTALL)`
			`if pblurb:`
			`person.mug_shot = None`
			`fragment= re.search('<body>(.*)<hr',blrb,re.DOTALL).group(1)`
			`fragment = fragment.replace('src="../i/', 'src="/folk/i/')`
			`fragment = fragment.replace("src='../i/", "src='/folk/i/")`
			`fragment = re.sub(r'<h.>[^<]</h.>', '', fragment)`
			`# replace src="../i/ with src="/folk/i`
			`person.blurb = fragment`
			`else:`
			`message = f"! Blurb parse error in {ms_filename}"`
			`print(message)`
			`DataIssue.objects.create(parser='people', message=message, url="/folk/")`

			`elif ms_filename == '':`
Remove PHOTOS_ROOT and DPhoto class 2020-05-15 21:32:55 +01:00			`pass`
enabled mugshots & blurb in people pages 2021-04-15 17:51:01 +01:00			`else:`
			`message = f"! Unrecognised type of file at mug_shot field '{ms_filename}' for {person.fullname}"`
			`print(message)`
			`DataIssue.objects.create(parser='people', message=message, url="/folk/")`

[svn] Photo file handling and mugshots parsing sorted. Made URL settings more relative, less redundant. Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8246 by aaron @ 2/18/2009 6:45 AM 2009-05-13 05:53:37 +01:00			`person.save()`
[svn] Julian playing with the logbooks and expoyears Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8180 by julian @ 1/18/2009 3:59 PM 2009-05-13 05:35:59 +01:00
enabled mugshots & blurb in people pages 2021-04-15 17:51:01 +01:00			`def load_people_expos():`
			`'''This is where the folk.csv file is parsed to read people's names.`
			`Which it gets wrong for people like Lydia-Clare Leather and various 'von' and 'de' middle 'names'`
			`and McLean and Mclean and McAdam - interaction with the url parser in urls.py too`
			`'''`
			`DataIssue.objects.filter(parser='people').delete()`
[svn] Julian playing with the logbooks and expoyears Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8180 by julian @ 1/18/2009 3:59 PM 2009-05-13 05:35:59 +01:00
enabled mugshots & blurb in people pages 2021-04-15 17:51:01 +01:00			`persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) # should really be EXPOFOLK I guess`
			`personreader = csv.reader(persontab) # this is an iterator`
Convert codebase for python3 usage 2020-05-24 01:57:06 +01:00			`headers = next(personreader)`
			`header = dict(list(zip(headers, list(range(len(headers))))))`
[svn] Julian playing with the logbooks and expoyears Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8180 by julian @ 1/18/2009 3:59 PM 2009-05-13 05:35:59 +01:00
[svn] yorkshire work with tunnel integration Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8232 by julian @ 1/29/2009 11:40 PM 2009-05-13 05:48:47 +01:00			`# make expeditions`
Thorough spring clean and profiling 2020-04-27 23:51:41 +01:00			`print(" - Loading expeditions")`
[svn] Julian playing with the logbooks and expoyears Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8180 by julian @ 1/18/2009 3:59 PM 2009-05-13 05:35:59 +01:00			`years = headers[5:]`
[svn] Photo file handling and mugshots parsing sorted. Made URL settings more relative, less redundant. Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8246 by aaron @ 2/18/2009 6:45 AM 2009-05-13 05:53:37 +01:00
[svn] Julian playing with the logbooks and expoyears Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8180 by julian @ 1/18/2009 3:59 PM 2009-05-13 05:35:59 +01:00			`for year in years:`
[svn] 2009-05-19 06:32:42 +01:00			`lookupAttribs = {'year':year}`
			`nonLookupAttribs = {'name':"CUCC expo %s" % year}`

fixing typos and changes in importing 2021-04-13 01:13:08 +01:00			`save_carefully(Expedition, lookupAttribs, nonLookupAttribs)`
[svn] Julian playing with the logbooks and expoyears Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8180 by julian @ 1/18/2009 3:59 PM 2009-05-13 05:35:59 +01:00
[svn] yorkshire work with tunnel integration Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8232 by julian @ 1/29/2009 11:40 PM 2009-05-13 05:48:47 +01:00			`# make persons`
Thorough spring clean and profiling 2020-04-27 23:51:41 +01:00			`print(" - Loading personexpeditions")`
[svn] Julian playing with the logbooks and expoyears Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8180 by julian @ 1/18/2009 3:59 PM 2009-05-13 05:35:59 +01:00
[svn] yorkshire work with tunnel integration Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8232 by julian @ 1/29/2009 11:40 PM 2009-05-13 05:48:47 +01:00			`for personline in personreader:`
			`name = personline[header["Name"]]`
Update new management command for DB reset Switch to content_type from mimetype Make DB reset not nuke so much Tidy logbook parser 2019-03-30 13:58:38 +00:00			`name = re.sub(r"<.*?>", "", name)`
Support html and wiki logbook entrys Move nearest_station to nearest_station_name and make nearest_station a foreign key to SurvexStation Lots of tidying 2019-03-31 15:39:53 +01:00
Updating caves and entrances is no longer nuclear! Big overhaul of people processing, fullname added to the model lastname is now names -1 unless you only have one (yes you Wookey) this allows for Jon Arne Toft and Wookey to live it the same DB names can now have html chars in them, this should be real unicode but that can only happen when we go to Python 3! 2019-04-19 22:52:54 +01:00			`firstname = ""`
			`nickname = ""`

			`rawlastname = personline[header["Lastname"]].strip()`
			`matchlastname = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname)`
			`lastname = matchlastname.group(1).strip()`

			`splitnick = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", name)`
			`fullname = splitnick.group(1)`

			`nickname = splitnick.group(2) or ""`

			`fullname = fullname.strip()`
			`names = fullname.split(' ')`
			`firstname = names[0]`
			`if len(names) == 1:`
			`lastname = ""`

Fully working dj 1.11.29 2020-06-19 16:39:05 +01:00			`if personline[header["VfHO member"]] =='':`
			`vfho = False`
			`else:`
			`vfho = True`

Updating caves and entrances is no longer nuclear! Big overhaul of people processing, fullname added to the model lastname is now names -1 unless you only have one (yes you Wookey) this allows for Jon Arne Toft and Wookey to live it the same DB names can now have html chars in them, this should be real unicode but that can only happen when we go to Python 3! 2019-04-19 22:52:54 +01:00			`lookupAttribs={'first_name':firstname, 'last_name':(lastname or "")}`
new report to make aliases visible 2022-10-08 22:17:53 +01:00			`nonLookupAttribs={'is_vfho':vfho, 'fullname':fullname, 'nickname':nickname}`
fixing typos and changes in importing 2021-04-13 01:13:08 +01:00			`person, created = save_carefully(Person, lookupAttribs, nonLookupAttribs)`
Support html and wiki logbook entrys Move nearest_station to nearest_station_name and make nearest_station a foreign key to SurvexStation Lots of tidying 2019-03-31 15:39:53 +01:00
enabled mugshots & blurb in people pages 2021-04-15 17:51:01 +01:00			`parse_blurb(personline=personline, header=header, person=person)`
[svn] Julian playing with the logbooks and expoyears Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8180 by julian @ 1/18/2009 3:59 PM 2009-05-13 05:35:59 +01:00
[svn] yorkshire work with tunnel integration Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8232 by julian @ 1/29/2009 11:40 PM 2009-05-13 05:48:47 +01:00			`# make person expedition from table`
Convert codebase for python3 usage 2020-05-24 01:57:06 +01:00			`for year, attended in list(zip(headers, personline))[5:]:`
fixing typos and changes in importing 2021-04-13 01:13:08 +01:00			`expedition = Expedition.objects.get(year=year)`
[svn] Julian playing with the logbooks and expoyears Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8180 by julian @ 1/18/2009 3:59 PM 2009-05-13 05:35:59 +01:00			`if attended == "1" or attended == "-1":`
[svn] 2009-05-19 06:32:42 +01:00			`lookupAttribs = {'person':person, 'expedition':expedition}`
fix schema and try cache caves import 2020-07-06 20:27:31 +01:00			`nonLookupAttribs = {'nickname':nickname, 'is_guest':(personline[header["Guest"]] == "1")}`
fixing typos and changes in importing 2021-04-13 01:13:08 +01:00			`save_carefully(PersonExpedition, lookupAttribs, nonLookupAttribs)`
Imports in control panel work again 2021-04-27 20:44:24 +01:00			`print("", flush=True)`
[svn] Julian playing with the logbooks and expoyears Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8180 by julian @ 1/18/2009 3:59 PM 2009-05-13 05:35:59 +01:00
new report to make aliases visible 2022-10-08 22:17:53 +01:00			`def who_is_this(year,possibleid):`
			`expo = Expedition.objects.filter(year=year)`
			`personexpedition = GetPersonExpeditionNameLookup(expo)[possibleid.lower()]`
			`if personexpedition:`
			`return personexpedition.person`
			`else:`
			`return None`

small chnages to name resolution 2022-10-07 21:47:05 +01:00			`# Refactor. The dict GetPersonExpeditionNameLookup(expo) indexes by name and has values of personexpedition`
			`# This is convoluted, the whole personexpedition concept is unnecessary.`

[svn] survey block object Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8199 by julian @ 1/19/2009 12:22 AM 2009-05-13 05:39:52 +01:00			`Gpersonexpeditionnamelookup = { }`
			`def GetPersonExpeditionNameLookup(expedition):`
			`global Gpersonexpeditionnamelookup`
Widen the recognizer capabilities for names 2022-10-09 00:29:53 +01:00
			`def apply_variations(f, l):`
			`f = f.lower()`
			`l = l.lower()`
			`variations = []`
			`variations.append(f)`
			`variations.append(l)`
More fixes to name resolution checking 2022-10-09 21:50:32 +01:00			`variations.append(f + l)`
Widen the recognizer capabilities for names 2022-10-09 00:29:53 +01:00			`variations.append(f + " " + l)`
			`variations.append(f + " " + l[0])`
			`variations.append(f + l[0])`
			`variations.append(f[0] + " " + l)`
			`variations.append(f[0] + l)`
			`variations.append(f[0] + l[0]) # initials e.g. gb or bl`
			`return variations`

[svn] survey block object Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8199 by julian @ 1/19/2009 12:22 AM 2009-05-13 05:39:52 +01:00			`res = Gpersonexpeditionnamelookup.get(expedition.name)`
Widen the recognizer capabilities for names 2022-10-09 00:29:53 +01:00
[svn] survey block object Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8199 by julian @ 1/19/2009 12:22 AM 2009-05-13 05:39:52 +01:00			`if res:`
			`return res`

[svn] yorkshire work with tunnel integration Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8232 by julian @ 1/29/2009 11:40 PM 2009-05-13 05:48:47 +01:00			`res = { }`
[svn] survey block object Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8199 by julian @ 1/19/2009 12:22 AM 2009-05-13 05:39:52 +01:00			`duplicates = set()`

add mysql startup documentation 2020-05-14 19:37:46 +01:00			`#print("Calculating GetPersonExpeditionNameLookup for " + expedition.year)`
fixing typos and changes in importing 2021-04-13 01:13:08 +01:00			`personexpeditions = PersonExpedition.objects.filter(expedition=expedition)`
abbrv. names now accepted when parsing logbooks, survex 2022-10-07 23:52:10 +01:00			`short = {}`
			`dellist = []`
[svn] survey block object Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8199 by julian @ 1/19/2009 12:22 AM 2009-05-13 05:39:52 +01:00			`for personexpedition in personexpeditions:`
			`possnames = [ ]`
WORKING both py3.9.10 & 3.8.10 (dj2.2.25) 2022-03-02 23:19:48 +00:00			`f = unidecode(unescape(personexpedition.person.first_name.lower()))`
			`l = unidecode(unescape(personexpedition.person.last_name.lower()))`
			`full = unidecode(unescape(personexpedition.person.fullname.lower()))`
Widen the recognizer capabilities for names 2022-10-09 00:29:53 +01:00			`n = unidecode(unescape(personexpedition.nickname.lower()))`
Updating caves and entrances is no longer nuclear! Big overhaul of people processing, fullname added to the model lastname is now names -1 unless you only have one (yes you Wookey) this allows for Jon Arne Toft and Wookey to live it the same DB names can now have html chars in them, this should be real unicode but that can only happen when we go to Python 3! 2019-04-19 22:52:54 +01:00			`if full not in possnames:`
			`possnames.append(full)`
Widen the recognizer capabilities for names 2022-10-09 00:29:53 +01:00			`if n not in possnames:`
			`possnames.append(n)`
[svn] survey block object Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8199 by julian @ 1/19/2009 12:22 AM 2009-05-13 05:39:52 +01:00
Widen the recognizer capabilities for names 2022-10-09 00:29:53 +01:00			`if l:`
			`possnames += apply_variations(f,l)`

			`if n:`
			`possnames += apply_variations(n, l)`
More fixes to name resolution checking 2022-10-09 21:50:32 +01:00
Widen the recognizer capabilities for names 2022-10-09 00:29:53 +01:00			`if f == "Robert".lower():`
			`possnames += apply_variations("Bob", l)`
			`if f == "Andrew".lower():`
			`possnames += apply_variations("Andy", l)`
			`if f == "Andy".lower():`
			`possnames += apply_variations("Andrew", l)`
			`if f == "Michael".lower():`
			`possnames += apply_variations("Mike", l)`
			`if f == "David".lower():`
			`possnames += apply_variations("Dave", l)`
More fixes to name resolution checking 2022-10-09 21:50:32 +01:00			`if f == "Dave".lower():`
			`possnames += apply_variations("David", l)`
Widen the recognizer capabilities for names 2022-10-09 00:29:53 +01:00			`if f == "Peter".lower():`
			`possnames += apply_variations("Pete", l)`
More fixes to name resolution checking 2022-10-09 21:50:32 +01:00			`if f == "Pete".lower():`
			`possnames += apply_variations("Peter", l)`
			`if f == "Olly".lower():`
			`possnames += apply_variations("Oliver", l)`
			`if f == "Oliver".lower():`
			`possnames += apply_variations("Olly", l)`

Widen the recognizer capabilities for names 2022-10-09 00:29:53 +01:00			`if f == "Becka".lower():`
			`possnames += apply_variations("Rebecca", l)`

			`if f'{f} {l}' == "Andy Waddington".lower():`
More fixes to name resolution checking 2022-10-09 21:50:32 +01:00			`possnames += apply_variations("aer", "waddington")`
Widen the recognizer capabilities for names 2022-10-09 00:29:53 +01:00			`if f'{f} {l}' == "Phil Underwood".lower():`
More fixes to name resolution checking 2022-10-09 21:50:32 +01:00			`possnames += apply_variations("phil", "underpants")`
Widen the recognizer capabilities for names 2022-10-09 00:29:53 +01:00			`if f'{f} {l}' == "Naomi Griffiths".lower():`
More fixes to name resolution checking 2022-10-09 21:50:32 +01:00			`possnames += apply_variations("naomi", "makins")`
			`if f'{f} {l}' == "Tina White".lower():`
			`possnames += apply_variations("tina", "richardson")`
Widen the recognizer capabilities for names 2022-10-09 00:29:53 +01:00			`if f'{f} {l}' == "Cat Hulse".lower():`
More fixes to name resolution checking 2022-10-09 21:50:32 +01:00			`possnames += apply_variations("catherine", "hulse")`
			`possnames += apply_variations("cat", "henry")`
Widen the recognizer capabilities for names 2022-10-09 00:29:53 +01:00			`if f'{f} {l}' == "Jess Stirrups".lower():`
More fixes to name resolution checking 2022-10-09 21:50:32 +01:00			`possnames += apply_variations("jessica", "stirrups")`
Widen the recognizer capabilities for names 2022-10-09 00:29:53 +01:00			`if f'{f} {l}' == "Nat Dalton".lower():`
More fixes to name resolution checking 2022-10-09 21:50:32 +01:00			`possnames += apply_variations("nathanael", "dalton") # correct. He has a weird spelling.`
Widen the recognizer capabilities for names 2022-10-09 00:29:53 +01:00			`if f'{f} {l}' == "Mike Richardson".lower():`
More fixes to name resolution checking 2022-10-09 21:50:32 +01:00			`possnames.append("mta")`
			`possnames.append("miketa")`
			`possnames.append("mike the animal")`
			`possnames.append("animal")`
Widen the recognizer capabilities for names 2022-10-09 00:29:53 +01:00			`if f'{f} {l}' == "Eric Landgraf".lower():`
More fixes to name resolution checking 2022-10-09 21:50:32 +01:00			`possnames.append("eric c.landgraf")`
			`possnames.append("eric c. landgraf")`
			`possnames.append("eric c landgraf")`
			`if f'{f} {l}' == "Nadia Raeburn".lower():`
			`possnames.append("nadia rc")`
			`possnames.append("nadia raeburn-cherradi")`

			`for i in [3, 4, 5, 6]:`
Widen the recognizer capabilities for names 2022-10-09 00:29:53 +01:00			`lim = min(i, len(f)+1) # short form, e.g. Dan for Daniel.`
			`if f[:lim] not in short:`
			`short[f[:lim]]= personexpedition`
			`else:`
			`dellist.append(f[:lim])`

			`possnames = set(possnames) # remove duplicates`
[svn] survey block object Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8199 by julian @ 1/19/2009 12:22 AM 2009-05-13 05:39:52 +01:00			`for possname in possnames:`
			`if possname in res:`
			`duplicates.add(possname)`
			`else:`
			`res[possname] = personexpedition`

			`for possname in duplicates:`
			`del res[possname]`
abbrv. names now accepted when parsing logbooks, survex 2022-10-07 23:52:10 +01:00
			`for possname in dellist:`
			`if possname in short: #always true ?`
			`del short[possname]`
			`for shortname in short:`
			`res[shortname] = short[shortname]`
Widen the recognizer capabilities for names 2022-10-09 00:29:53 +01:00
[svn] survey block object Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8199 by julian @ 1/19/2009 12:22 AM 2009-05-13 05:39:52 +01:00
			`Gpersonexpeditionnamelookup[expedition.name] = res`
			`return res`