enabled mugshots & blurb in people pages

This commit is contained in:
Philip Sargent
2021-04-15 17:51:01 +01:00
parent 7124d978d3
commit 27491c933a
8 changed files with 86 additions and 57 deletions

View File

@@ -1,11 +1,13 @@
import csv, re, datetime, os, shutil
from html.parser import HTMLParser
from unidecode import unidecode
from pathlib import Path
from django.conf import settings
from troggle.core.models.troggle import Expedition, Person, PersonExpedition
from troggle.core.utils import save_carefully
from troggle.core.models.troggle import DataIssue
from troggle.core.utils import save_carefully, TROG
'''These functions do not match how the stand-alone folk script works. So the script produces an HTML file which has
href links to pages in troggle which troggle does not think are right.
@@ -13,32 +15,59 @@ The standalone script needs to be rendered defunct, and all the parsing needs to
or they should use the same code by importing a module.
'''
def parse_blurb(personline, header, person):
    """Set a person's mug shot or blurb from the "Mugshot" column of a folk.csv row.

    The column value is a path relative to the "folk" directory under
    settings.EXPOWEB and is handled according to its prefix:

    * 'i/...' (e.g. 'i/adama2018.jpg'): the person only has an image.
      person.mug_shot becomes the '/folk/...' URL and person.blurb is cleared.
    * 'l/...' (e.g. 'l/ollybetts.htm'): the person has an HTML page. The text
      between '<body>' and the last '<hr' in the file becomes person.blurb,
      with relative '../i/' image sources rewritten to '/folk/i/', and
      person.mug_shot is cleared.
    * anything else is recorded as a DataIssue.

    An empty field leaves the person untouched apart from being saved.

    Args:
        personline: one row of folk.csv, as a sequence of strings.
        header: mapping from column name to index into personline.
        person: the Person whose mug_shot / blurb fields are updated.

    Returns:
        None. If the field names a file that does not exist, a DataIssue is
        recorded and the function returns WITHOUT saving the person.
    """
    ms_filename = personline[header["Mugshot"]]
    if not ms_filename:
        # No mugshot or blurb recorded for this person.
        person.save()
        return

    ms_path = Path(settings.EXPOWEB, "folk", ms_filename)
    if not ms_path.is_file():
        message = f"! INVALID mug_shot field '{ms_filename}' for {person.fullname}"
        print(message)
        DataIssue.objects.create(parser='people', message=message, url=f"/person/{person.fullname}")
        return

    if ms_filename.startswith('i/'):
        # This value is a URL, not a filesystem path, so force forward slashes
        # whatever the host OS is.
        person.mug_shot = Path("/folk", ms_filename).as_posix()
        person.blurb = None
    elif ms_filename.startswith('l/'):
        # The file may contain <img src="../i/mymug.jpg"> images.
        with open(ms_path, 'r') as blurbfile:
            blrb = blurbfile.read()
        # Greedy match with DOTALL: captures everything up to the LAST '<hr'.
        pblurb = re.search(r'<body>(.*)<hr', blrb, re.DOTALL)
        if pblurb:
            person.mug_shot = None
            fragment = pblurb.group(1)
            # Relative image links only work from inside folk/l/, so make them
            # site-absolute (both quoting styles).
            fragment = fragment.replace('src="../i/', 'src="/folk/i/')
            fragment = fragment.replace("src='../i/", "src='/folk/i/")
            # Intended to drop heading elements such as <h1>...</h1> from the blurb.
            fragment = re.sub(r'<h.*>[^<]*</h.>', '', fragment)
            person.blurb = fragment
        else:
            message = f"! Blurb parse error in {ms_filename}"
            print(message)
            DataIssue.objects.create(parser='people', message=message, url="/folk/")
    else:
        message = f"! Unrecognised type of file at mug_shot field '{ms_filename}' for {person.fullname}"
        print(message)
        DataIssue.objects.create(parser='people', message=message, url="/folk/")

    person.save()


# Backward-compatible alias: the pre-rename name is still used by an older call site.
parseMugShotAndBlurb = parse_blurb
def LoadPersonsExpos():
def load_people_expos():
'''This is where the folk.csv file is parsed to read people's names.
Which it gets wrong for people like Lydia-Clare Leather and various 'von' and 'de' middle 'names'
and McLean and Mclean and McAdam - interaction with the url parser in urls.py too
'''
DataIssue.objects.filter(parser='people').delete()
persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv"))
personreader = csv.reader(persontab)
persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) # should really be EXPOFOLK I guess
personreader = csv.reader(persontab) # this is an iterator
headers = next(personreader)
header = dict(list(zip(headers, list(range(len(headers))))))
@@ -86,7 +115,7 @@ def LoadPersonsExpos():
nonLookupAttribs={'is_vfho':vfho, 'fullname':fullname}
person, created = save_carefully(Person, lookupAttribs, nonLookupAttribs)
parseMugShotAndBlurb(personline=personline, header=header, person=person)
parse_blurb(personline=personline, header=header, person=person)
# make person expedition from table
for year, attended in list(zip(headers, personline))[5:]: