enabled mugshots & blurb in people pages

This commit is contained in:
Philip Sargent
2021-04-15 17:51:01 +01:00
parent 7124d978d3
commit 27491c933a
8 changed files with 86 additions and 57 deletions

View File

@@ -27,7 +27,7 @@ def import_people():
print("-- Importing People (folk.csv) to ",end="")
print(django.db.connections.databases['default']['NAME'])
with transaction.atomic():
troggle.parsers.people.LoadPersonsExpos()
troggle.parsers.people.load_people_expos()
def import_surveyscans():
print("-- Importing Survey Scans")

View File

@@ -1,11 +1,13 @@
import csv, re, datetime, os, shutil
from html.parser import HTMLParser
from unidecode import unidecode
from pathlib import Path
from django.conf import settings
from troggle.core.models.troggle import Expedition, Person, PersonExpedition
from troggle.core.utils import save_carefully
from troggle.core.models.troggle import DataIssue
from troggle.core.utils import save_carefully, TROG
'''These functions do not match how the stand-alone folk script works. So the script produces an HTML file which has
href links to pages in troggle which troggle does not think are right.
@@ -13,32 +15,59 @@ The standalone script needs to be renedred defucnt, and all the parsing needs to
or they should use the same code by importing a module.
'''
def parseMugShotAndBlurb(personline, header, person):
def parse_blurb(personline, header, person):
"""create mugshot Photo instance"""
mugShotFilename=personline[header["Mugshot"]]
mugShotPath = os.path.join(settings.EXPOWEB, "folk", mugShotFilename)
if mugShotPath[-3:]=='jpg': #if person just has an image, add it
#saveMugShot(mugShotPath=mugShotPath, mugShotFilename=mugShotFilename, person=person)
ms_filename = personline[header["Mugshot"]]
ms_path = Path(settings.EXPOWEB, "folk", ms_filename)
if ms_filename:
if not ms_path.is_file():
message = f"! INVALID mug_shot field '{ms_filename}' for {person.fullname}"
print(message)
DataIssue.objects.create(parser='people', message=message, url=f"/person/{person.fullname}")
return
if ms_filename.startswith('i/'):
#if person just has an image, add it. It has format 'i/adama2018.jpg'
person.mug_shot = str(Path("/folk", ms_filename))
person.blurb = None
elif ms_filename.startswith('l/'):
# it has the format 'l/ollybetts.htm' the file may contain <img src="../i/mymug.jpg"> images
with open(ms_path,'r') as blurbfile:
blrb = blurbfile.read()
pblurb=re.search(r'<body>.*<hr',blrb,re.DOTALL)
if pblurb:
person.mug_shot = None
fragment= re.search('<body>(.*)<hr',blrb,re.DOTALL).group(1)
fragment = fragment.replace('src="../i/', 'src="/folk/i/')
fragment = fragment.replace("src='../i/", "src='/folk/i/")
fragment = re.sub(r'<h.*>[^<]*</h.>', '', fragment)
# replace src="../i/ with src="/folk/i
person.blurb = fragment
else:
message = f"! Blurb parse error in {ms_filename}"
print(message)
DataIssue.objects.create(parser='people', message=message, url="/folk/")
elif ms_filename == '':
pass
elif mugShotPath[-3:]=='htm': #if person has an html page, find the image(s) and add it. Also, add the text from the html page to the "blurb" field in his model instance.
personPageOld=open(mugShotPath,'r').read()
if not person.blurb:
pblurb=re.search('<body>.*<hr',personPageOld,re.DOTALL)
if pblurb:
#this needs to be refined, take care of the HTML and make sure it doesn't match beyond the blurb.
#Only finds the first image, not all of them
person.blurb=re.search('<body>.*<hr',personPageOld,re.DOTALL).group()
else:
print("ERROR: --------------- Broken link or Blurb parse error in ", mugShotFilename)
#for mugShotFilename in re.findall('i/.*?jpg',personPageOld,re.DOTALL):
# mugShotPath = os.path.join(settings.EXPOWEB, "folk", mugShotFilename)
# saveMugShot(mugShotPath=mugShotPath, mugShotFilename=mugShotFilename, person=person)
else:
message = f"! Unrecognised type of file at mug_shot field '{ms_filename}' for {person.fullname}"
print(message)
DataIssue.objects.create(parser='people', message=message, url="/folk/")
person.save()
def LoadPersonsExpos():
def load_people_expos():
'''This is where the folk.csv file is parsed to read people's names.
Which it gets wrong for people like Lydia-Clare Leather and various 'von' and 'de' middle 'names'
and McLean and Mclean and McAdam - interaction with the url parser in urls.py too
'''
DataIssue.objects.filter(parser='people').delete()
persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv"))
personreader = csv.reader(persontab)
persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) # should really be EXPOFOLK I guess
personreader = csv.reader(persontab) # this is an iterator
headers = next(personreader)
header = dict(list(zip(headers, list(range(len(headers))))))
@@ -86,7 +115,7 @@ def LoadPersonsExpos():
nonLookupAttribs={'is_vfho':vfho, 'fullname':fullname}
person, created = save_carefully(Person, lookupAttribs, nonLookupAttribs)
parseMugShotAndBlurb(personline=personline, header=header, person=person)
parse_blurb(personline=personline, header=header, person=person)
# make person expedition from table
for year, attended in list(zip(headers, personline))[5:]:

View File

@@ -576,6 +576,7 @@ class LoadingSurvex():
return self.caveslist[cavepath.lower()]
# TO DO - some of this is already done in generating self.caveslist so simplify this
# esp. as it is in a loop.
# TO DO recognise cave if different name, e.g. gruenstein == 281
path_match = self.rx_cave.search(cavepath)
if path_match:
sluggy = '{}-{}'.format(path_match.group(1), path_match.group(2))
@@ -608,9 +609,15 @@ class LoadingSurvex():
"""Ignore surface, kataser and gps *include survex files
"""
if headpath in self.ignorenoncave:
#message = f" - {headpath} is <ignorenoncave> (while creating '{includelabel}' sfile & sdirectory)"
#print("\n"+message)
#print("\n"+message,file=sys.stderr)
return
for i in self.ignoreprefix:
if headpath.startswith(i):
#message = f" - {headpath} starts with <ignoreprefix> (while creating '{includelabel}' sfile & sdirectory)"
#print("\n"+message)
#print("\n"+message,file=sys.stderr)
return
message = " ! {} is not a cave. (while creating '{}' sfile & sdirectory)".format(headpath, includelabel)
print("\n"+message)