Validation of mugshot or blurb file added

This commit is contained in:
Philip Sargent 2020-04-01 19:58:31 +01:00
parent 98eb9173ee
commit 53b797fb53

View File

@ -38,7 +38,13 @@ def parseMugShotAndBlurb(personline, header, person):
elif mugShotPath[-3:]=='htm': #if person has an html page, find the image(s) and add it. Also, add the text from the html page to the "blurb" field in his model instance. elif mugShotPath[-3:]=='htm': #if person has an html page, find the image(s) and add it. Also, add the text from the html page to the "blurb" field in his model instance.
personPageOld=open(mugShotPath,'r').read() personPageOld=open(mugShotPath,'r').read()
if not person.blurb: if not person.blurb:
person.blurb=re.search('<body>.*<hr',personPageOld,re.DOTALL).group() #this needs to be refined, take care of the HTML and make sure it doesn't match beyond the blurb pblurb=re.search('<body>.*<hr',personPageOld,re.DOTALL)
if pblurb:
#this needs to be refined, take care of the HTML and make sure it doesn't match beyond the blurb.
#Only finds the first image, not all of them
person.blurb=re.search('<body>.*<hr',personPageOld,re.DOTALL).group()
else:
print "ERROR: --------------- Broken link or Blurb parse error in ", mugShotFilename
for mugShotFilename in re.findall('i/.*?jpg',personPageOld,re.DOTALL): for mugShotFilename in re.findall('i/.*?jpg',personPageOld,re.DOTALL):
mugShotPath = os.path.join(settings.EXPOWEB, "folk", mugShotFilename) mugShotPath = os.path.join(settings.EXPOWEB, "folk", mugShotFilename)
saveMugShot(mugShotPath=mugShotPath, mugShotFilename=mugShotFilename, person=person) saveMugShot(mugShotPath=mugShotPath, mugShotFilename=mugShotFilename, person=person)