
Updating caves and entrances is no longer nuclear!

Big overhaul of people processing; fullname added to the model.
lastname is now names[-1] unless you only have one name (yes you, Wookey) -
this allows Jon Arne Toft and Wookey to live in the same DB.
Names can now have HTML chars in them; this should be real Unicode, but that can
only happen when we go to Python 3!
Sam Wenham 2019-04-19 22:52:54 +01:00
parent 2f9870644b
commit 768ec83037
5 changed files with 55 additions and 34 deletions
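
The name handling described in the commit message boils down to a little string parsing. Below is a minimal standalone sketch (plain Python, not troggle code and not part of this commit) that mirrors the regexes used in the new parsers/people.py further down; the example rows are made up.

    import re

    def split_name(name, rawlastname):
        # Mirrors the parsing in the new parsers/people.py: the "Name" cell gives
        # fullname plus an optional "(nickname)", the "Lastname" cell gives lastname,
        # and a single-word fullname means there is no lastname at all.
        matchlastname = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname.strip())
        lastname = matchlastname.group(1).strip()

        splitnick = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", name)
        fullname = splitnick.group(1).strip()
        nickname = splitnick.group(2) or ""

        names = fullname.split(' ')
        firstname = names[0]
        if len(names) == 1:   # a single-word name (yes you, Wookey)
            lastname = ""
        return firstname, lastname, fullname, nickname

    print(split_name("Jon Arne Toft", "Toft"))            # ('Jon', 'Toft', 'Jon Arne Toft', '')
    print(split_name("Wookey", "Wookey"))                 # ('Wookey', '', 'Wookey', '')
    print(split_name("Ann Example (Annie)", "Example"))   # ('Ann', 'Example', 'Ann Example', 'Annie')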


@@ -57,7 +57,7 @@ class TroggleModel(models.Model):
     class Meta:
         abstract = True
 
-class TroggleImageModel(ImageModel):
+class TroggleImageModel(models.Model):
     new_since_parsing = models.BooleanField(default=False, editable=False)
 
     def object_name(self):
@@ -123,16 +123,13 @@ class ExpeditionDay(TroggleModel):
 class Person(TroggleModel):
     first_name = models.CharField(max_length=100)
     last_name = models.CharField(max_length=100)
+    fullname = models.CharField(max_length=200)
     is_vfho = models.BooleanField(help_text="VFHO is the Vereines für Höhlenkunde in Obersteier, a nearby Austrian caving club.", default=False)
     mug_shot = models.CharField(max_length=100, blank=True,null=True)
     blurb = models.TextField(blank=True,null=True)
     #href = models.CharField(max_length=200)
     orderref = models.CharField(max_length=200) # for alphabetic
-    #the below have been removed and made methods. I'm not sure what the b in bisnotable stands for. - AC 16 Feb
-    #notability = models.FloatField() # for listing the top 20 people
-    #bisnotable = models.BooleanField(default=False)
     user = models.OneToOneField(User, null=True, blank=True)
 
     def get_absolute_url(self):
         return urlparse.urljoin(settings.URL_ROOT,reverse('person',kwargs={'first_name':self.first_name,'last_name':self.last_name}))


@@ -42,7 +42,7 @@ def make_dirs():
 def import_caves():
     import parsers.caves
-    print("importing caves")
+    print("Importing Caves")
     parsers.caves.readcaves()
 
 def import_people():
@@ -195,9 +195,6 @@ if __name__ == "__main__":
     elif "scans" in sys.argv:
         import_surveyscans()
     elif "caves" in sys.argv:
-        # reload_db()
-        # make_dirs()
-        # pageredirects()
         import_caves()
     elif "people" in sys.argv:
         import_people()


@@ -6,16 +6,18 @@ import re
 def readcaves():
-    area_1623 = models.Area(short_name = "1623", parent = None)
-    area_1623.save()
-    area_1626 = models.Area(short_name = "1626", parent = None)
-    area_1626.save()
-    print("Reading Entrances")
+    # Clear the cave data issues as we are reloading
+    models.DataIssue.objects.filter(parser='caves').delete()
+    area_1623 = models.Area.objects.update_or_create(short_name = "1623", parent = None)
+    area_1626 = models.Area.objects.update_or_create(short_name = "1626", parent = None)
+    print(" - Reading Entrances")
     #print "list of <Slug> <Filename>"
     for filename in os.walk(settings.ENTRANCEDESCRIPTIONS).next()[2]: #Should be a better way of getting a list of files
         if filename.endswith('.html'):
             readentrance(filename)
-    print ("Reading Caves")
+    print (" - Reading Caves")
     for filename in os.walk(settings.CAVEDESCRIPTIONS).next()[2]: #Should be a better way of getting a list of files
         if filename.endswith('.html'):
             readcave(filename)
@@ -51,7 +53,7 @@ def readentrance(filename):
     bearings = getXML(entrancecontents, "bearings", maxItems = 1, context = context)
     url = getXML(entrancecontents, "url", maxItems = 1, context = context)
     if len(non_public) == 1 and len(slugs) >= 1 and len(name) >= 1 and len(entrance_description) == 1 and len(explorers) == 1 and len(map_description) == 1 and len(location_description) == 1 and len(approach) == 1 and len(underground_description) == 1 and len(marking) == 1 and len(marking_comment) == 1 and len(findability) == 1 and len(findability_description) == 1 and len(alt) == 1 and len(northing) == 1 and len(easting) == 1 and len(tag_station) == 1 and len(exact_station) == 1 and len(other_station) == 1 and len(other_description) == 1 and len(bearings) == 1 and len(url) == 1:
-        e = models.Entrance(name = name[0],
+        e, state = models.Entrance.objects.update_or_create(name = name[0],
                             non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
                             entrance_description = entrance_description[0],
                             explorers = explorers[0],
@@ -75,14 +77,12 @@ def readentrance(filename):
                             url = url[0],
                             filename = filename,
                             cached_primary_slug = slugs[0])
-        e.save()
         primary = True
         for slug in slugs:
             #print slug, filename
-            cs = models.EntranceSlug(entrance = e,
+            cs = models.EntranceSlug.objects.update_or_create(entrance = e,
                                      slug = slug,
                                      primary = primary)
-            cs.save()
             primary = False
 
 def readcave(filename):
@@ -117,7 +117,7 @@ def readcave(filename):
     url = getXML(cavecontents, "url", maxItems = 1, context = context)
     entrances = getXML(cavecontents, "entrance", context = context)
     if len(non_public) == 1 and len(slugs) >= 1 and len(official_name) == 1 and len(areas) >= 1 and len(kataster_code) == 1 and len(kataster_number) == 1 and len(unofficial_number) == 1 and len(explorers) == 1 and len(underground_description) == 1 and len(equipment) == 1 and len(references) == 1 and len(survey) == 1 and len(kataster_status) == 1 and len(underground_centre_line) == 1 and len(notes) == 1 and len(length) == 1 and len(depth) == 1 and len(extent) == 1 and len(survex_file) == 1 and len(description_file ) == 1 and len(url) == 1 and len(entrances) >= 1:
-        c = models.Cave(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
+        c, state = models.Cave.objects.update_or_create(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
                         official_name = official_name[0],
                         kataster_code = kataster_code[0],
                         kataster_number = kataster_number[0],
@@ -137,7 +137,6 @@ def readcave(filename):
                         description_file = description_file[0],
                         url = url[0],
                         filename = filename)
-        c.save()
         for area_slug in areas:
             area = models.Area.objects.filter(short_name = area_slug)
             if area:
@@ -149,12 +148,13 @@ def readcave(filename):
         primary = True
         for slug in slugs:
             try:
-                cs = models.CaveSlug(cave = c,
+                cs = models.CaveSlug.objects.update_or_create(cave = c,
                                      slug = slug,
                                      primary = primary)
-                cs.save()
             except:
-                print("Can't find text (slug): %s, skipping %s" % (slug, context))
+                message = "Can't find text (slug): %s, skipping %s" % (slug, context)
+                models.DataIssue.objects.create(parser='caves', message=message)
+                print(message)
             primary = False
 
         for entrance in entrances:
@@ -162,10 +162,11 @@ def readcave(filename):
             letter = getXML(entrance, "letter", maxItems = 1, context = context)[0]
             try:
                 entrance = models.Entrance.objects.get(entranceslug__slug = slug)
-                ce = models.CaveAndEntrance(cave = c, entrance_letter = letter, entrance = entrance)
-                ce.save()
+                ce = models.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)
             except:
-                print ("Entrance text (slug) %s missing %s" % (slug, context))
+                message = "Entrance text (slug) %s missing %s" % (slug, context)
+                models.DataIssue.objects.create(parser='caves', message=message)
+                print(message)
 
 def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):
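
The "no longer nuclear" part is the switch in the hunks above from constructing a model object and calling .save() on every reload to Django's update_or_create(), plus recording failures as DataIssue rows instead of only printing them. A condensed view of the pattern, shown in isolation (it assumes the troggle models import used by the parser and is not runnable outside the project):

    # Old, "nuclear" pattern: always creates a brand-new row on each reload.
    area = models.Area(short_name = "1623", parent = None)
    area.save()

    # New pattern: reuses the matching row if one exists, otherwise creates it.
    # Note that update_or_create() returns an (object, created) tuple.
    area, created = models.Area.objects.update_or_create(short_name = "1623", parent = None)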


@@ -65,11 +65,27 @@ def LoadPersonsExpos():
     for personline in personreader:
         name = personline[header["Name"]]
         name = re.sub(r"<.*?>", "", name)
-        mname = re.match(r"(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name)
-        nickname = mname.group(3) or ""
-        lookupAttribs={'first_name':mname.group(1), 'last_name':(mname.group(2) or "")}
-        nonLookupAttribs={'is_vfho':personline[header["VfHO member"]],}
+        firstname = ""
+        nickname = ""
+        rawlastname = personline[header["Lastname"]].strip()
+        matchlastname = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname)
+        lastname = matchlastname.group(1).strip()
+        splitnick = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", name)
+        fullname = splitnick.group(1)
+        nickname = splitnick.group(2) or ""
+        fullname = fullname.strip()
+        names = fullname.split(' ')
+        firstname = names[0]
+        if len(names) == 1:
+            lastname = ""
+        lookupAttribs={'first_name':firstname, 'last_name':(lastname or "")}
+        nonLookupAttribs={'is_vfho':personline[header["VfHO member"]], 'fullname':fullname}
         person, created = save_carefully(models.Person, lookupAttribs, nonLookupAttribs)
 
         parseMugShotAndBlurb(personline=personline, header=header, person=person)
@@ -120,14 +136,24 @@ def GetPersonExpeditionNameLookup(expedition):
         possnames = [ ]
         f = personexpedition.person.first_name.lower()
         l = personexpedition.person.last_name.lower()
+        full = personexpedition.person.fullname.lower()
         if l:
             possnames.append(f + " " + l)
             possnames.append(f + " " + l[0])
             possnames.append(f + l[0])
             possnames.append(f[0] + " " + l)
         possnames.append(f)
-        if personexpedition.nickname:
+        if full not in possnames:
+            possnames.append(full)
+        if personexpedition.nickname not in possnames:
             possnames.append(personexpedition.nickname.lower())
+            if l:
+                # This allows for nickname to be used for short name eg Phil
+                # adding Phil Sargent to the list
+                if str(personexpedition.nickname.lower() + " " + l) not in possnames:
+                    possnames.append(personexpedition.nickname.lower() + " " + l)
+                if str(personexpedition.nickname.lower() + " " + l[0]) not in possnames:
+                    possnames.append(personexpedition.nickname.lower() + " " + l[0])
 
         for possname in possnames:
             if possname in res:
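
For reference, the extended lookup above now tolerates several spellings of the same person. The following standalone sketch (plain Python, no Django, not part of the commit) builds essentially the same possnames list; the person and nickname values are only examples:

    def possible_names(first_name, last_name, fullname, nickname):
        f, l, full = first_name.lower(), last_name.lower(), fullname.lower()
        possnames = []
        if l:
            possnames.append(f + " " + l)
            possnames.append(f + " " + l[0])
            possnames.append(f + l[0])
            possnames.append(f[0] + " " + l)
        possnames.append(f)
        if full not in possnames:
            possnames.append(full)
        if nickname not in possnames:   # mirrors the original's check on the raw nickname
            possnames.append(nickname.lower())
            if l:
                # nickname-based variants, e.g. "phil sargent" and "phil s"
                if nickname.lower() + " " + l not in possnames:
                    possnames.append(nickname.lower() + " " + l)
                if nickname.lower() + " " + l[0] not in possnames:
                    possnames.append(nickname.lower() + " " + l[0])
        return possnames

    print(possible_names("Philip", "Sargent", "Philip Sargent", "Phil"))
    # ['philip sargent', 'philip s', 'philips', 'p sargent', 'philip',
    #  'phil', 'phil sargent', 'phil s']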


@@ -35,12 +35,12 @@ actualurlpatterns = patterns('',
     url(r'^newqmnumber/?$', views_other.ajax_QM_number, ),
     url(r'^lbo_suggestions/?$', logbook_entry_suggestions),
     #(r'^person/(?P<person_id>\d*)/?$', views_logbooks.person),
-    url(r'^person/(?P<first_name>[A-Z]*[a-z\-\']*)[^a-zA-Z]*(?P<last_name>[a-z\-\']*[^a-zA-Z]*[A-Z]*[a-z\-]*)/?', views_logbooks.person, name="person"),
+    url(r'^person/(?P<first_name>[A-Z]*[a-z\-\'&;]*)[^a-zA-Z]*(?P<last_name>[a-z\-\']*[^a-zA-Z]*[A-Z]*[a-z\-&;]*)/?', views_logbooks.person, name="person"),
     #url(r'^person/(\w+_\w+)$', views_logbooks.person, name="person"),
     url(r'^expedition/(\d+)$', views_logbooks.expedition, name="expedition"),
     url(r'^expeditions/?$', views_logbooks.ExpeditionListView.as_view(), name="expeditions"),
-    url(r'^personexpedition/(?P<first_name>[A-Z]*[a-z]*)[^a-zA-Z]*(?P<last_name>[A-Z]*[a-zA-Z]*)/(?P<year>\d+)/?$', views_logbooks.personexpedition, name="personexpedition"),
+    url(r'^personexpedition/(?P<first_name>[A-Z]*[a-z&;]*)[^a-zA-Z]*(?P<last_name>[A-Z]*[a-zA-Z&;]*)/(?P<year>\d+)/?$', views_logbooks.personexpedition, name="personexpedition"),
     url(r'^logbookentry/(?P<date>.*)/(?P<slug>.*)/?$', views_logbooks.logbookentry,name="logbookentry"),
     url(r'^newlogbookentry/(?P<expeditionyear>.*)$', views_logbooks.newLogbookEntry, name="newLogBookEntry"),
     url(r'^editlogbookentry/(?P<expeditionyear>[^/]*)/(?P<pdate>[^/]*)/(?P<pslug>[^/]*)/$', views_logbooks.newLogbookEntry, name="editLogBookEntry"),