diff --git a/core/models.py b/core/models.py index 223d447..f65efed 100644 --- a/core/models.py +++ b/core/models.py @@ -57,7 +57,7 @@ class TroggleModel(models.Model): class Meta: abstract = True -class TroggleImageModel(ImageModel): +class TroggleImageModel(models.Model): new_since_parsing = models.BooleanField(default=False, editable=False) def object_name(self): @@ -123,16 +123,13 @@ class ExpeditionDay(TroggleModel): class Person(TroggleModel): first_name = models.CharField(max_length=100) last_name = models.CharField(max_length=100) + fullname = models.CharField(max_length=200) is_vfho = models.BooleanField(help_text="VFHO is the Vereines für Höhlenkunde in Obersteier, a nearby Austrian caving club.", default=False) mug_shot = models.CharField(max_length=100, blank=True,null=True) blurb = models.TextField(blank=True,null=True) #href = models.CharField(max_length=200) orderref = models.CharField(max_length=200) # for alphabetic - - #the below have been removed and made methods. I'm not sure what the b in bisnotable stands for. - AC 16 Feb - #notability = models.FloatField() # for listing the top 20 people - #bisnotable = models.BooleanField(default=False) user = models.OneToOneField(User, null=True, blank=True) def get_absolute_url(self): return urlparse.urljoin(settings.URL_ROOT,reverse('person',kwargs={'first_name':self.first_name,'last_name':self.last_name})) diff --git a/databaseReset.py b/databaseReset.py index 7a5d0fa..fd9b83a 100644 --- a/databaseReset.py +++ b/databaseReset.py @@ -42,7 +42,7 @@ def make_dirs(): def import_caves(): import parsers.caves - print("importing caves") + print("Importing Caves") parsers.caves.readcaves() def import_people(): @@ -195,9 +195,6 @@ if __name__ == "__main__": elif "scans" in sys.argv: import_surveyscans() elif "caves" in sys.argv: - # reload_db() - # make_dirs() - # pageredirects() import_caves() elif "people" in sys.argv: import_people() diff --git a/parsers/caves.py b/parsers/caves.py index 2c28365..606007f 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -6,16 +6,18 @@ import re def readcaves(): - area_1623 = models.Area(short_name = "1623", parent = None) - area_1623.save() - area_1626 = models.Area(short_name = "1626", parent = None) - area_1626.save() - print("Reading Entrances") + + # Clear the cave data issues as we are reloading + models.DataIssue.objects.filter(parser='caves').delete() + + area_1623 = models.Area.objects.update_or_create(short_name = "1623", parent = None) + area_1626 = models.Area.objects.update_or_create(short_name = "1626", parent = None) + print(" - Reading Entrances") #print "list of " for filename in os.walk(settings.ENTRANCEDESCRIPTIONS).next()[2]: #Should be a better way of getting a list of files if filename.endswith('.html'): readentrance(filename) - print ("Reading Caves") + print (" - Reading Caves") for filename in os.walk(settings.CAVEDESCRIPTIONS).next()[2]: #Should be a better way of getting a list of files if filename.endswith('.html'): readcave(filename) @@ -51,7 +53,7 @@ def readentrance(filename): bearings = getXML(entrancecontents, "bearings", maxItems = 1, context = context) url = getXML(entrancecontents, "url", maxItems = 1, context = context) if len(non_public) == 1 and len(slugs) >= 1 and len(name) >= 1 and len(entrance_description) == 1 and len(explorers) == 1 and len(map_description) == 1 and len(location_description) == 1 and len(approach) == 1 and len(underground_description) == 1 and len(marking) == 1 and len(marking_comment) == 1 and len(findability) == 1 and len(findability_description) == 1 and len(alt) == 1 and len(northing) == 1 and len(easting) == 1 and len(tag_station) == 1 and len(exact_station) == 1 and len(other_station) == 1 and len(other_description) == 1 and len(bearings) == 1 and len(url) == 1: - e = models.Entrance(name = name[0], + e, state = models.Entrance.objects.update_or_create(name = name[0], non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]], entrance_description = entrance_description[0], explorers = explorers[0], @@ -75,14 +77,12 @@ def readentrance(filename): url = url[0], filename = filename, cached_primary_slug = slugs[0]) - e.save() primary = True for slug in slugs: #print slug, filename - cs = models.EntranceSlug(entrance = e, + cs = models.EntranceSlug.objects.update_or_create(entrance = e, slug = slug, primary = primary) - cs.save() primary = False def readcave(filename): @@ -117,7 +117,7 @@ def readcave(filename): url = getXML(cavecontents, "url", maxItems = 1, context = context) entrances = getXML(cavecontents, "entrance", context = context) if len(non_public) == 1 and len(slugs) >= 1 and len(official_name) == 1 and len(areas) >= 1 and len(kataster_code) == 1 and len(kataster_number) == 1 and len(unofficial_number) == 1 and len(explorers) == 1 and len(underground_description) == 1 and len(equipment) == 1 and len(references) == 1 and len(survey) == 1 and len(kataster_status) == 1 and len(underground_centre_line) == 1 and len(notes) == 1 and len(length) == 1 and len(depth) == 1 and len(extent) == 1 and len(survex_file) == 1 and len(description_file ) == 1 and len(url) == 1 and len(entrances) >= 1: - c = models.Cave(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]], + c, state = models.Cave.objects.update_or_create(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]], official_name = official_name[0], kataster_code = kataster_code[0], kataster_number = kataster_number[0], @@ -137,7 +137,6 @@ def readcave(filename): description_file = description_file[0], url = url[0], filename = filename) - c.save() for area_slug in areas: area = models.Area.objects.filter(short_name = area_slug) if area: @@ -149,12 +148,13 @@ def readcave(filename): primary = True for slug in slugs: try: - cs = models.CaveSlug(cave = c, + cs = models.CaveSlug.objects.update_or_create(cave = c, slug = slug, primary = primary) - cs.save() except: - print("Can't find text (slug): %s, skipping %s" % (slug, context)) + message = "Can't find text (slug): %s, skipping %s" % (slug, context) + models.DataIssue.objects.create(parser='caves', message=message) + print(message) primary = False for entrance in entrances: @@ -162,10 +162,11 @@ def readcave(filename): letter = getXML(entrance, "letter", maxItems = 1, context = context)[0] try: entrance = models.Entrance.objects.get(entranceslug__slug = slug) - ce = models.CaveAndEntrance(cave = c, entrance_letter = letter, entrance = entrance) - ce.save() + ce = models.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance) except: - print ("Entrance text (slug) %s missing %s" % (slug, context)) + message = "Entrance text (slug) %s missing %s" % (slug, context) + models.DataIssue.objects.create(parser='caves', message=message) + print(message) def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""): diff --git a/parsers/people.py b/parsers/people.py index 4bf84a0..eb877f2 100644 --- a/parsers/people.py +++ b/parsers/people.py @@ -65,11 +65,27 @@ def LoadPersonsExpos(): for personline in personreader: name = personline[header["Name"]] name = re.sub(r"<.*?>", "", name) - mname = re.match(r"(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name) - nickname = mname.group(3) or "" - lookupAttribs={'first_name':mname.group(1), 'last_name':(mname.group(2) or "")} - nonLookupAttribs={'is_vfho':personline[header["VfHO member"]],} + firstname = "" + nickname = "" + + rawlastname = personline[header["Lastname"]].strip() + matchlastname = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname) + lastname = matchlastname.group(1).strip() + + splitnick = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", name) + fullname = splitnick.group(1) + + nickname = splitnick.group(2) or "" + + fullname = fullname.strip() + names = fullname.split(' ') + firstname = names[0] + if len(names) == 1: + lastname = "" + + lookupAttribs={'first_name':firstname, 'last_name':(lastname or "")} + nonLookupAttribs={'is_vfho':personline[header["VfHO member"]], 'fullname':fullname} person, created = save_carefully(models.Person, lookupAttribs, nonLookupAttribs) parseMugShotAndBlurb(personline=personline, header=header, person=person) @@ -120,14 +136,24 @@ def GetPersonExpeditionNameLookup(expedition): possnames = [ ] f = personexpedition.person.first_name.lower() l = personexpedition.person.last_name.lower() + full = personexpedition.person.fullname.lower() if l: possnames.append(f + " " + l) possnames.append(f + " " + l[0]) possnames.append(f + l[0]) possnames.append(f[0] + " " + l) possnames.append(f) - if personexpedition.nickname: + if full not in possnames: + possnames.append(full) + if personexpedition.nickname not in possnames: possnames.append(personexpedition.nickname.lower()) + if l: + # This allows for nickname to be used for short name eg Phil + # adding Phil Sargent to the list + if str(personexpedition.nickname.lower() + " " + l) not in possnames: + possnames.append(personexpedition.nickname.lower() + " " + l) + if str(personexpedition.nickname.lower() + " " + l[0]) not in possnames: + possnames.append(personexpedition.nickname.lower() + " " + l[0]) for possname in possnames: if possname in res: diff --git a/urls.py b/urls.py index 287755b..2a30faf 100644 --- a/urls.py +++ b/urls.py @@ -35,12 +35,12 @@ actualurlpatterns = patterns('', url(r'^newqmnumber/?$', views_other.ajax_QM_number, ), url(r'^lbo_suggestions/?$', logbook_entry_suggestions), #(r'^person/(?P\d*)/?$', views_logbooks.person), - url(r'^person/(?P[A-Z]*[a-z\-\']*)[^a-zA-Z]*(?P[a-z\-\']*[^a-zA-Z]*[A-Z]*[a-z\-]*)/?', views_logbooks.person, name="person"), + url(r'^person/(?P[A-Z]*[a-z\-\'&;]*)[^a-zA-Z]*(?P[a-z\-\']*[^a-zA-Z]*[A-Z]*[a-z\-&;]*)/?', views_logbooks.person, name="person"), #url(r'^person/(\w+_\w+)$', views_logbooks.person, name="person"), url(r'^expedition/(\d+)$', views_logbooks.expedition, name="expedition"), url(r'^expeditions/?$', views_logbooks.ExpeditionListView.as_view(), name="expeditions"), - url(r'^personexpedition/(?P[A-Z]*[a-z]*)[^a-zA-Z]*(?P[A-Z]*[a-zA-Z]*)/(?P\d+)/?$', views_logbooks.personexpedition, name="personexpedition"), + url(r'^personexpedition/(?P[A-Z]*[a-z&;]*)[^a-zA-Z]*(?P[A-Z]*[a-zA-Z&;]*)/(?P\d+)/?$', views_logbooks.personexpedition, name="personexpedition"), url(r'^logbookentry/(?P.*)/(?P.*)/?$', views_logbooks.logbookentry,name="logbookentry"), url(r'^newlogbookentry/(?P.*)$', views_logbooks.newLogbookEntry, name="newLogBookEntry"), url(r'^editlogbookentry/(?P[^/]*)/(?P[^/]*)/(?P[^/]*)/$', views_logbooks.newLogbookEntry, name="editLogBookEntry"),