From 768ec830377e7c99827812b2ed83cffad9764d50 Mon Sep 17 00:00:00 2001
From: Sam Wenham <sam@wenhams.co.uk>
Date: Fri, 19 Apr 2019 22:52:54 +0100
Subject: [PATCH] Updating caves and entrances is no longer nuclear! Big
 overhaul of people processing: fullname added to the model; lastname is now
 names -1 unless you only have one (yes you Wookey). This allows for Jon Arne
 Toft and Wookey to live in the same DB. Names can now have HTML chars in them;
 this should be real unicode but that can only happen when we go to Python 3!

---
 core/models.py    |  7 ++-----
 databaseReset.py  |  5 +----
 parsers/caves.py  | 37 +++++++++++++++++++------------------
 parsers/people.py | 36 +++++++++++++++++++++++++++++++-----
 urls.py           |  4 ++--
 5 files changed, 55 insertions(+), 34 deletions(-)
diff --git a/core/models.py b/core/models.py
index 223d447..f65efed 100644
--- a/core/models.py
+++ b/core/models.py
@@ -57,7 +57,7 @@ class TroggleModel(models.Model):
     class Meta:
         abstract = True
 
-class TroggleImageModel(ImageModel):
+class TroggleImageModel(models.Model):
     new_since_parsing = models.BooleanField(default=False, editable=False)
     
     def object_name(self):
@@ -123,16 +123,13 @@ class ExpeditionDay(TroggleModel):
 class Person(TroggleModel):
     first_name  = models.CharField(max_length=100)
     last_name   = models.CharField(max_length=100)
+    fullname    = models.CharField(max_length=200)
     is_vfho     = models.BooleanField(help_text="VFHO is the Vereines f&uuml;r H&ouml;hlenkunde in Obersteier, a nearby Austrian caving club.", default=False)
     mug_shot    = models.CharField(max_length=100, blank=True,null=True)
     blurb = models.TextField(blank=True,null=True)
     
     #href        = models.CharField(max_length=200)
     orderref    = models.CharField(max_length=200)  # for alphabetic 
-    
-    #the below have been removed and made methods. I'm not sure what the b in bisnotable stands for. - AC 16 Feb
-    #notability  = models.FloatField()               # for listing the top 20 people
-    #bisnotable  = models.BooleanField(default=False)
     user	= models.OneToOneField(User, null=True, blank=True)
     def get_absolute_url(self):
         return urlparse.urljoin(settings.URL_ROOT,reverse('person',kwargs={'first_name':self.first_name,'last_name':self.last_name}))
diff --git a/databaseReset.py b/databaseReset.py
index 7a5d0fa..fd9b83a 100644
--- a/databaseReset.py
+++ b/databaseReset.py
@@ -42,7 +42,7 @@ def make_dirs():
 
 def import_caves():
     import parsers.caves
-    print("importing caves")
+    print("Importing Caves")
     parsers.caves.readcaves()
 
 def import_people():
@@ -195,9 +195,6 @@ if __name__ == "__main__":
     elif "scans" in sys.argv:
         import_surveyscans()
     elif "caves" in sys.argv:
-        # reload_db()
-        # make_dirs()
-        # pageredirects()
         import_caves()
     elif "people" in sys.argv:
         import_people()
diff --git a/parsers/caves.py b/parsers/caves.py
index 2c28365..606007f 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -6,16 +6,18 @@ import re
 
 
 def readcaves():
-  area_1623 = models.Area(short_name = "1623", parent = None)
-  area_1623.save()
-  area_1626 = models.Area(short_name = "1626", parent = None)
-  area_1626.save()
-  print("Reading Entrances")
+
+  # Clear the cave data issues as we are reloading
+  models.DataIssue.objects.filter(parser='caves').delete()
+
+  area_1623 = models.Area.objects.update_or_create(short_name = "1623", parent = None)
+  area_1626 = models.Area.objects.update_or_create(short_name = "1626", parent = None)
+  print(" - Reading Entrances")
   #print "list of <Slug> <Filename>"
   for filename in os.walk(settings.ENTRANCEDESCRIPTIONS).next()[2]: #Should be a better way of getting a list of files
     if filename.endswith('.html'):
       readentrance(filename)
-  print ("Reading Caves")
+  print (" - Reading Caves")
   for filename in os.walk(settings.CAVEDESCRIPTIONS).next()[2]: #Should be a better way of getting a list of files
     if filename.endswith('.html'):
       readcave(filename)
@@ -51,7 +53,7 @@ def readentrance(filename):
         bearings = getXML(entrancecontents, "bearings", maxItems = 1, context = context)
         url = getXML(entrancecontents, "url", maxItems = 1, context = context)
         if len(non_public) == 1 and len(slugs) >= 1 and len(name) >= 1 and  len(entrance_description) == 1 and  len(explorers) == 1 and  len(map_description) == 1 and  len(location_description) == 1 and  len(approach) == 1 and  len(underground_description) == 1 and  len(marking) == 1 and  len(marking_comment) == 1 and  len(findability) == 1 and  len(findability_description) == 1 and  len(alt) == 1 and  len(northing) == 1 and  len(easting) == 1 and  len(tag_station) == 1 and  len(exact_station) == 1 and  len(other_station) == 1 and  len(other_description) == 1 and  len(bearings) == 1 and  len(url) == 1:
-            e = models.Entrance(name = name[0],
+            e, state = models.Entrance.objects.update_or_create(name = name[0],
                          non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
                          entrance_description = entrance_description[0],
                          explorers = explorers[0],
@@ -75,14 +77,12 @@ def readentrance(filename):
                          url = url[0],
                          filename = filename,
                          cached_primary_slug = slugs[0])
-            e.save()
             primary = True
             for slug in slugs:
                 #print slug, filename
-                cs = models.EntranceSlug(entrance = e,
+                cs = models.EntranceSlug.objects.update_or_create(entrance = e,
                                          slug = slug,
                                          primary = primary)
-                cs.save()
                 primary = False
 
 def readcave(filename):
@@ -117,7 +117,7 @@ def readcave(filename):
         url = getXML(cavecontents, "url", maxItems = 1, context = context)
         entrances = getXML(cavecontents, "entrance", context = context)
         if len(non_public) == 1 and len(slugs) >= 1 and len(official_name) == 1 and len(areas) >= 1 and len(kataster_code) == 1 and len(kataster_number) == 1 and len(unofficial_number) == 1 and len(explorers) == 1 and len(underground_description) == 1 and len(equipment) == 1 and len(references) == 1 and len(survey) == 1 and len(kataster_status) == 1 and len(underground_centre_line) == 1 and len(notes) == 1 and len(length) == 1 and len(depth) == 1 and len(extent) == 1 and len(survex_file) == 1 and len(description_file ) == 1 and len(url) == 1 and len(entrances) >= 1:
-            c = models.Cave(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
+            c, state = models.Cave.objects.update_or_create(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
                      official_name = official_name[0],
                      kataster_code = kataster_code[0],
                      kataster_number = kataster_number[0],
@@ -137,7 +137,6 @@ def readcave(filename):
                      description_file = description_file[0],
                      url = url[0],
                      filename = filename)
-            c.save()
             for area_slug in areas:
                 area = models.Area.objects.filter(short_name = area_slug)
                 if area:
@@ -149,12 +148,13 @@ def readcave(filename):
             primary = True
             for slug in slugs:
                 try:
-                    cs = models.CaveSlug(cave = c,
+                    cs = models.CaveSlug.objects.update_or_create(cave = c,
                               slug = slug,
                               primary = primary)
-                    cs.save()
                 except:
-                    print("Can't find text (slug): %s, skipping %s" % (slug, context))
+                    message = "Can't find text (slug): %s, skipping %s" % (slug, context)
+                    models.DataIssue.objects.create(parser='caves', message=message)
+                    print(message)
                     
                 primary = False
             for entrance in entrances:
@@ -162,10 +162,11 @@ def readcave(filename):
                 letter = getXML(entrance, "letter", maxItems = 1, context = context)[0]
                 try:
                     entrance = models.Entrance.objects.get(entranceslug__slug = slug)
-                    ce = models.CaveAndEntrance(cave = c, entrance_letter = letter, entrance = entrance)
-                    ce.save()
+                    ce = models.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)
                 except:
-                    print ("Entrance text (slug) %s missing %s" % (slug, context))
+                    message = "Entrance text (slug) %s missing %s" % (slug, context)
+                    models.DataIssue.objects.create(parser='caves', message=message)
+                    print(message)
                 
 
 def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):
diff --git a/parsers/people.py b/parsers/people.py
index 4bf84a0..eb877f2 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -65,11 +65,27 @@ def LoadPersonsExpos():
     for personline in personreader:
         name = personline[header["Name"]]
         name = re.sub(r"<.*?>", "", name)
-        mname = re.match(r"(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name)
-        nickname = mname.group(3) or ""
 
-        lookupAttribs={'first_name':mname.group(1), 'last_name':(mname.group(2) or "")}
-        nonLookupAttribs={'is_vfho':personline[header["VfHO member"]],}
+        firstname = ""
+        nickname = ""
+
+        rawlastname = personline[header["Lastname"]].strip()
+        matchlastname = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname)
+        lastname = matchlastname.group(1).strip()
+
+        splitnick = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", name)
+        fullname = splitnick.group(1)
+
+        nickname = splitnick.group(2) or ""
+
+        fullname = fullname.strip()
+        names = fullname.split(' ')
+        firstname = names[0]
+        if len(names) == 1:
+            lastname = ""
+
+        lookupAttribs={'first_name':firstname, 'last_name':(lastname or "")}
+        nonLookupAttribs={'is_vfho':personline[header["VfHO member"]], 'fullname':fullname}
         person, created = save_carefully(models.Person, lookupAttribs, nonLookupAttribs)
 
         parseMugShotAndBlurb(personline=personline, header=header, person=person)
@@ -120,14 +136,24 @@ def GetPersonExpeditionNameLookup(expedition):
         possnames = [ ]
         f = personexpedition.person.first_name.lower()
         l = personexpedition.person.last_name.lower()
+        full = personexpedition.person.fullname.lower()
         if l:
             possnames.append(f + " " + l)
             possnames.append(f + " " + l[0])
             possnames.append(f + l[0])
             possnames.append(f[0] + " " + l)
         possnames.append(f)
-        if personexpedition.nickname:
+        if full not in possnames:
+            possnames.append(full)
+        if personexpedition.nickname not in possnames:
             possnames.append(personexpedition.nickname.lower())
+            if l:
+                # This allows for nickname to be used for short name eg Phil
+                # adding Phil Sargent to the list
+                if str(personexpedition.nickname.lower() + " " + l) not in possnames:
+                    possnames.append(personexpedition.nickname.lower() + " " + l)
+                if str(personexpedition.nickname.lower() + " " + l[0]) not in possnames:
+                    possnames.append(personexpedition.nickname.lower() + " " + l[0])
         
         for possname in possnames:
             if possname in res:
diff --git a/urls.py b/urls.py
index 287755b..2a30faf 100644
--- a/urls.py
+++ b/urls.py
@@ -35,12 +35,12 @@ actualurlpatterns = patterns('',
     url(r'^newqmnumber/?$',              views_other.ajax_QM_number,  ),
     url(r'^lbo_suggestions/?$',              logbook_entry_suggestions),    
     #(r'^person/(?P<person_id>\d*)/?$', views_logbooks.person),
-    url(r'^person/(?P<first_name>[A-Z]*[a-z\-\']*)[^a-zA-Z]*(?P<last_name>[a-z\-\']*[^a-zA-Z]*[A-Z]*[a-z\-]*)/?', views_logbooks.person, name="person"),
+    url(r'^person/(?P<first_name>[A-Z]*[a-z\-\'&;]*)[^a-zA-Z]*(?P<last_name>[a-z\-\']*[^a-zA-Z]*[A-Z]*[a-z\-&;]*)/?', views_logbooks.person, name="person"),
     #url(r'^person/(\w+_\w+)$',       views_logbooks.person,      name="person"),
     
     url(r'^expedition/(\d+)$',  views_logbooks.expedition,  name="expedition"),
     url(r'^expeditions/?$',  views_logbooks.ExpeditionListView.as_view(), name="expeditions"),
-    url(r'^personexpedition/(?P<first_name>[A-Z]*[a-z]*)[^a-zA-Z]*(?P<last_name>[A-Z]*[a-zA-Z]*)/(?P<year>\d+)/?$', views_logbooks.personexpedition, name="personexpedition"),
+    url(r'^personexpedition/(?P<first_name>[A-Z]*[a-z&;]*)[^a-zA-Z]*(?P<last_name>[A-Z]*[a-zA-Z&;]*)/(?P<year>\d+)/?$', views_logbooks.personexpedition, name="personexpedition"),
     url(r'^logbookentry/(?P<date>.*)/(?P<slug>.*)/?$', views_logbooks.logbookentry,name="logbookentry"),
     url(r'^newlogbookentry/(?P<expeditionyear>.*)$', views_logbooks.newLogbookEntry,     name="newLogBookEntry"),
     url(r'^editlogbookentry/(?P<expeditionyear>[^/]*)/(?P<pdate>[^/]*)/(?P<pslug>[^/]*)/$', views_logbooks.newLogbookEntry,     name="editLogBookEntry"),