Updating caves and entrances is no longer nuclear!

Big overhaul of people processing: fullname has been added to the model, and lastname is now names -1 unless you only have one name (yes, you, Wookey). This allows Jon Arne Toft and Wookey to live in the same DB. Names can now have HTML chars in them; this should be real Unicode, but that can only happen when we go to Python 3!
2019-04-19 22:52:54 +01:00
parent cc313246bb
commit b42249890e
5 changed files with 55 additions and 34 deletions
@@ -6,16 +6,18 @@ import re


 def readcaves():
-  area_1623 = models.Area(short_name = "1623", parent = None)
-  area_1623.save()
-  area_1626 = models.Area(short_name = "1626", parent = None)
-  area_1626.save()
-  print("Reading Entrances")
+
+  # Clear the cave data issues as we are reloading
+  models.DataIssue.objects.filter(parser='caves').delete()
+
+  area_1623 = models.Area.objects.update_or_create(short_name = "1623", parent = None)
+  area_1626 = models.Area.objects.update_or_create(short_name = "1626", parent = None)
+  print(" - Reading Entrances")
  #print "list of <Slug> <Filename>"
  for filename in os.walk(settings.ENTRANCEDESCRIPTIONS).next()[2]: #Should be a better way of getting a list of files
    if filename.endswith('.html'):
      readentrance(filename)
-  print ("Reading Caves")
+  print (" - Reading Caves")
  for filename in os.walk(settings.CAVEDESCRIPTIONS).next()[2]: #Should be a better way of getting a list of files
    if filename.endswith('.html'):
      readcave(filename)
@@ -51,7 +53,7 @@ def readentrance(filename):
        bearings = getXML(entrancecontents, "bearings", maxItems = 1, context = context)
        url = getXML(entrancecontents, "url", maxItems = 1, context = context)
        if len(non_public) == 1 and len(slugs) >= 1 and len(name) >= 1 and  len(entrance_description) == 1 and  len(explorers) == 1 and  len(map_description) == 1 and  len(location_description) == 1 and  len(approach) == 1 and  len(underground_description) == 1 and  len(marking) == 1 and  len(marking_comment) == 1 and  len(findability) == 1 and  len(findability_description) == 1 and  len(alt) == 1 and  len(northing) == 1 and  len(easting) == 1 and  len(tag_station) == 1 and  len(exact_station) == 1 and  len(other_station) == 1 and  len(other_description) == 1 and  len(bearings) == 1 and  len(url) == 1:
-            e = models.Entrance(name = name[0],
+            e, state = models.Entrance.objects.update_or_create(name = name[0],
                         non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
                         entrance_description = entrance_description[0],
                         explorers = explorers[0],
@@ -75,14 +77,12 @@ def readentrance(filename):
                         url = url[0],
                         filename = filename,
                         cached_primary_slug = slugs[0])
-            e.save()
            primary = True
            for slug in slugs:
                #print slug, filename
-                cs = models.EntranceSlug(entrance = e,
+                cs = models.EntranceSlug.objects.update_or_create(entrance = e,
                                         slug = slug,
                                         primary = primary)
-                cs.save()
                primary = False

 def readcave(filename):
@@ -117,7 +117,7 @@ def readcave(filename):
        url = getXML(cavecontents, "url", maxItems = 1, context = context)
        entrances = getXML(cavecontents, "entrance", context = context)
        if len(non_public) == 1 and len(slugs) >= 1 and len(official_name) == 1 and len(areas) >= 1 and len(kataster_code) == 1 and len(kataster_number) == 1 and len(unofficial_number) == 1 and len(explorers) == 1 and len(underground_description) == 1 and len(equipment) == 1 and len(references) == 1 and len(survey) == 1 and len(kataster_status) == 1 and len(underground_centre_line) == 1 and len(notes) == 1 and len(length) == 1 and len(depth) == 1 and len(extent) == 1 and len(survex_file) == 1 and len(description_file ) == 1 and len(url) == 1 and len(entrances) >= 1:
-            c = models.Cave(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
+            c, state = models.Cave.objects.update_or_create(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
                     official_name = official_name[0],
                     kataster_code = kataster_code[0],
                     kataster_number = kataster_number[0],
@@ -137,7 +137,6 @@ def readcave(filename):
                     description_file = description_file[0],
                     url = url[0],
                     filename = filename)
-            c.save()
            for area_slug in areas:
                area = models.Area.objects.filter(short_name = area_slug)
                if area:
@@ -149,12 +148,13 @@ def readcave(filename):
            primary = True
            for slug in slugs:
                try:
-                    cs = models.CaveSlug(cave = c,
+                    cs = models.CaveSlug.objects.update_or_create(cave = c,
                              slug = slug,
                              primary = primary)
-                    cs.save()
                except:
-                    print("Can't find text (slug): %s, skipping %s" % (slug, context))
+                    message = "Can't find text (slug): %s, skipping %s" % (slug, context)
+                    models.DataIssue.objects.create(parser='caves', message=message)
+                    print(message)
                    
                primary = False
            for entrance in entrances:
@@ -162,10 +162,11 @@ def readcave(filename):
                letter = getXML(entrance, "letter", maxItems = 1, context = context)[0]
                try:
                    entrance = models.Entrance.objects.get(entranceslug__slug = slug)
-                    ce = models.CaveAndEntrance(cave = c, entrance_letter = letter, entrance = entrance)
-                    ce.save()
+                    ce = models.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)
                except:
-                    print ("Entrance text (slug) %s missing %s" % (slug, context))
+                    message = "Entrance text (slug) %s missing %s" % (slug, context)
+                    models.DataIssue.objects.create(parser='caves', message=message)
+                    print(message)
                

 def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):
@@ -65,11 +65,27 @@ def LoadPersonsExpos():
    for personline in personreader:
        name = personline[header["Name"]]
        name = re.sub(r"<.*?>", "", name)
-        mname = re.match(r"(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name)
-        nickname = mname.group(3) or ""

-        lookupAttribs={'first_name':mname.group(1), 'last_name':(mname.group(2) or "")}
-        nonLookupAttribs={'is_vfho':personline[header["VfHO member"]],}
+        firstname = ""
+        nickname = ""
+
+        rawlastname = personline[header["Lastname"]].strip()
+        matchlastname = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname)
+        lastname = matchlastname.group(1).strip()
+
+        splitnick = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", name)
+        fullname = splitnick.group(1)
+
+        nickname = splitnick.group(2) or ""
+
+        fullname = fullname.strip()
+        names = fullname.split(' ')
+        firstname = names[0]
+        if len(names) == 1:
+            lastname = ""
+
+        lookupAttribs={'first_name':firstname, 'last_name':(lastname or "")}
+        nonLookupAttribs={'is_vfho':personline[header["VfHO member"]], 'fullname':fullname}
        person, created = save_carefully(models.Person, lookupAttribs, nonLookupAttribs)

        parseMugShotAndBlurb(personline=personline, header=header, person=person)
@@ -120,14 +136,24 @@ def GetPersonExpeditionNameLookup(expedition):
        possnames = [ ]
        f = personexpedition.person.first_name.lower()
        l = personexpedition.person.last_name.lower()
+        full = personexpedition.person.fullname.lower()
        if l:
            possnames.append(f + " " + l)
            possnames.append(f + " " + l[0])
            possnames.append(f + l[0])
            possnames.append(f[0] + " " + l)
        possnames.append(f)
-        if personexpedition.nickname:
+        if full not in possnames:
+            possnames.append(full)
+        if personexpedition.nickname not in possnames:
            possnames.append(personexpedition.nickname.lower())
+            if l:
+                # This allows for nickname to be used for short name eg Phil
+                # adding Phil Sargent to the list
+                if str(personexpedition.nickname.lower() + " " + l) not in possnames:
+                    possnames.append(personexpedition.nickname.lower() + " " + l)
+                if str(personexpedition.nickname.lower() + " " + l[0]) not in possnames:
+                    possnames.append(personexpedition.nickname.lower() + " " + l[0])
        
        for possname in possnames:
            if possname in res: