From a60a495c83ab6eb8910f0778ee90e5f5fe8f5d82 Mon Sep 17 00:00:00 2001
From: Philip Sargent <philip.sargent@klebos.com>
Date: Mon, 29 Jun 2020 21:15:42 +0100
Subject: [PATCH] Creating forgotten caves & better GetCaveLookup()

---
 .gitignore           |   5 ++
 core/models_caves.py | 167 +++++++++++++++++++++++++++++++++++++++++++
 parsers/caves.py     |  53 ++++++++++----
 parsers/logbooks.py  |  29 +-------
 4 files changed, 211 insertions(+), 43 deletions(-)

diff --git a/.gitignore b/.gitignore
index 76c0d7c..b5a2e9a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,8 @@ posnotfound
 troggle.sqlite-journal
 loadsurvexblks.log
 logbktrips.shelve
+cave-lookup.json
+svxblks.log
+svxlinear.log
+loadlogbk.log
+_1623.svx
diff --git a/core/models_caves.py b/core/models_caves.py
index e20b17f..60a62a9 100644
--- a/core/models_caves.py
+++ b/core/models_caves.py
@@ -3,6 +3,7 @@ import os
 import datetime
 import logging
 import re
+import json
 from subprocess import call
 
 from urllib.parse import urljoin
@@ -574,3 +575,169 @@ class PersonTrip(TroggleModel):
 
     def __str__(self):
         return "%s (%s)" % (self.personexpedition, self.logbook_entry.date)
+
+# Return the dict that maps cave identifier strings (names, numbers, slugs, aliases) to Cave objects.
+# Built on the first call and cached in the module-level Gcavelookup; also writes cave-lookup.json to the cwd.
+Gcavelookup = None
+def GetCaveLookup():
+    global Gcavelookup
+    if Gcavelookup:
+        return Gcavelookup
+    lookup = {"NONEPLACEHOLDER":None}  # built locally so a failure part-way cannot leave a half-filled cache
+    for cave in Cave.objects.all():
+        lookup[cave.official_name.lower()] = cave
+        if cave.kataster_number:
+            lookup[cave.kataster_number] = cave
+        if cave.unofficial_number:
+            lookup[cave.unofficial_number.lower()] = cave
+        if cave.filename:
+            # this is the slug - usually..
+            lookup[cave.filename.replace(".html","").lower()] = cave
+        slug = cave.slug()  # call once and reuse the result
+        if slug:
+            lookup[slug.lower()] = cave
+    # Hand-maintained aliases (exact match only; TODO: prefix matching). A missing right-hand key raises KeyError.
+    # mostly taken from expoweb/noinfo/cave-number-index
+    # and Becka's email of 25 May 2020 on new kataster numbers
+    # TODO: load these from an uploaded JSON file instead of hard-coding them
+    lookup["1987-02"] = lookup["267"]
+    lookup["1990-01"] = lookup["171"]
+    lookup["1990-02"] = lookup["172"]
+    lookup["1990-03"] = lookup["173"]
+    lookup["1990-04"] = lookup["174"]
+    lookup["1990-05"] = lookup["175"]
+    lookup["1990-06"] = lookup["176"]
+    lookup["1990-07"] = lookup["177"]
+    lookup["1990-08"] = lookup["178"]
+    lookup["1990-09"] = lookup["179"]
+    lookup["1990-10"] = lookup["180"]
+    lookup["1990-11"] = lookup["181"]
+    lookup["1990-12"] = lookup["182"]
+    lookup["1990-13"] = lookup["183"]
+    lookup["1990-14"] = lookup["184"]
+    lookup["1990-18"] = lookup["188"]
+    lookup["1990-adam"] = lookup["225"]
+    lookup["1993-01"] = lookup["200"]
+    lookup["1996-02"] = lookup["224"]
+    lookup["1996-03"] = lookup["223"]
+    lookup["1996-04"] = lookup["222"]
+    lookup["1996wk2"] = lookup["207"]
+    lookup["1996wk3"] = lookup["208"]
+    lookup["1996wk5"] = lookup["219"]
+    lookup["1996wk6"] = lookup["218"]
+    lookup["1996wk8"] = lookup["209"]
+    lookup["1996wk11"] = lookup["268"]
+    lookup["96wk11"] = lookup["268"]
+    lookup["1998-01"] = lookup["201"]
+    lookup["1998-03"] = lookup["210"]
+    lookup["1999-03"] = lookup["204"]
+    lookup["1999-04"] = lookup["230"]
+    lookup["1999-10"] = lookup["162"]
+    lookup["1999-bo-01"] = lookup["205"]
+    lookup["1999-ob-01"] = lookup["205"]
+    lookup["1999-ob-03"] = lookup["226"]
+    lookup["1999-ob-04"] = lookup["227"]
+    lookup["2000-01"] = lookup["231"]
+    lookup["2000-03"] = lookup["214"]
+    lookup["2000-04"] = lookup["220"]
+    lookup["2000-05"] = lookup["215"]
+    lookup["2000-06"] = lookup["216"]
+    lookup["2000-07"] = lookup["217"]
+    lookup["2000-09"] = lookup["234"]
+    lookup["2000-aa-01"] = lookup["250"]
+    lookup["2001-04"] = lookup["239"]
+    lookup["2001-05"] = lookup["243"]
+    lookup["2002-01"] = lookup["249"]
+    lookup["2002-02"] = lookup["234"]
+    lookup["2002-04"] = lookup["242"]
+    lookup["2002-05"] = lookup["294"]
+    lookup["2003-01"] = lookup["256"]
+    lookup["2003-02"] = lookup["248"]
+    lookup["2003-03"] = lookup["247"]
+    lookup["2003-04"] = lookup["241"]
+    lookup["2003-05"] = lookup["246"]
+    lookup["2003-06"] = lookup["161"]
+    lookup["2003-08"] = lookup["240"]
+    lookup["2003-09"] = lookup["245"]
+    lookup["2003-10"] = lookup["244"]
+    lookup["2004-01"] = lookup["269"]
+    lookup["2004-03"] = lookup["270"]
+    lookup["2004-11"] = lookup["251"]
+    lookup["2004-12"] = lookup["161"]
+    lookup["2004-15"] = lookup["253"]
+    lookup["2004-19"] = lookup["254"]
+    lookup["2004-20"] = lookup["255"]
+    lookup["2005-04"] = lookup["204"]
+    lookup["2005-05"] = lookup["264"]
+    lookup["2005-07"] = lookup["257"]
+    lookup["2006-08"] = lookup["285"]
+    lookup["2006-09"] = lookup["298"]
+    lookup["2007-71"] = lookup["271"]
+    lookup["2010-01"] = lookup["263"]
+    lookup["2010-03"] = lookup["293"]
+    lookup["2011-01"] = lookup["292"]  # NOTE(review): overwritten further down with "190" - confirm which is right
+    lookup["2012-dd-05"] = lookup["286"]
+    lookup["2012-ns-13"] = lookup["292"]
+    lookup["2014-neo-01"] = lookup["273"]
+    lookup["2014-sd-01"] = lookup["274"]
+    lookup["2014-ms-14"] = lookup["287"]
+    lookup["2015-mf-06"] = lookup["288"]
+    lookup["2016-jb-01"] = lookup["289"]
+    lookup["2017-pw-01"] = lookup["277"]
+    lookup["2018-dm-07"] = lookup["359"]
+    lookup["2017_cucc_24"] = lookup["291"]
+    lookup["2017_cucc_23"] = lookup["295"]
+    lookup["2017_cucc_28"] = lookup["290"]
+    lookup["bs17"] = lookup["283"]
+
+    lookup["1976/b11"] = lookup["198"]
+    lookup["1976/b8"] = lookup["197"]
+    lookup["1976/b9"] = lookup["190"]
+    lookup["b11"] = lookup["1976/b11"]
+    lookup["b8"] = lookup["1976/b8"]
+    lookup["b9"] = lookup["1976/b9"]
+
+    lookup["2011-01-bs30"] = lookup["190"]
+    lookup["bs30"] = lookup["190"]
+    lookup["2011-01"] = lookup["190"]  # NOTE(review): replaces the earlier "2011-01" -> "292" entry
+
+    lookup["2002-x11"] = lookup["2005-08"]
+    lookup["2002-x12"] = lookup["2005-07"]
+    lookup["2002-x13"] = lookup["2005-06"]
+    lookup["2002-x14"] = lookup["2005-05"]
+
+    lookup["kh"] = lookup["161"]
+    lookup["161-kh"] = lookup["161"]
+    lookup["204-steinBH"] = lookup["204"]  # NOTE(review): only mixed-case alias key - confirm callers do not lower-case
+    lookup["stonebridge"] = lookup["204"]
+    lookup["hauchhole"] = lookup["234"]
+    lookup["hauch"] = lookup["234"]
+    lookup["234-hauch"] = lookup["234"]
+    lookup["tunnocks"] = lookup["258"]
+    lookup["balcony"] = lookup["264"]
+    lookup["balkon"] = lookup["264"]
+    lookup["fgh"] = lookup["290"]
+    lookup["gsh"] = lookup["291"]
+
+    lookup["homecoming"] = lookup["2018-dm-07"]
+    lookup["99ob02"] = lookup["1999-ob-02"]
+
+    variants = {}  # every key re-spelt with "-" -> "_" and with "_" -> "-"
+    for key, cave in lookup.items():
+        variants[key.replace("-","_")] = cave
+        variants[key.replace("_","-")] = cave
+    lookup = {**variants, **lookup}  # on a collision the original key's value wins
+    Gcavelookup = lookup  # publish the cache only now that the table is complete
+    exportable = {}  # key -> kataster number (or lower-cased unofficial number) for the JSON dump
+    for key, cave in lookup.items():
+        if not cave:
+            # skip entries with no Cave behind them (the NONEPLACEHOLDER value is None)
+            continue
+        if cave.kataster_number:
+            exportable[key] = cave.kataster_number
+        elif cave.unofficial_number:
+            exportable[key] = cave.unofficial_number.lower()
+    with open("cave-lookup.json", 'w') as f:
+        json.dump(exportable, f)
+
+    return Gcavelookup
diff --git a/parsers/caves.py b/parsers/caves.py
index 7f7364c..3c5d98e 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -8,21 +8,44 @@ import troggle.core.models as models
 import troggle.core.models_caves as models_caves
 
 def readcaves():
-  # Clear the cave data issues as we are reloading
-  # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
-  models.DataIssue.objects.filter(parser='caves').delete()
+    # Clear the cave data issues as we are reloading
+    models.DataIssue.objects.filter(parser='caves').delete()
+
+    # Do this first, so that these empty entries are overwritten as they get properly created.
+    # These are caves which exist and have surveys but do not (yet) have XML files.
+    forgotten = ["2007-04", "2007-05", "2007-06", "2007-07", "2007-12", "2009-01", "2009-02",
+            "2010-06", "2010-07", "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06",
+            "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888",
+            "2018-pf-01", "2018-pf-02", "haldenloch", "gruenstein"]
+    for k in forgotten:
+        try:
+            cave = models_caves.Cave(
+                    unofficial_number = k,
+                    official_name = "Mislaid cave - created as empty object. No XML available at this time.",
+                    notes="_Survex file found in loser repo but no description in expoweb")
+            if cave:
+                print("{} {}".format(cave.unofficial_number, cave.official_name))
+                cave.save()
+            else:
+                print("Failed to create cave {} ".format(k))
+        except Exception:
+            message = " ! Forgotten cave error, forgotten-id: %s" % k  # no slug exists here; only the id is known
+            models.DataIssue.objects.create(parser='caves', message=message)
+            print(message)
+
+    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
+    area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None)
+    area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None)
+    print(" - Reading Entrances")
+    for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
+        if filename.endswith('.html'):
+            readentrance(filename)
+    print (" - Reading Caves")
+    for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
+        if filename.endswith('.html'):
+            readcave(filename)
+        
 
-  area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None)
-  area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None)
-  print(" - Reading Entrances")
-  #print "list of <Slug> <Filename>"
-  for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
-    if filename.endswith('.html'):
-      readentrance(filename)
-  print (" - Reading Caves")
-  for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
-    if filename.endswith('.html'):
-      readcave(filename)
 
 
 def readentrance(filename):
@@ -204,7 +227,7 @@ def readcave(filename):
                     message = " ! Entrance setting failure, slug: %s letter: %s" % (slug, letter)
                     models.DataIssue.objects.create(parser='caves', message=message)
                     print(message)
-                
+
 
 def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):
     items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index a492135..c4f2c9c 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -12,7 +12,7 @@ from django.template.defaultfilters import slugify
 from django.utils.timezone import get_current_timezone, make_aware
 
 from troggle.core.models import DataIssue, Expedition
-from troggle.core.models_caves import Cave, OtherCaveName, LogbookEntry, PersonTrip
+from troggle.core.models_caves import Cave, OtherCaveName, LogbookEntry, PersonTrip, GetCaveLookup
 from parsers.people import GetPersonExpeditionNameLookup
 from utils import save_carefully
 
@@ -78,33 +78,6 @@ def GetTripCave(place):
         print(("No cave found for place " , place))
         return None
 
-# lookup function modelled on GetPersonExpeditionNameLookup
-# repeated assignment each call, needs refactoring
-Gcavelookup = None
-def GetCaveLookup():
-    global Gcavelookup
-    if Gcavelookup:
-        return Gcavelookup
-    Gcavelookup = {"NONEPLACEHOLDER":None}
-    for cave in Cave.objects.all():
-        Gcavelookup[cave.official_name.lower()] = cave
-        if cave.kataster_number:
-            Gcavelookup[cave.kataster_number] = cave
-        if cave.unofficial_number:
-            Gcavelookup[cave.unofficial_number.lower()] = cave
-        if cave.filename:
-            # this is the slug - usually..
-            Gcavelookup[cave.filename.replace(".html","").lower()] = cave
-    # These are exact matches! edit to check for prefix only!
-    Gcavelookup["tunnocks"] = Gcavelookup["258"]
-    Gcavelookup["hauchhole"] = Gcavelookup["234"]
-    Gcavelookup["KH"] = Gcavelookup["161"]
-    Gcavelookup["Balcony"] = Gcavelookup["264"]
-    Gcavelookup["Balkon"] = Gcavelookup["264"]
-    Gcavelookup["FGH"] = Gcavelookup["290"]
-    Gcavelookup["GSH"] = Gcavelookup["291"]
-    Gcavelookup["Homecoming"] = Gcavelookup["2018-dm-07"]
-    return Gcavelookup
 
 
 logentries = [] # the entire logbook for one year is a single object: a list of entries