From c2c7de4c59cc02fea3524ed8bf4c33bb923624ef Mon Sep 17 00:00:00 2001
From: Philip Sargent <philip.sargent@klebos.com>
Date: Wed, 14 Apr 2021 22:50:47 +0100
Subject: [PATCH] more cave parsing data fixes

---
 parsers/caves.py                  | 23 +++++++++++++----------
 templates/dataformat/entrance.xml |  5 +++++
 urls.py                           |  4 ++--
 3 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/parsers/caves.py b/parsers/caves.py
index cce680d..ba8f60e 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -45,7 +45,7 @@ def readcaves():
         
         # For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys
         # also needs to be done *before* entrances so that the entrance-cave links work properly.
-        pending = ["2007-04", "2007-05", "2007-06", "2007-07", "2007-12", "2009-01", "2009-02", 
+        pending = ["2007-05", "2007-06", "2007-12", "2009-01", "2009-02", 
                 "2010-06", "2010-07", "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06", 
                 "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888", 
                 "2018-pf-01", "2018-pf-02", "haldenloch"]
@@ -58,13 +58,16 @@ def readcaves():
                         underground_description = "Pending cave write-up - creating as empty object. No XML file available yet.",
                         survex_file = "caves-1623/" + k + "/" + k +".svx",
                         url = url,
-                        notes="_Survex file found in loser repo but no description in expoweb")
+                        notes="_Survex file found in loser repo but no description in expoweb <br>\n"+
+                            "INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then <br>\n" +
+                            "search in the Expo for that year e.g. <a href='/expedition/2007'>2007</a> to find a relevant logbook entry, then <br>\n" +
+                            "click on 'New Entrance' at the bottom of this page as we need to create the entrance *first*.")
                 if cave:
                     cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key.
                     #print(f' ! - READ CAVES:  cave {k} {cave}')
                     cave.area.add(area_1623[0])
                     cave.save()
-                    message = " ! {:11s} {}".format(cave.unofficial_number, cave.underground_description)
+                    message = f" ! {k:12} {cave.underground_description}"
                     DataIssue.objects.create(parser='caves', message=message, url=url)
                     print(message)
                     
@@ -108,7 +111,7 @@ def readentrance(filename):
     # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
     with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f:
         contents = f.read()
-    context = "in file %s" % filename
+    context = filename
     #print("Reading file ENTRANCE {} / {}".format(settings.ENTRANCEDESCRIPTIONS, filename))
     entrancecontentslist = getXML(contents, "entrance", maxItems = 1, context = context)
     if len(entrancecontentslist) != 1:
@@ -199,7 +202,7 @@ def readcave(filename):
     # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
     with open(os.path.join(settings.CAVEDESCRIPTIONS, filename)) as f:
         contents = f.read()
-    context = " in file %s" % filename
+    context = filename
     cavecontentslist = getXML(contents, "cave", maxItems = 1, context = context)
     if len(cavecontentslist) != 1:
         message = f'! BAD CAVE at "{filename}"'
@@ -310,13 +313,13 @@ def readcave(filename):
                     
             if survex_file[0]:
                 if not (Path(SURVEX_DATA) / survex_file[0]).is_file():
-                    message = f' ! survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"'
+                    message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"'
                     DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
                     print(message)
                     
             if description_file[0]:
                 if not (Path(EXPOWEB) / description_file[0]).is_file():
-                    message = f' ! description filename does not exist :{EXPOWEB}:"{description_file[0]}" in "{filename}"'
+                    message = f' ! {slug:12} description filename does not exist :{EXPOWEB}:"{description_file[0]}" in "{filename}"'
                     DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
                     print(message)
                     #c.description_file="" # done only once, to clear out cruft.
@@ -327,14 +330,14 @@ def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True,
     if len(items) < minItems and printwarnings:
         message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
                                                                            "itemname": itemname,
-                                                                           "min": minItems} + context
-        DataIssue.objects.create(parser='caves', message=message)
+                                                                           "min": minItems} + " in file " + context
+        DataIssue.objects.create(parser='caves', message=message, url=""+context)
         print(message)
         
     if maxItems is not None and len(items) > maxItems and printwarnings:
         message = " ! %(count)i %(itemname)s found, no more than %(max)i expected in this XML unit " % {"count": len(items),
                                                                                "itemname": itemname,
-                                                                               "max": maxItems} + context
+                                                                               "max": maxItems} + " in file " + context
         DataIssue.objects.create(parser='caves', message=message)
         print(message)
     return items
\ No newline at end of file
diff --git a/templates/dataformat/entrance.xml b/templates/dataformat/entrance.xml
index eb6b45a..e325c2b 100644
--- a/templates/dataformat/entrance.xml
+++ b/templates/dataformat/entrance.xml
@@ -2,6 +2,11 @@
 <!-- Only put one entrance in this file -->
 <!-- This file is generated using the form documented at /handbook/survey/caveentry.html -->
 <!-- If you edit this file by hand, make sure you update the database by doing a full data import -->
+
+    <!-- See http://expo.survex.com/handbook/survey/caveentryfields.html for current details on filling in this form -->
+    <!-- There is one required field: slug. The entrance will not appear without it. -->
+    <!-- ALWAYS use &uuml; for u-umlaut and &ouml; for o-umlaut, e.g. write H&ouml;hle rather than Hohle, and Gl&uuml;ck rather than Gluck. -->
+
 <html lang="en">
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
diff --git a/urls.py b/urls.py
index 1e0b080..ff67d0e 100644
--- a/urls.py
+++ b/urls.py
@@ -112,9 +112,9 @@ trogglepatterns = [
     url(r'^cave/logbook/([^/]+)/?$', caves.caveLogbook),
     url(r'^(?P<karea>\d\d\d\d)(?P<subpath>.*)$',     cavepage,     name="cavepage"), # shorthand references such as /1623/264
 
-    url(r'^getEntrances/(?P<caveslug>.*)', caves.get_entrances, name = "get_entrances"), #works e.g. /getEntrances/1623-161
+    url(r'^getEntrances/(?P<caveslug>.*)', caves.get_entrances, name = "get_entrances"), #works e.g. /getEntrances/1623-161 # CASE SENSITIVE
     url(r'^entrance/(?P<caveslug>[^/]+)/(?P<slug>[^/]+)/edit/', caves.editEntrance, name = "editentrance"),
-    url(r'^entrance/new/(?P<caveslug>[^/]+)/', caves.editEntrance, name = "newentrance"),
+    url(r'^entrance/new/(?P<caveslug>[^/]+)/', caves.editEntrance, name = "newentrance"), # NOT WORKING
     
 
     url(r'^statistics/?$',  statistics.stats, name="stats"),