more cave parsing data fixes

2026-05-19 05:01:29 +01:00 · 2021-04-14 22:50:47 +01:00
parent d598a6d0f5
commit c2c7de4c59
3 changed files with 20 additions and 12 deletions
@@ -45,7 +45,7 @@ def readcaves():
        # For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys
        # also needs to be done *before* entrances so that the entrance-cave links work properly.
-        pending = ["2007-04", "2007-05", "2007-06", "2007-07", "2007-12", "2009-01", "2009-02", 
+        pending = ["2007-05", "2007-06", "2007-12", "2009-01", "2009-02", 
                "2010-06", "2010-07", "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06", 
                "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888", 
                "2018-pf-01", "2018-pf-02", "haldenloch"]
@@ -58,13 +58,16 @@ def readcaves():
                        underground_description = "Pending cave write-up - creating as empty object. No XML file available yet.",
                        survex_file = "caves-1623/" + k + "/" + k +".svx",
                        url = url,
-                        notes="_Survex file found in loser repo but no description in expoweb")
+                        notes="_Survex file found in loser repo but no description in expoweb <br>\n"+
                            "INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then <br>\n" +
                            "search in the Expo for that year e.g. <a href='/expedition/2007'>2007</a> to find a relevant logbook entry, then <br>\n" +
                            "click on 'New Entrance' at the bottom of this page as we need to create the entrance *first*.")
                if cave:
                    cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key.
                    #print(f' ! - READ CAVES:  cave {k} {cave}')
                    cave.area.add(area_1623[0])
                    cave.save()
-                    message = " ! {:11s} {}".format(cave.unofficial_number, cave.underground_description)
+                    message = f" ! {k:12} {cave.underground_description}"
                    DataIssue.objects.create(parser='caves', message=message, url=url)
                    print(message)
@@ -108,7 +111,7 @@ def readentrance(filename):
    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
    with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f:
        contents = f.read()
-    context = "in file %s" % filename
+    context = filename
    #print("Reading file ENTRANCE {} / {}".format(settings.ENTRANCEDESCRIPTIONS, filename))
    entrancecontentslist = getXML(contents, "entrance", maxItems = 1, context = context)
    if len(entrancecontentslist) != 1:
@@ -199,7 +202,7 @@ def readcave(filename):
    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
    with open(os.path.join(settings.CAVEDESCRIPTIONS, filename)) as f:
        contents = f.read()
-    context = " in file %s" % filename
+    context = filename
    cavecontentslist = getXML(contents, "cave", maxItems = 1, context = context)
    if len(cavecontentslist) != 1:
        message = f'! BAD CAVE at "{filename}"'
@@ -310,13 +313,13 @@ def readcave(filename):
            if survex_file[0]:
                if not (Path(SURVEX_DATA) / survex_file[0]).is_file():
-                    message = f' ! survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"'
+                    message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"'
                    DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
                    print(message)
            if description_file[0]:
                if not (Path(EXPOWEB) / description_file[0]).is_file():
-                    message = f' ! description filename does not exist :{EXPOWEB}:"{description_file[0]}" in "{filename}"'
+                    message = f' ! {slug:12} description filename does not exist :{EXPOWEB}:"{description_file[0]}" in "{filename}"'
                    DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
                    print(message)
                    #c.description_file="" # done only once, to clear out cruft.
@@ -327,14 +330,14 @@ def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True,
    if len(items) < minItems and printwarnings:
        message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
                                                                           "itemname": itemname,
-                                                                           "min": minItems} + context
+                                                                           "min": minItems} + " in file " + context
-        DataIssue.objects.create(parser='caves', message=message)
+        DataIssue.objects.create(parser='caves', message=message, url=""+context)
        print(message)
    if maxItems is not None and len(items) > maxItems and printwarnings:
        message = " ! %(count)i %(itemname)s found, no more than %(max)i expected in this XML unit " % {"count": len(items),
                                                                               "itemname": itemname,
-                                                                               "max": maxItems} + context
+                                                                               "max": maxItems} + " in file " + context
        DataIssue.objects.create(parser='caves', message=message)
        print(message)
    return items
@@ -2,6 +2,11 @@
 <!-- Only put one entrance in this file -->
 <!-- This file is generated using the form documented at /handbook/survey/caveentry.html -->
 <!-- If you edit this file by hand, make sure you update the database by doing a full data import -->
    <!-- See http://expo.survex.com/handbook/survey/caveentryfields.html for current details on filling in this form -->
    <!-- there is 1 required field: slug. The entrance will not appear without that. -->
    <!-- ALWAYS use &uuml; for u-umlaut and &ouml; for o-umlaut, e.g. H&ouml;hle rather than Hohle and Gl&uuml;ck rather than Gluck -->
 <html lang="en">
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
@@ -112,9 +112,9 @@ trogglepatterns = [
    url(r'^cave/logbook/([^/]+)/?$', caves.caveLogbook),
    url(r'^(?P<karea>\d\d\d\d)(?P<subpath>.*)$',     cavepage,     name="cavepage"), # shorthand references such as /1623/264
-    url(r'^getEntrances/(?P<caveslug>.*)', caves.get_entrances, name = "get_entrances"), #works e.g. /getEntrances/1623-161
+    url(r'^getEntrances/(?P<caveslug>.*)', caves.get_entrances, name = "get_entrances"), #works e.g. /getEntrances/1623-161 # CASE SENSITIVE
    url(r'^entrance/(?P<caveslug>[^/]+)/(?P<slug>[^/]+)/edit/', caves.editEntrance, name = "editentrance"),
-    url(r'^entrance/new/(?P<caveslug>[^/]+)/', caves.editEntrance, name = "newentrance"),
+    url(r'^entrance/new/(?P<caveslug>[^/]+)/', caves.editEntrance, name = "newentrance"), # NOT WORKING
    url(r'^statistics/?$',  statistics.stats, name="stats"),