From c2c7de4c59cc02fea3524ed8bf4c33bb923624ef Mon Sep 17 00:00:00 2001 From: Philip Sargent <philip.sargent@klebos.com> Date: Wed, 14 Apr 2021 22:50:47 +0100 Subject: [PATCH] more cave parsing data fixes --- parsers/caves.py | 23 +++++++++++++---------- templates/dataformat/entrance.xml | 5 +++++ urls.py | 4 ++-- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/parsers/caves.py b/parsers/caves.py index cce680d..ba8f60e 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -45,7 +45,7 @@ def readcaves(): # For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys # also needs to be done *before* entrances so that the entrance-cave links work properly. - pending = ["2007-04", "2007-05", "2007-06", "2007-07", "2007-12", "2009-01", "2009-02", + pending = ["2007-05", "2007-06", "2007-12", "2009-01", "2009-02", "2010-06", "2010-07", "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06", "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888", "2018-pf-01", "2018-pf-02", "haldenloch"] @@ -58,13 +58,16 @@ def readcaves(): underground_description = "Pending cave write-up - creating as empty object. No XML file available yet.", survex_file = "caves-1623/" + k + "/" + k +".svx", url = url, - notes="_Survex file found in loser repo but no description in expoweb") + notes="_Survex file found in loser repo but no description in expoweb <br>\n"+ + "INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then <br>\n" + + "search in the Expo for that year e.g. <a href='/expedition/2007'>2007</a> to find a relevant logbook entry, then <br>\n" + + "click on 'New Entrance' at the bottom of this page as we need to create the entrance *first*.") if cave: cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key. #print(f' ! 
- READ CAVES: cave {k} {cave}') cave.area.add(area_1623[0]) cave.save() - message = " ! {:11s} {}".format(cave.unofficial_number, cave.underground_description) + message = f" ! {k:12} {cave.underground_description}" DataIssue.objects.create(parser='caves', message=message, url=url) print(message) @@ -108,7 +111,7 @@ def readentrance(filename): # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f: contents = f.read() - context = "in file %s" % filename + context = filename #print("Reading file ENTRANCE {} / {}".format(settings.ENTRANCEDESCRIPTIONS, filename)) entrancecontentslist = getXML(contents, "entrance", maxItems = 1, context = context) if len(entrancecontentslist) != 1: @@ -199,7 +202,7 @@ def readcave(filename): # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. with open(os.path.join(settings.CAVEDESCRIPTIONS, filename)) as f: contents = f.read() - context = " in file %s" % filename + context = filename cavecontentslist = getXML(contents, "cave", maxItems = 1, context = context) if len(cavecontentslist) != 1: message = f'! BAD CAVE at "{filename}"' @@ -310,13 +313,13 @@ def readcave(filename): if survex_file[0]: if not (Path(SURVEX_DATA) / survex_file[0]).is_file(): - message = f' ! survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"' + message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"' DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/') print(message) if description_file[0]: if not (Path(EXPOWEB) / description_file[0]).is_file(): - message = f' ! description filename does not exist :{EXPOWEB}:"{description_file[0]}" in "{filename}"' + message = f' ! 
{slug:12} description filename does not exist :{EXPOWEB}:"{description_file[0]}" in "{filename}"' DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/') print(message) #c.description_file="" # done only once, to clear out cruft. @@ -327,14 +330,14 @@ def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, if len(items) < minItems and printwarnings: message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items), "itemname": itemname, - "min": minItems} + context - DataIssue.objects.create(parser='caves', message=message) + "min": minItems} + " in file " + context + DataIssue.objects.create(parser='caves', message=message, url=""+context) print(message) if maxItems is not None and len(items) > maxItems and printwarnings: message = " ! %(count)i %(itemname)s found, no more than %(max)i expected in this XML unit " % {"count": len(items), "itemname": itemname, - "max": maxItems} + context + "max": maxItems} + " in file " + context DataIssue.objects.create(parser='caves', message=message) print(message) return items \ No newline at end of file diff --git a/templates/dataformat/entrance.xml b/templates/dataformat/entrance.xml index eb6b45a..e325c2b 100644 --- a/templates/dataformat/entrance.xml +++ b/templates/dataformat/entrance.xml @@ -2,6 +2,11 @@ <!-- Only put one entrance in this file --> <!-- This file is generated using the form documented at /handbook/survey/caveentry.html --> <!-- If you edit this file by hand, make sure you update the database by doing a full data import --> + + <!-- See http://expo.survex.com/handbook/survey/caveentryfields.html for current details on filling in this form --> + <!-- there is 1 required field: slug. The entrance will not appear without that. 
--> + <!-- ALWAYS use &uuml; for u+Umlaut and &ouml; for o+umlaut eg H&ouml;hle for Hohle and Gl&uuml;ck for Gluck--> + <html lang="en"> <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/> diff --git a/urls.py b/urls.py index 1e0b080..ff67d0e 100644 --- a/urls.py +++ b/urls.py @@ -112,9 +112,9 @@ trogglepatterns = [ url(r'^cave/logbook/([^/]+)/?$', caves.caveLogbook), url(r'^(?P<karea>\d\d\d\d)(?P<subpath>.*)$', cavepage, name="cavepage"), # shorthand references such as /1623/264 - url(r'^getEntrances/(?P<caveslug>.*)', caves.get_entrances, name = "get_entrances"), #works e.g. /getEntrances/1623-161 + url(r'^getEntrances/(?P<caveslug>.*)', caves.get_entrances, name = "get_entrances"), #works e.g. /getEntrances/1623-161 # CASE SENSITIVE url(r'^entrance/(?P<caveslug>[^/]+)/(?P<slug>[^/]+)/edit/', caves.editEntrance, name = "editentrance"), - url(r'^entrance/new/(?P<caveslug>[^/]+)/', caves.editEntrance, name = "newentrance"), + url(r'^entrance/new/(?P<caveslug>[^/]+)/', caves.editEntrance, name = "newentrance"), # NOT WORKING url(r'^statistics/?$', statistics.stats, name="stats"),