2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-29 05:11:52 +00:00

more cave parsing data fixes

This commit is contained in:
Philip Sargent 2021-04-14 22:50:47 +01:00
parent d598a6d0f5
commit c2c7de4c59
3 changed files with 20 additions and 12 deletions

View File

@ -45,7 +45,7 @@ def readcaves():
# For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys # For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys
# also needs to be done *before* entrances so that the entrance-cave links work properly. # also needs to be done *before* entrances so that the entrance-cave links work properly.
pending = ["2007-04", "2007-05", "2007-06", "2007-07", "2007-12", "2009-01", "2009-02", pending = ["2007-05", "2007-06", "2007-12", "2009-01", "2009-02",
"2010-06", "2010-07", "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06", "2010-06", "2010-07", "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06",
"2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888", "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888",
"2018-pf-01", "2018-pf-02", "haldenloch"] "2018-pf-01", "2018-pf-02", "haldenloch"]
@ -58,13 +58,16 @@ def readcaves():
underground_description = "Pending cave write-up - creating as empty object. No XML file available yet.", underground_description = "Pending cave write-up - creating as empty object. No XML file available yet.",
survex_file = "caves-1623/" + k + "/" + k +".svx", survex_file = "caves-1623/" + k + "/" + k +".svx",
url = url, url = url,
notes="_Survex file found in loser repo but no description in expoweb") notes="_Survex file found in loser repo but no description in expoweb <br>\n"+
"INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then <br>\n" +
"search in the Expo for that year e.g. <a href='/expedition/2007'>2007</a> to find a relevant logbook entry, then <br>\n" +
"click on 'New Entrance' at the bottom of this page as we need to create the entrance *first*.")
if cave: if cave:
cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key. cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key.
#print(f' ! - READ CAVES: cave {k} {cave}') #print(f' ! - READ CAVES: cave {k} {cave}')
cave.area.add(area_1623[0]) cave.area.add(area_1623[0])
cave.save() cave.save()
message = " ! {:11s} {}".format(cave.unofficial_number, cave.underground_description) message = f" ! {k:12} {cave.underground_description}"
DataIssue.objects.create(parser='caves', message=message, url=url) DataIssue.objects.create(parser='caves', message=message, url=url)
print(message) print(message)
@ -108,7 +111,7 @@ def readentrance(filename):
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo. # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f: with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f:
contents = f.read() contents = f.read()
context = "in file %s" % filename context = filename
#print("Reading file ENTRANCE {} / {}".format(settings.ENTRANCEDESCRIPTIONS, filename)) #print("Reading file ENTRANCE {} / {}".format(settings.ENTRANCEDESCRIPTIONS, filename))
entrancecontentslist = getXML(contents, "entrance", maxItems = 1, context = context) entrancecontentslist = getXML(contents, "entrance", maxItems = 1, context = context)
if len(entrancecontentslist) != 1: if len(entrancecontentslist) != 1:
@ -199,7 +202,7 @@ def readcave(filename):
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo. # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
with open(os.path.join(settings.CAVEDESCRIPTIONS, filename)) as f: with open(os.path.join(settings.CAVEDESCRIPTIONS, filename)) as f:
contents = f.read() contents = f.read()
context = " in file %s" % filename context = filename
cavecontentslist = getXML(contents, "cave", maxItems = 1, context = context) cavecontentslist = getXML(contents, "cave", maxItems = 1, context = context)
if len(cavecontentslist) != 1: if len(cavecontentslist) != 1:
message = f'! BAD CAVE at "{filename}"' message = f'! BAD CAVE at "{filename}"'
@ -310,13 +313,13 @@ def readcave(filename):
if survex_file[0]: if survex_file[0]:
if not (Path(SURVEX_DATA) / survex_file[0]).is_file(): if not (Path(SURVEX_DATA) / survex_file[0]).is_file():
message = f' ! survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"' message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"'
DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/') DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
print(message) print(message)
if description_file[0]: if description_file[0]:
if not (Path(EXPOWEB) / description_file[0]).is_file(): if not (Path(EXPOWEB) / description_file[0]).is_file():
message = f' ! description filename does not exist :{EXPOWEB}:"{description_file[0]}" in "{filename}"' message = f' ! {slug:12} description filename does not exist :{EXPOWEB}:"{description_file[0]}" in "{filename}"'
DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/') DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
print(message) print(message)
#c.description_file="" # done only once, to clear out cruft. #c.description_file="" # done only once, to clear out cruft.
@ -327,14 +330,14 @@ def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True,
if len(items) < minItems and printwarnings: if len(items) < minItems and printwarnings:
message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items), message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
"itemname": itemname, "itemname": itemname,
"min": minItems} + context "min": minItems} + " in file " + context
DataIssue.objects.create(parser='caves', message=message) DataIssue.objects.create(parser='caves', message=message, url=""+context)
print(message) print(message)
if maxItems is not None and len(items) > maxItems and printwarnings: if maxItems is not None and len(items) > maxItems and printwarnings:
message = " ! %(count)i %(itemname)s found, no more than %(max)i expected in this XML unit " % {"count": len(items), message = " ! %(count)i %(itemname)s found, no more than %(max)i expected in this XML unit " % {"count": len(items),
"itemname": itemname, "itemname": itemname,
"max": maxItems} + context "max": maxItems} + " in file " + context
DataIssue.objects.create(parser='caves', message=message) DataIssue.objects.create(parser='caves', message=message)
print(message) print(message)
return items return items

View File

@ -2,6 +2,11 @@
<!-- Only put one entrance in this file --> <!-- Only put one entrance in this file -->
<!-- This file is generated using the form documented at /handbook/survey/caveentry.html --> <!-- This file is generated using the form documented at /handbook/survey/caveentry.html -->
<!-- If you edit this file by hand, make sure you update the database by doing a full data import --> <!-- If you edit this file by hand, make sure you update the database by doing a full data import -->
<!-- See http://expo.survex.com/handbook/survey/caveentryfields.html for current details on filling in this form -->
<!-- There is 1 required field: slug. The entrance will not appear without it. -->
<!-- ALWAYS use &uuml; for u-umlaut and &ouml; for o-umlaut, e.g. H&ouml;hle (not Hohle) and Gl&uuml;ck (not Gluck) -->
<html lang="en"> <html lang="en">
<head> <head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>

View File

@ -112,9 +112,9 @@ trogglepatterns = [
url(r'^cave/logbook/([^/]+)/?$', caves.caveLogbook), url(r'^cave/logbook/([^/]+)/?$', caves.caveLogbook),
url(r'^(?P<karea>\d\d\d\d)(?P<subpath>.*)$', cavepage, name="cavepage"), # shorthand references such as /1623/264 url(r'^(?P<karea>\d\d\d\d)(?P<subpath>.*)$', cavepage, name="cavepage"), # shorthand references such as /1623/264
url(r'^getEntrances/(?P<caveslug>.*)', caves.get_entrances, name = "get_entrances"), #works e.g. /getEntrances/1623-161 url(r'^getEntrances/(?P<caveslug>.*)', caves.get_entrances, name = "get_entrances"), #works e.g. /getEntrances/1623-161 # CASE SENSITIVE
url(r'^entrance/(?P<caveslug>[^/]+)/(?P<slug>[^/]+)/edit/', caves.editEntrance, name = "editentrance"), url(r'^entrance/(?P<caveslug>[^/]+)/(?P<slug>[^/]+)/edit/', caves.editEntrance, name = "editentrance"),
url(r'^entrance/new/(?P<caveslug>[^/]+)/', caves.editEntrance, name = "newentrance"), url(r'^entrance/new/(?P<caveslug>[^/]+)/', caves.editEntrance, name = "newentrance"), # NOT WORKING
url(r'^statistics/?$', statistics.stats, name="stats"), url(r'^statistics/?$', statistics.stats, name="stats"),