forked from expo/troggle
initial refactoring
This commit is contained in:
112
parsers/caves.py
112
parsers/caves.py
@@ -274,8 +274,11 @@ def do_pending_cave(k, url, area):
|
|||||||
|
|
||||||
def readentrance(filename):
|
def readentrance(filename):
|
||||||
"""Reads an entrance description from the .html file
|
"""Reads an entrance description from the .html file
|
||||||
Convoluted. Sorry.This is as I inherited it and I haven't fiddled with it. Needs rewriting
|
|
||||||
"""
|
"""
|
||||||
|
def getXMLmax1(field):
|
||||||
|
return getXML(entrancecontents, field, maxItems=1, context=context)
|
||||||
|
# return getXML(entrancecontents, field, maxItems=1, context=context)[0]
|
||||||
|
|
||||||
global entrances_xslug
|
global entrances_xslug
|
||||||
global caves_xslug
|
global caves_xslug
|
||||||
global areas_xslug
|
global areas_xslug
|
||||||
@@ -287,35 +290,43 @@ def readentrance(filename):
|
|||||||
# print("Reading file ENTRANCE {} / {}".format(ENTRANCEDESCRIPTIONS, filename))
|
# print("Reading file ENTRANCE {} / {}".format(ENTRANCEDESCRIPTIONS, filename))
|
||||||
entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context)
|
entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context)
|
||||||
if len(entrancecontentslist) != 1:
|
if len(entrancecontentslist) != 1:
|
||||||
message = f'! BAD ENTRANCE at "{filename}"'
|
message = f'! BAD ENTRANCE at "{filename}". Loading aborted. '
|
||||||
DataIssue.objects.create(parser="entrances", message=message)
|
DataIssue.objects.create(parser="entrances", message=message)
|
||||||
print(message)
|
print(message)
|
||||||
else:
|
else:
|
||||||
entrancecontents = entrancecontentslist[0]
|
entrancecontents = entrancecontentslist[0]
|
||||||
non_public = getXML(entrancecontents, "non_public", maxItems=1, context=context)
|
|
||||||
name = getXML(entrancecontents, "name", maxItems=1, context=context)
|
|
||||||
slugs = getXML(entrancecontents, "slug", context=context)
|
|
||||||
entrance_description = getXML(entrancecontents, "entrance_description", maxItems=1, context=context)
|
|
||||||
explorers = getXML(entrancecontents, "explorers", maxItems=1, context=context)
|
|
||||||
map_description = getXML(entrancecontents, "map_description", maxItems=1, context=context)
|
|
||||||
location_description = getXML(entrancecontents, "location_description", maxItems=1, context=context)
|
|
||||||
lastvisit = getXML(entrancecontents, "last visit date", maxItems=1, minItems=0, context=context)
|
lastvisit = getXML(entrancecontents, "last visit date", maxItems=1, minItems=0, context=context)
|
||||||
approach = getXML(entrancecontents, "approach", maxItems=1, context=context)
|
slugs = getXML(entrancecontents, "slug", context=context)
|
||||||
underground_description = getXML(entrancecontents, "underground_description", maxItems=1, context=context)
|
|
||||||
photo = getXML(entrancecontents, "photo", maxItems=1, context=context)
|
alt = getXMLmax1("alt")
|
||||||
marking = getXML(entrancecontents, "marking", maxItems=1, context=context)
|
approach = getXMLmax1("approach")
|
||||||
marking_comment = getXML(entrancecontents, "marking_comment", maxItems=1, context=context)
|
bearings = getXMLmax1("bearings")
|
||||||
findability = getXML(entrancecontents, "findability", maxItems=1, context=context)
|
easting = getXMLmax1("easting")
|
||||||
findability_description = getXML(entrancecontents, "findability_description", maxItems=1, context=context)
|
entrance_description = getXMLmax1("entrance_description")
|
||||||
alt = getXML(entrancecontents, "alt", maxItems=1, context=context)
|
exact_station = getXMLmax1("exact_station")
|
||||||
northing = getXML(entrancecontents, "northing", maxItems=1, context=context)
|
explorers = getXMLmax1("explorers")
|
||||||
easting = getXML(entrancecontents, "easting", maxItems=1, context=context)
|
findability = getXMLmax1("findability")
|
||||||
tag_station = getXML(entrancecontents, "tag_station", maxItems=1, context=context)
|
findability_description = getXMLmax1("findability_description")
|
||||||
exact_station = getXML(entrancecontents, "exact_station", maxItems=1, context=context)
|
location_description = getXMLmax1("location_description")
|
||||||
other_station = getXML(entrancecontents, "other_station", maxItems=1, context=context)
|
map_description = getXMLmax1("map_description")
|
||||||
other_description = getXML(entrancecontents, "other_description", maxItems=1, context=context)
|
marking = getXMLmax1("marking")
|
||||||
bearings = getXML(entrancecontents, "bearings", maxItems=1, context=context)
|
marking_comment = getXMLmax1("marking_comment")
|
||||||
url = getXML(entrancecontents, "url", maxItems=1, context=context)
|
name = getXMLmax1("name")
|
||||||
|
non_public = getXMLmax1("non_public")
|
||||||
|
northing = getXMLmax1("northing")
|
||||||
|
other_description = getXMLmax1("other_description")
|
||||||
|
other_station = getXMLmax1("other_station")
|
||||||
|
photo = getXMLmax1("photo")
|
||||||
|
tag_station = getXMLmax1("tag_station")
|
||||||
|
underground_description = getXMLmax1("underground_description")
|
||||||
|
url = getXMLmax1("url")
|
||||||
|
|
||||||
|
if len(slugs) >1:
|
||||||
|
# Only ever one of these per entrance in the expo dataset
|
||||||
|
message = f" ! - More than one slug for an entrance: {entrance}, slugs: {slugs}. Aborting."
|
||||||
|
DataIssue.objects.create(parser="entrances", message=message, url=f"/cave/{slug}/edit/")
|
||||||
|
print(message)
|
||||||
|
return
|
||||||
|
|
||||||
e, state = Entrance.objects.update_or_create(
|
e, state = Entrance.objects.update_or_create(
|
||||||
name=name[0],
|
name=name[0],
|
||||||
@@ -325,35 +336,31 @@ def readentrance(filename):
|
|||||||
"true": True,
|
"true": True,
|
||||||
"false": False,
|
"false": False,
|
||||||
}[non_public[0]],
|
}[non_public[0]],
|
||||||
entrance_description=entrance_description[0],
|
alt=alt[0],
|
||||||
explorers=explorers[0],
|
|
||||||
map_description=map_description[0],
|
|
||||||
location_description=location_description[0],
|
|
||||||
lastvisit=lastvisit[0],
|
|
||||||
approach=approach[0],
|
approach=approach[0],
|
||||||
underground_description=underground_description[0],
|
bearings=bearings[0],
|
||||||
photo=photo[0],
|
easting=easting[0],
|
||||||
marking=marking[0],
|
entrance_description=entrance_description[0],
|
||||||
marking_comment=marking_comment[0],
|
exact_station=exact_station[0],
|
||||||
|
explorers=explorers[0],
|
||||||
|
filename=filename,
|
||||||
findability=findability[0],
|
findability=findability[0],
|
||||||
findability_description=findability_description[0],
|
findability_description=findability_description[0],
|
||||||
alt=alt[0],
|
lastvisit=lastvisit[0],
|
||||||
|
location_description=location_description[0],
|
||||||
|
map_description=map_description[0],
|
||||||
|
marking=marking[0],
|
||||||
|
marking_comment=marking_comment[0],
|
||||||
northing=northing[0],
|
northing=northing[0],
|
||||||
easting=easting[0],
|
|
||||||
tag_station=tag_station[0],
|
|
||||||
exact_station=exact_station[0],
|
|
||||||
other_station=other_station[0],
|
|
||||||
other_description=other_description[0],
|
other_description=other_description[0],
|
||||||
bearings=bearings[0],
|
other_station=other_station[0],
|
||||||
url=url[0],
|
photo=photo[0],
|
||||||
filename=filename,
|
|
||||||
slug=slugs[0],
|
slug=slugs[0],
|
||||||
|
tag_station=tag_station[0],
|
||||||
|
underground_description=underground_description[0],
|
||||||
|
url=url[0],
|
||||||
)
|
)
|
||||||
if len(slugs) >1:
|
|
||||||
# Only ever one of these in the expo dataset
|
|
||||||
message = f" ! - More than one slug for an entrance: {entrance}, slugs: {slugs}. Aborting."
|
|
||||||
DataIssue.objects.create(parser="entrances", message=message, url=f"/cave/{slug}/edit/")
|
|
||||||
print(message)
|
|
||||||
|
|
||||||
def readcave(filename):
|
def readcave(filename):
|
||||||
"""Reads an enrance description from the .html file
|
"""Reads an enrance description from the .html file
|
||||||
@@ -538,10 +545,13 @@ def readcave(filename):
|
|||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
|
|
||||||
def getXML(text, itemname, minItems=1, maxItems=None, printwarnings=True, context=""):
|
def getXML(text, itemname, minItems=1, maxItems=None, context=""):
|
||||||
"""Reads a single XML tag"""
|
"""Reads a single XML tag
|
||||||
|
Should throw exception rather than producing error message here,
|
||||||
|
then handle exception in calling routine where it has the context.
|
||||||
|
"""
|
||||||
items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
|
items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
|
||||||
if len(items) < minItems and printwarnings:
|
if len(items) < minItems:
|
||||||
message = (
|
message = (
|
||||||
" ! %(count)i x %(itemname)s found, at least %(min)i expected. Load ABORT. "
|
" ! %(count)i x %(itemname)s found, at least %(min)i expected. Load ABORT. "
|
||||||
% {"count": len(items), "itemname": itemname, "min": minItems}
|
% {"count": len(items), "itemname": itemname, "min": minItems}
|
||||||
@@ -551,7 +561,7 @@ def getXML(text, itemname, minItems=1, maxItems=None, printwarnings=True, contex
|
|||||||
DataIssue.objects.create(parser="caves", message=message, url="" + context)
|
DataIssue.objects.create(parser="caves", message=message, url="" + context)
|
||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
if maxItems is not None and len(items) > maxItems and printwarnings:
|
if maxItems is not None and len(items) > maxItems:
|
||||||
message = (
|
message = (
|
||||||
" ! %(count)i x %(itemname)s found, no more than %(max)i expected in this XML unit. Load ABORT. "
|
" ! %(count)i x %(itemname)s found, no more than %(max)i expected in this XML unit. Load ABORT. "
|
||||||
% {"count": len(items), "itemname": itemname, "max": maxItems}
|
% {"count": len(items), "itemname": itemname, "max": maxItems}
|
||||||
|
|||||||
Reference in New Issue
Block a user