# -*- coding: utf-8 -*-
# Origin: parsers/caves.py, added by commit
# c2029df3c9e253973d92812d8b3664a0c4197884
# "[PATCH] New parser for new cave format"
# (Martin Green, Sun, 10 Jun 2012 16:56:12 +0100).
"""Import cave and entrance description files into the troggle database.

Each description file holds a single ``<cave>`` or ``<entrance>`` element
whose children are simple ``<tag>value</tag>`` pairs.  The files are not
parsed with an XML library; ``getXML`` pulls the text between matching tags
out with a regular expression.

A file that lacks any of the expected elements is skipped (``getXML`` prints
a warning naming the element and the file) rather than aborting the import.
"""
import os
import re

from django.conf import settings
import troggle.core.models as models


# Spellings accepted for the <non_public> flag.  Any other text raises
# KeyError when the record is built.
_BOOLEAN_VALUES = {"True": True, "False": False, "true": True, "false": False}

# Children of <entrance> that are read with maxItems=1 and copied verbatim
# onto the Entrance model field of the same name.  The order is the order in
# which they are looked up, and therefore the order of any warnings.
_ENTRANCE_DETAIL_FIELDS = (
    "entrance_description",
    "explorers",
    "map_description",
    "location_description",
    "approach",
    "underground_description",
    "photo",
    "marking",
    "marking_comment",
    "findability",
    "findability_description",
    "alt",
    "northing",
    "easting",
    "tag_station",
    "exact_station",
    "other_station",
    "other_description",
    "bearings",
    "url",
)

# Children of <cave> that are read with maxItems=1 and copied verbatim onto
# the Cave model field of the same name (looked up after <area>).
_CAVE_DETAIL_FIELDS = (
    "kataster_code",
    "kataster_number",
    "unofficial_number",
    "explorers",
    "underground_description",
    "equipment",
    "references",
    "survey",
    "kataster_status",
    "underground_centre_line",
    "notes",
    "length",
    "depth",
    "extent",
    "survex_file",
    "description_file",
    "url",
)


def _read_single_fields(text, fieldnames, context):
    """Look up each of ``fieldnames`` in ``text``, in order, with maxItems=1.

    Returns a dict mapping every field name to the list ``getXML`` found.
    """
    found = {}
    for fieldname in fieldnames:
        found[fieldname] = getXML(text, fieldname, maxItems=1, context=context)
    return found


def _first_values(fields):
    """Map each field name to the first value found for it.

    Every list in ``fields`` must be non-empty.
    """
    return dict((fieldname, values[0]) for fieldname, values in fields.items())


def readcaves():
    """Import every entrance file, then every cave file.

    Entrances are loaded first because ``readcave`` looks existing Entrance
    rows up by slug in order to link them to the cave.

    NOTE(review): the top-level area "1623" is created unconditionally, so
    running this twice against the same database adds a second "1623" row
    (and ``readcave`` fetches that area with ``objects.get``) -- confirm the
    import is only ever run on a freshly reset database.
    """
    newArea = models.Area(short_name="1623", parent=None)
    newArea.save()
    print("Entrances")
    # next(os.walk(d))[2] is the list of plain files directly inside d.
    for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]:
        readentrance(filename)
    print("Caves")
    for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]:
        readcave(filename)


def readentrance(filename):
    """Create an Entrance (and its EntranceSlug rows) from one file.

    ``filename`` is relative to ``settings.ENTRANCEDESCRIPTIONS``.  Nothing is
    saved unless the file contains exactly one ``<entrance>`` element with all
    the expected children.  The first ``<slug>`` becomes the primary slug.

    Raises KeyError if ``<non_public>`` is not one of True/False/true/false.
    """
    with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f:
        contents = f.read()
    context = " in file %s" % filename

    entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context)
    if len(entrancecontentslist) != 1:
        return
    entrancecontents = entrancecontentslist[0]

    non_public = getXML(entrancecontents, "non_public", maxItems=1, context=context)
    name = getXML(entrancecontents, "name", maxItems=1, context=context)
    slugs = getXML(entrancecontents, "slug", context=context)
    fields = _read_single_fields(entrancecontents, _ENTRANCE_DETAIL_FIELDS, context)

    # <photo> was historically left out of the exactly-one check even though
    # photo[0] is used below; require at least one so that a file without a
    # photo is skipped like any other incomplete file instead of raising
    # IndexError.
    complete = (
        len(non_public) == 1
        and len(slugs) >= 1
        and len(name) >= 1
        and len(fields["photo"]) >= 1
        and all(len(values) == 1
                for fieldname, values in fields.items()
                if fieldname != "photo")
    )
    if not complete:
        return

    e = models.Entrance(name=name[0],
                        non_public=_BOOLEAN_VALUES[non_public[0]],
                        filename=filename,
                        cached_primary_slug=slugs[0],
                        **_first_values(fields))
    e.save()
    for index, slug in enumerate(slugs):
        # Only the first slug listed in the file is the primary one.
        cs = models.EntranceSlug(entrance=e, slug=slug, primary=(index == 0))
        cs.save()


def readcave(filename):
    """Create a Cave from one file and link it to its areas and entrances.

    ``filename`` is relative to ``settings.CAVEDESCRIPTIONS``.  Nothing is
    saved unless the file contains exactly one ``<cave>`` element with all the
    expected children.  For a complete file this:

    * saves the Cave;
    * attaches every ``<area>``, creating missing areas as children of the
      "1623" area;
    * saves a CaveSlug per ``<caveslug>`` (the first one is primary);
    * saves a CaveAndEntrance per nested ``<entrance>`` whose
      ``<entranceslug>`` matches an existing Entrance.  An ``<entrance>``
      that lacks a slug or letter, or names an unknown entrance, is reported
      and skipped.

    Raises KeyError if ``<non_public>`` is not one of True/False/true/false.
    """
    with open(os.path.join(settings.CAVEDESCRIPTIONS, filename)) as f:
        contents = f.read()
    context = " in file %s" % filename

    cavecontentslist = getXML(contents, "cave", maxItems=1, context=context)
    if len(cavecontentslist) != 1:
        return
    cavecontents = cavecontentslist[0]

    non_public = getXML(cavecontents, "non_public", maxItems=1, context=context)
    slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context)
    fields = _read_single_fields(cavecontents, ("official_name",), context)
    areas = getXML(cavecontents, "area", context=context)
    fields.update(_read_single_fields(cavecontents, _CAVE_DETAIL_FIELDS, context))
    entrances = getXML(cavecontents, "entrance", context=context)

    complete = (
        len(non_public) == 1
        and len(slugs) >= 1
        and len(areas) >= 1
        and len(entrances) >= 1
        and all(len(values) == 1 for values in fields.values())
    )
    if not complete:
        return

    c = models.Cave(non_public=_BOOLEAN_VALUES[non_public[0]],
                    filename=filename,
                    **_first_values(fields))
    c.save()

    for area_slug in areas:
        area = models.Area.objects.filter(short_name=area_slug)
        if area:
            newArea = area[0]
        else:
            newArea = models.Area(short_name=area_slug,
                                  parent=models.Area.objects.get(short_name="1623"))
            newArea.save()
        c.area.add(newArea)

    for index, slug in enumerate(slugs):
        # Only the first slug listed in the file is the primary one.
        cs = models.CaveSlug(cave=c, slug=slug, primary=(index == 0))
        cs.save()

    for entrance_xml in entrances:
        slug_values = getXML(entrance_xml, "entranceslug", maxItems=1, context=context)
        letter_values = getXML(entrance_xml, "letter", maxItems=1, context=context)
        if not slug_values or not letter_values:
            # getXML has already printed a warning naming the missing element.
            continue
        try:
            entrance = models.Entrance.objects.get(entranceslug__slug=slug_values[0])
        except models.Entrance.DoesNotExist:
            # One dangling reference should not abort the whole import.
            print("Entrance %s not found" % slug_values[0] + context)
            continue
        ce = models.CaveAndEntrance(cave=c,
                                    entrance_letter=letter_values[0],
                                    entrance=entrance)
        ce.save()


def getXML(text, itemname, minItems=1, maxItems=None, printwarnings=True, context=""):
    """Return the contents of every ``<itemname>...</itemname>`` in ``text``.

    Matching is non-greedy and spans newlines, so each result is the text
    between an opening tag and the nearest following closing tag.  Opening
    tags carrying attributes are not recognised.

    text          -- string to search.
    itemname      -- tag name, without angle brackets.
    minItems      -- warn if fewer matches than this are found.
    maxItems      -- warn if more matches than this are found (None: no limit).
    printwarnings -- set to False to suppress both warnings.
    context       -- text appended to each warning (e.g. " in file foo.html").

    The limits only control the warnings; all matches are always returned.
    """
    # The closing tag is what gives the non-greedy group something to stop
    # at; without it "(.*?)" always captures the empty string.
    pattern = "<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": re.escape(itemname)}
    items = re.findall(pattern, text, re.S)
    if len(items) < minItems and printwarnings:
        print("%(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
                                                                          "itemname": itemname,
                                                                          "min": minItems} + context)
    if maxItems is not None and len(items) > maxItems and printwarnings:
        print("%(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items),
                                                                              "itemname": itemname,
                                                                              "max": maxItems} + context)
    return items