From 19055d01fb4310f4ae241ac831270fd03dccb0bb Mon Sep 17 00:00:00 2001
From: Martin Green <martin.speleo@gmail.com>
Date: Sun, 10 Jun 2012 16:56:12 +0100
Subject: [PATCH] New parser for new cave format

---
 parsers/caves.py | 166 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 166 insertions(+)
 create mode 100644 parsers/caves.py

diff --git a/parsers/caves.py b/parsers/caves.py
new file mode 100644
index 0000000..43d3688
--- /dev/null
+++ b/parsers/caves.py
@@ -0,0 +1,166 @@
+# -*- coding: utf-8 -*-
+import troggle.core.models as models
+from django.conf import settings
+import os
+import re
+
+
+def readcaves():
+    """Create the "1623" area (no parent), then import every entrance file followed by every cave file."""
+    models.Area(short_name = "1623", parent = None).save()
+    print("Entrances")
+    for entrancefile in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: # [2]: names of the files directly in that directory
+        readentrance(entrancefile)
+    print("Caves") # caves come second: readcave looks each cave's entrances up in the database by slug
+    for cavefile in next(os.walk(settings.CAVEDESCRIPTIONS))[2]:
+        readcave(cavefile)
+
+def readentrance(filename):
+    """Import one entrance description file into the database.
+
+    filename is a bare file name inside settings.ENTRANCEDESCRIPTIONS. The file has to contain exactly
+    one <entrance> element holding <non_public> exactly once, <name> and <slug> at least once, and every
+    tag in single_tags exactly once; otherwise nothing is saved (getXML prints a warning for each missing
+    or unexpectedly repeated element). On success one models.Entrance is saved, plus one
+    models.EntranceSlug per <slug> with the first <slug> flagged as primary.
+
+    Raises KeyError if <non_public> is not one of "True", "False", "true" or "false".
+    """
+    with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f:
+        contents = f.read()
+    context = " in file %s" % filename
+    entrancecontentslist = getXML(contents, "entrance", maxItems = 1, context = context)
+    if len(entrancecontentslist) != 1:
+        return
+    entrancecontents = entrancecontentslist[0]
+    # Children that must occur exactly once; each is stored in the Entrance field of the same name.
+    # The completeness check and the constructor arguments are both derived from this one tuple, so a
+    # tag such as "photo" cannot be indexed with [0] (IndexError on an empty list) without being checked.
+    single_tags = (
+        "entrance_description",
+        "explorers",
+        "map_description",
+        "location_description",
+        "approach",
+        "underground_description",
+        "photo",
+        "marking",
+        "marking_comment",
+        "findability",
+        "findability_description",
+        "alt",
+        "northing",
+        "easting",
+        "tag_station",
+        "exact_station",
+        "other_station",
+        "other_description",
+        "bearings",
+        "url",
+    )
+    non_public = getXML(entrancecontents, "non_public", maxItems = 1, context = context)
+    name = getXML(entrancecontents, "name", maxItems = 1, context = context)
+    slugs = getXML(entrancecontents, "slug", context = context)
+    found = {}
+    for tag in single_tags:
+        found[tag] = getXML(entrancecontents, tag, maxItems = 1, context = context)
+    complete = len(non_public) == 1 and len(name) >= 1 and len(slugs) >= 1
+    if not (complete and all(len(values) == 1 for values in found.values())):
+        return
+    fields = dict((tag, values[0]) for tag, values in found.items())
+    e = models.Entrance(name = name[0],
+                        non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
+                        filename = filename,
+                        cached_primary_slug = slugs[0],
+                        **fields)
+    e.save()
+    # The first slug listed in the file is the primary one.
+    for index, slug in enumerate(slugs):
+        cs = models.EntranceSlug(entrance = e, slug = slug, primary = (index == 0))
+        cs.save()
+
+def readcave(filename):
+    """Import one cave description file into the database.
+
+    filename is a bare file name inside settings.CAVEDESCRIPTIONS. The file has to contain exactly one
+    <cave> element holding <non_public> and <official_name> exactly once, <caveslug>, <area> and
+    <entrance> at least once, and every tag in single_tags exactly once; otherwise nothing is saved
+    (getXML prints a warning for each missing or unexpectedly repeated element). On success a
+    models.Cave is saved and linked to its areas (created under area "1623" when not yet known), its
+    slugs (the first one flagged as primary) and its entrances.
+
+    Raises KeyError if <non_public> is not one of "True", "False", "true" or "false".
+    """
+    with open(os.path.join(settings.CAVEDESCRIPTIONS, filename)) as f:
+        contents = f.read()
+    context = " in file %s" % filename
+    cavecontentslist = getXML(contents, "cave", maxItems = 1, context = context)
+    if len(cavecontentslist) != 1:
+        return
+    cavecontents = cavecontentslist[0]
+    # Children that must occur exactly once; each is stored in the Cave field of the same name.
+    # The completeness check and the constructor arguments are both derived from this one tuple, so
+    # they cannot drift apart.
+    single_tags = ("kataster_code", "kataster_number", "unofficial_number", "explorers",
+                   "underground_description", "equipment", "references", "survey", "kataster_status",
+                   "underground_centre_line", "notes", "length", "depth", "extent", "survex_file",
+                   "description_file", "url")
+    non_public = getXML(cavecontents, "non_public", maxItems = 1, context = context)
+    slugs = getXML(cavecontents, "caveslug", maxItems = 1, context = context)
+    official_name = getXML(cavecontents, "official_name", maxItems = 1, context = context)
+    areas = getXML(cavecontents, "area", context = context)
+    found = {}
+    for tag in single_tags:
+        found[tag] = getXML(cavecontents, tag, maxItems = 1, context = context)
+    entrances = getXML(cavecontents, "entrance", context = context)
+    complete = (len(non_public) == 1 and len(slugs) >= 1 and len(official_name) == 1
+                and len(areas) >= 1 and len(entrances) >= 1)
+    if not (complete and all(len(values) == 1 for values in found.values())):
+        return
+    fields = dict((tag, values[0]) for tag, values in found.items())
+    c = models.Cave(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
+                    official_name = official_name[0],
+                    filename = filename,
+                    **fields)
+    c.save()
+    for area_slug in areas:
+        area = models.Area.objects.filter(short_name = area_slug)
+        if area:
+            newArea = area[0]
+        else:
+            # Areas not yet in the database are created as children of the "1623" area.
+            newArea = models.Area(short_name = area_slug, parent = models.Area.objects.get(short_name = "1623"))
+            newArea.save()
+        c.area.add(newArea)
+    # The first slug listed in the file is the primary one.
+    for index, slug in enumerate(slugs):
+        cs = models.CaveSlug(cave = c, slug = slug, primary = (index == 0))
+        cs.save()
+    # Link the cave to each listed entrance, which must already be in the database.
+    for entrancexml in entrances:
+        entranceslugs = getXML(entrancexml, "entranceslug", maxItems = 1, context = context)
+        letters = getXML(entrancexml, "letter", maxItems = 1, context = context)
+        if not entranceslugs or not letters:
+            # getXML has already printed which element is missing; indexing [0] here would raise
+            # IndexError and abort the whole import with this cave only partly linked.
+            continue
+        try:
+            entrance = models.Entrance.objects.get(entranceslug__slug = entranceslugs[0])
+        except (models.Entrance.DoesNotExist, models.Entrance.MultipleObjectsReturned):
+            # A slug that matches no entrance (or several) skips this link instead of aborting the import.
+            print("Entrance %s not found or ambiguous, not linked to cave" % entranceslugs[0] + context)
+            continue
+        ce = models.CaveAndEntrance(cave = c, entrance_letter = letters[0], entrance = entrance)
+        ce.save()
+
+def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):
+    """Return the inner text of every <itemname>...</itemname> element in text, in document order.
+    Matching is non-greedy and spans newlines (re.S). If printwarnings is true, a warning with context
+    appended is printed when fewer than minItems, or more than a non-None maxItems, were found."""
+    tag = re.escape(itemname) # itemname is a literal tag name: keep regex metacharacters in it inert
+    items = re.findall("<%s>(.*?)</%s>" % (tag, tag), text, re.S)
+    if printwarnings and len(items) < minItems:
+        print("%(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items), "itemname": itemname, "min": minItems} + context)
+    if printwarnings and maxItems is not None and len(items) > maxItems:
+        print("%(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items), "itemname": itemname, "max": maxItems} + context)
+    return items