forked from expo/troggle
b42249890e
Big overhaul of people processing: fullname added to the model; lastname is now names -1 unless you only have one name (yes, you, Wookey). This allows Jon Arne Toft and Wookey to live in the same DB. Names can now have HTML chars in them; this should be real Unicode, but that can only happen when we go to Python 3!
188 lines
12 KiB
Python
188 lines
12 KiB
Python
# -*- coding: utf-8 -*-
|
|
import troggle.core.models as models
|
|
from django.conf import settings
|
|
import os
|
|
import re
|
|
|
|
|
|
def readcaves():
    """Reload every entrance and cave description file into the database.

    Steps, in order:

    1. Delete the DataIssue rows previously recorded by the 'caves' parser.
    2. Make sure the top-level areas "1623" and "1626" exist.
    3. Run readentrance() on each ``.html`` file found directly inside
       settings.ENTRANCEDESCRIPTIONS.
    4. Run readcave() on each ``.html`` file found directly inside
       settings.CAVEDESCRIPTIONS.

    Entrances are loaded first because readcave() looks entrances up by
    slug when linking them to a cave.
    """
    # Clear the cave data issues as we are reloading
    models.DataIssue.objects.filter(parser='caves').delete()

    # Called purely for the side effect of creating the areas; the returned
    # (object, created) tuples are not needed here.
    models.Area.objects.update_or_create(short_name="1623", parent=None)
    models.Area.objects.update_or_create(short_name="1626", parent=None)

    print(" - Reading Entrances")
    # next(os.walk(d))[2] is the list of plain files at the top level of d.
    # The builtin next() works on Python 2.6+ and Python 3, whereas the
    # generator's .next() method only exists on Python 2.
    for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]:
        if filename.endswith('.html'):
            readentrance(filename)

    print(" - Reading Caves")
    for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]:
        if filename.endswith('.html'):
            readcave(filename)
|
|
|
|
def readentrance(filename):
    """Parse one entrance description file and store it in the database.

    The file is read from settings.ENTRANCEDESCRIPTIONS and must contain
    exactly one ``<entrance>`` element. Inside it, ``<non_public>`` and each
    tag listed in ``single_value_tags`` must occur exactly once, while
    ``<name>`` and ``<slug>`` must occur at least once. When any of these
    requirements is not met nothing is written; getXML() has already
    recorded a DataIssue for each missing or surplus tag.

    On success one Entrance is created/updated, plus one EntranceSlug per
    ``<slug>``; the first slug is flagged as the primary one.

    Args:
        filename: name of the file, relative to settings.ENTRANCEDESCRIPTIONS.

    Raises:
        KeyError: if ``<non_public>`` holds something other than
            "True", "False", "true" or "false".
    """
    with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f:
        contents = f.read()

    # The leading space matters: getXML() appends the context directly to
    # the end of its warning text.
    context = " in file %s" % filename

    entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context)
    if len(entrancecontentslist) != 1:
        return
    entrancecontents = entrancecontentslist[0]

    non_public = getXML(entrancecontents, "non_public", maxItems=1, context=context)
    name = getXML(entrancecontents, "name", maxItems=1, context=context)
    slugs = getXML(entrancecontents, "slug", context=context)

    # Tags that must appear exactly once. Each tag name is also the name of
    # the Entrance keyword argument it is stored under.
    single_value_tags = (
        "entrance_description",
        "explorers",
        "map_description",
        "location_description",
        "approach",
        "underground_description",
        "photo",
        "marking",
        "marking_comment",
        "findability",
        "findability_description",
        "alt",
        "northing",
        "easting",
        "tag_station",
        "exact_station",
        "other_station",
        "other_description",
        "bearings",
        "url",
    )
    found = {}
    for tag in single_value_tags:
        found[tag] = getXML(entrancecontents, tag, maxItems=1, context=context)

    if len(non_public) != 1 or not slugs or not name:
        return
    # "photo" is validated together with the other tags so that a file
    # without <photo> is skipped instead of raising IndexError below.
    if any(len(values) != 1 for values in found.values()):
        return

    fields = dict((tag, values[0]) for tag, values in found.items())
    e = models.Entrance.objects.update_or_create(
        name=name[0],
        non_public={"True": True, "False": False, "true": True, "false": False}[non_public[0]],
        filename=filename,
        cached_primary_slug=slugs[0],
        **fields)[0]

    # Only the first slug listed in the file is the primary one.
    for position, slug in enumerate(slugs):
        models.EntranceSlug.objects.update_or_create(entrance=e,
                                                     slug=slug,
                                                     primary=(position == 0))
|
|
|
|
def readcave(filename):
    """Parse one cave description file and store it in the database.

    The file is read from settings.CAVEDESCRIPTIONS and must contain exactly
    one ``<cave>`` element. Inside it, ``<non_public>``, ``<official_name>``
    and each tag listed in ``single_value_tags`` must occur exactly once,
    while ``<caveslug>``, ``<area>`` and ``<entrance>`` must occur at least
    once. When any requirement is not met nothing is written; getXML() has
    already recorded a DataIssue for each missing or surplus tag.

    On success the Cave is created/updated and then linked to:

    * its areas (an unknown area is created as a child of area "1623"),
    * its slugs (the first one is flagged primary),
    * its entrances, looked up by entrance slug.

    Failures while storing a slug or linking an entrance are recorded as
    DataIssue rows and printed; they do not abort the import.

    Args:
        filename: name of the file, relative to settings.CAVEDESCRIPTIONS.

    Raises:
        KeyError: if ``<non_public>`` holds something other than
            "True", "False", "true" or "false".
    """
    with open(os.path.join(settings.CAVEDESCRIPTIONS, filename)) as f:
        contents = f.read()
    context = " in file %s" % filename

    cavecontentslist = getXML(contents, "cave", maxItems=1, context=context)
    if len(cavecontentslist) != 1:
        return
    cavecontents = cavecontentslist[0]

    non_public = getXML(cavecontents, "non_public", maxItems=1, context=context)
    slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context)
    official_name = getXML(cavecontents, "official_name", maxItems=1, context=context)
    areas = getXML(cavecontents, "area", context=context)

    # Tags that must appear exactly once. Each tag name is also the name of
    # the Cave keyword argument it is stored under.
    single_value_tags = (
        "kataster_code",
        "kataster_number",
        "unofficial_number",
        "explorers",
        "underground_description",
        "equipment",
        "references",
        "survey",
        "kataster_status",
        "underground_centre_line",
        "notes",
        "length",
        "depth",
        "extent",
        "survex_file",
        "description_file",
        "url",
    )
    found = {}
    for tag in single_value_tags:
        found[tag] = getXML(cavecontents, tag, maxItems=1, context=context)

    entrances = getXML(cavecontents, "entrance", context=context)

    if len(non_public) != 1 or len(official_name) != 1:
        return
    if not (slugs and areas and entrances):
        return
    if any(len(values) != 1 for values in found.values()):
        return

    fields = dict((tag, values[0]) for tag, values in found.items())
    c = models.Cave.objects.update_or_create(
        non_public={"True": True, "False": False, "true": True, "false": False}[non_public[0]],
        official_name=official_name[0],
        filename=filename,
        **fields)[0]

    for area_slug in areas:
        area = models.Area.objects.filter(short_name=area_slug)
        if area:
            newArea = area[0]
        else:
            # Areas not seen before are filed underneath area "1623".
            newArea = models.Area(short_name=area_slug,
                                  parent=models.Area.objects.get(short_name="1623"))
            newArea.save()
        c.area.add(newArea)

    # Only the first slug listed in the file is the primary one.
    primary = True
    for slug in slugs:
        try:
            models.CaveSlug.objects.update_or_create(cave=c,
                                                     slug=slug,
                                                     primary=primary)
        except Exception:
            # Exception rather than a bare except, so that Ctrl-C and
            # SystemExit still stop the import.
            message = "Can't find text (slug): %s, skipping %s" % (slug, context)
            models.DataIssue.objects.create(parser='caves', message=message)
            print(message)
        primary = False

    for entrance_xml in entrances:
        entrance_slugs = getXML(entrance_xml, "entranceslug", maxItems=1, context=context)
        letters = getXML(entrance_xml, "letter", maxItems=1, context=context)
        if not entrance_slugs or not letters:
            # getXML() has already recorded a DataIssue for the missing tag;
            # skip this reference instead of failing with IndexError.
            continue
        slug = entrance_slugs[0]
        try:
            entrance = models.Entrance.objects.get(entranceslug__slug=slug)
            models.CaveAndEntrance.objects.update_or_create(cave=c,
                                                            entrance_letter=letters[0],
                                                            entrance=entrance)
        except Exception:
            message = "Entrance text (slug) %s missing %s" % (slug, context)
            models.DataIssue.objects.create(parser='caves', message=message)
            print(message)
|
|
|
|
|
|
def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):
    """Return the contents of every ``<itemname>...</itemname>`` pair in text.

    Matching is a non-greedy regular-expression search in which "." also
    matches newlines, so element contents may span several lines. Only tags
    written exactly as ``<itemname>`` (no attributes) are recognised.

    Args:
        text: the string to search.
        itemname: tag name; it is matched literally.
        minItems: fewer matches than this triggers a warning.
        maxItems: more matches than this triggers a warning; None disables
            the upper bound.
        printwarnings: when false, no warning is recorded or printed.
        context: text appended verbatim to each warning message (callers
            pass something like " in file foo.html").

    Returns:
        A list with the inner text of each match, in document order. The
        list is returned unchanged even when a warning was issued.

    Each warning is stored as a DataIssue for the 'caves' parser and also
    printed.
    """
    # Escape the tag name so that regex metacharacters in it cannot make the
    # pattern match a different tag.
    pattern = "<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": re.escape(itemname)}
    items = re.findall(pattern, text, re.S)

    if printwarnings:
        count = len(items)
        messages = []
        if count < minItems:
            messages.append("%(count)i %(itemname)s found, at least %(min)i expected" % {"count": count,
                                                                                         "itemname": itemname,
                                                                                         "min": minItems} + context)
        if maxItems is not None and count > maxItems:
            messages.append("%(count)i %(itemname)s found, no more than %(max)i expected" % {"count": count,
                                                                                             "itemname": itemname,
                                                                                             "max": maxItems} + context)
        for message in messages:
            models.DataIssue.objects.create(parser='caves', message=message)
            print(message)

    return items
|