# -*- coding: utf-8 -*-
"""Import cave and entrance description files into the database.

The files read here are the HTML files found in settings.ENTRANCEDESCRIPTIONS
and settings.CAVEDESCRIPTIONS (EXPOWEB repo, not the loser repo).  Each file
holds pseudo-XML tags which are pulled out with getXML().
"""
import os
import re

from django.conf import settings

import troggle.core.models as models
import troggle.core.models_caves as models_caves

# Accepted spellings of the <non_public> flag.  Any other text raises KeyError.
_NON_PUBLIC_VALUES = {"True": True, "False": False, "true": True, "false": False}

# Entrance tags that must occur exactly once; each name is also the keyword
# passed to Entrance.objects.update_or_create().  Listed in read order.
_ENTRANCE_FIELDS = (
    "entrance_description",
    "explorers",
    "map_description",
    "location_description",
    "approach",
    "underground_description",
    "photo",
    "marking",
    "marking_comment",
    "findability",
    "findability_description",
    "alt",
    "northing",
    "easting",
    "tag_station",
    "exact_station",
    "other_station",
    "other_description",
    "bearings",
    "url",
)

# Cave tags that must occur exactly once; each name is also the keyword
# passed to Cave.objects.update_or_create().  Listed in read order.
_CAVE_FIELDS = (
    "kataster_code",
    "kataster_number",
    "unofficial_number",
    "explorers",
    "underground_description",
    "equipment",
    "references",
    "survey",
    "kataster_status",
    "underground_centre_line",
    "notes",
    "length",
    "depth",
    "extent",
    "survex_file",
    "description_file",
    "url",
)


def _read_single_fields(text, itemnames, context):
    """Return {itemname: list of tag contents} for each name, in the given order.

    Every tag is read with maxItems=1, so getXML() records a DataIssue when a
    tag is missing or repeated.
    """
    return {
        itemname: getXML(text, itemname, maxItems=1, context=context)
        for itemname in itemnames
    }


def _all_single(fields):
    """Return True when every list in the dict *fields* holds exactly one item."""
    return all(len(found) == 1 for found in fields.values())


def readcaves():
    """Reload all entrance files, then all cave files, into the database.

    Existing DataIssue rows for the 'caves' parser are deleted first and the
    two top-level areas "1623" and "1626" are created if absent.
    """
    # Clear the cave data issues as we are reloading
    models.DataIssue.objects.filter(parser='caves').delete()

    for short_name in ("1623", "1626"):
        models_caves.Area.objects.update_or_create(short_name=short_name, parent=None)

    print(" - Reading Entrances")
    # Entrances go first: readcave() looks entrances up by slug.
    # TODO: there should be a better way of getting a list of files.
    for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]:
        if filename.endswith('.html'):
            readentrance(filename)

    print (" - Reading Caves")
    for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]:
        if filename.endswith('.html'):
            readcave(filename)


def readentrance(filename):
    """Import one entrance description file from settings.ENTRANCEDESCRIPTIONS.

    The file is skipped (getXML() has already logged the reason as a
    DataIssue) unless it contains exactly one <entrance> element in which
    every expected tag is present the expected number of times.

    Raises KeyError if the <non_public> text is not True/False/true/false.
    """
    with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f:
        contents = f.read()
    # Leading space: getXML() appends this directly to its warning text.
    context = " in file %s" % filename

    entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context)
    if len(entrancecontentslist) != 1:
        return
    entrancecontents = entrancecontentslist[0]

    non_public = getXML(entrancecontents, "non_public", maxItems=1, context=context)
    name = getXML(entrancecontents, "name", maxItems=1, context=context)
    slugs = getXML(entrancecontents, "slug", context=context)
    fields = _read_single_fields(entrancecontents, _ENTRANCE_FIELDS, context)

    if not (len(non_public) == 1 and slugs and name and _all_single(fields)):
        return

    # NOTE(review): no `defaults=` is passed, so Django uses every keyword as
    # a lookup; an entrance whose text changed creates a new row - confirm
    # that this is intended.
    e, _created = models_caves.Entrance.objects.update_or_create(
        name=name[0],
        non_public=_NON_PUBLIC_VALUES[non_public[0]],
        filename=filename,
        cached_primary_slug=slugs[0],
        **{itemname: found[0] for itemname, found in fields.items()}
    )

    # Only the first slug listed in the file is marked primary.
    primary = True
    for slug in slugs:
        try:
            models_caves.EntranceSlug.objects.update_or_create(
                entrance=e, slug=slug, primary=primary)
        except Exception:
            # need to cope with duplicates
            print(" ! FAILED to get only one ENTRANCE when updating using: "+filename)
            kents = models_caves.EntranceSlug.objects.all().filter(
                entrance=e, slug=slug, primary=primary)
            # NOTE(review): k.slug is called as a method here, mirroring the
            # cave code - confirm EntranceSlug really exposes slug().
            for k in kents:
                message = " ! - DUPLICATE in db. entrance:"+ str(k.entrance) + ", slug:" + str(k.slug())
                models.DataIssue.objects.create(parser='caves', message=message)
                print(message)
            for k in kents:
                if k.slug() is not None:
                    print(" ! - OVERWRITING this one: slug:"+ str(k.slug()))
                    # NOTE(review): the note is set in memory only; nothing
                    # here saves k.
                    k.notes = "DUPLICATE entrance found on import. Please fix\n" + k.notes
        primary = False


def readcave(filename):
    """Import one cave description file from settings.CAVEDESCRIPTIONS.

    The file is skipped (getXML() has already logged the reason as a
    DataIssue) unless it contains exactly one <cave> element in which every
    expected tag is present the expected number of times.  On success the
    cave is linked to its areas, slugs and entrances; entrances are looked up
    by slug, so they must have been imported already.
    """
    with open(os.path.join(settings.CAVEDESCRIPTIONS, filename)) as f:
        contents = f.read()
    context = " in file %s" % filename

    cavecontentslist = getXML(contents, "cave", maxItems=1, context=context)
    if len(cavecontentslist) != 1:
        return
    cavecontents = cavecontentslist[0]

    non_public = getXML(cavecontents, "non_public", maxItems=1, context=context)
    slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context)
    official_name = getXML(cavecontents, "official_name", maxItems=1, context=context)
    areas = getXML(cavecontents, "area", context=context)
    fields = _read_single_fields(cavecontents, _CAVE_FIELDS, context)
    entrances = getXML(cavecontents, "entrance", context=context)

    if not (len(non_public) == 1 and slugs and len(official_name) == 1
            and areas and _all_single(fields) and entrances):
        return

    c = None
    try:
        # The non_public lookup stays inside the try, so an unrecognised
        # value is handled by the same fallback as a database error.
        c, _created = models_caves.Cave.objects.update_or_create(
            non_public=_NON_PUBLIC_VALUES[non_public[0]],
            official_name=official_name[0],
            filename=filename,
            **{itemname: found[0] for itemname, found in fields.items()}
        )
    except Exception:
        # need to cope with duplicates
        print(" ! FAILED to get only one CAVE when updating using: "+filename)
        kaves = models_caves.Cave.objects.all().filter(
            kataster_number=fields["kataster_number"][0])
        for k in kaves:
            message = " ! - DUPLICATES in db. kataster:"+ str(k.kataster_number) + ", slug:" + str(k.slug())
            models.DataIssue.objects.create(parser='caves', message=message)
            print(message)
        for k in kaves:
            if k.slug() is not None:
                print(" ! - OVERWRITING this one: slug:"+ str(k.slug()))
                # NOTE(review): the note is set in memory only; nothing here
                # saves k.
                k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes
                c = k

    if c is None:
        # Creation failed and no existing cave could stand in for it: there
        # is nothing to attach areas, slugs or entrances to.
        message = " ! Cave could not be created or matched, skipping file" + context
        models.DataIssue.objects.create(parser='caves', message=message)
        print(message)
        return

    for area_slug in areas:
        area = models_caves.Area.objects.filter(short_name=area_slug)
        if area:
            newArea = area[0]
        else:
            # Unknown areas are created as children of area "1623".
            newArea = models_caves.Area(
                short_name=area_slug,
                parent=models_caves.Area.objects.get(short_name="1623"))
            newArea.save()
        c.area.add(newArea)

    # Only the first slug listed in the file is marked primary.
    primary = True
    for slug in slugs:
        try:
            models_caves.CaveSlug.objects.update_or_create(
                cave=c, slug=slug, primary=primary)
        except Exception:
            message = " ! Cave update/create failure: %s, skipping file %s" % (slug, context)
            models.DataIssue.objects.create(parser='caves', message=message)
            print(message)
        primary = False

    for entrancecontents in entrances:
        found_slugs = getXML(entrancecontents, "entranceslug", maxItems=1, context=context)
        found_letters = getXML(entrancecontents, "letter", maxItems=1, context=context)
        if not found_slugs or not found_letters:
            # getXML() has already recorded the missing tag as a DataIssue.
            continue
        slug = found_slugs[0]
        letter = found_letters[0]
        try:
            entrance = models_caves.Entrance.objects.get(entranceslug__slug=slug)
            models_caves.CaveAndEntrance.objects.update_or_create(
                cave=c, entrance_letter=letter, entrance=entrance)
        except Exception:
            message = " ! Entrance setting failure, slug: %s letter: %s" % (slug, letter)
            models.DataIssue.objects.create(parser='caves', message=message)
            print(message)


def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):
    """Return the contents of every <itemname>...</itemname> element in *text*.

    Matching is non-greedy and spans newlines (re.S).  The count limits only
    produce warnings: when fewer than *minItems* or more than *maxItems*
    elements are found and *printwarnings* is true, a DataIssue is recorded
    and the message printed, with *context* appended.  The full list of
    matches is returned either way.
    """
    # The closing tag is required: without it the non-greedy group would
    # match the empty string for every opening tag.
    tag = re.escape(itemname)
    items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": tag}, text, re.S)
    if len(items) < minItems and printwarnings:
        message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {
            "count": len(items),
            "itemname": itemname,
            "min": minItems} + context
        models.DataIssue.objects.create(parser='caves', message=message)
        print(message)

    if maxItems is not None and len(items) > maxItems and printwarnings:
        message = " ! %(count)i %(itemname)s found, no more than %(max)i expected" % {
            "count": len(items),
            "itemname": itemname,
            "max": maxItems} + context
        models.DataIssue.objects.create(parser='caves', message=message)
        print(message)
    return items