troggle/parsers/caves.py

# -*- coding: utf-8 -*-
import os
import re

from django.conf import settings

import troggle.core.models as models
import troggle.core.models_caves as models_caves

def readcaves():
    """Re-import every entrance and cave description file.

    Deletes the DataIssue rows previously recorded for the 'caves' parser,
    runs update_or_create for the "1623" and "1626" areas, then passes each
    ``.html`` file found directly inside settings.ENTRANCEDESCRIPTIONS to
    readentrance() and each one inside settings.CAVEDESCRIPTIONS to
    readcave().
    """
    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
    # Everything is reloaded, so previously recorded problems are stale.
    models.DataIssue.objects.filter(parser='caves').delete()

    for top_level_area in ("1623", "1626"):
        models_caves.Area.objects.update_or_create(short_name=top_level_area, parent=None)

    # Entrances are read first: readcave() looks entrances up by slug.
    print(" - Reading Entrances")
    # Third element of the first os.walk() result: the files directly in the directory.
    entrance_files = next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]
    for entrance_file in entrance_files:
        if not entrance_file.endswith('.html'):
            continue
        readentrance(entrance_file)

    print(" - Reading Caves")
    cave_files = next(os.walk(settings.CAVEDESCRIPTIONS))[2]
    for cave_file in cave_files:
        if not cave_file.endswith('.html'):
            continue
        readcave(cave_file)


def readentrance(filename):
    """Import one entrance description file into the database.

    Reads ``filename`` from settings.ENTRANCEDESCRIPTIONS, extracts the single
    ``<entrance>`` element and its child tags with getXML() and, only when
    every expected tag is present, stores an Entrance plus one EntranceSlug
    per ``<slug>`` (the first slug listed is marked primary).

    Missing or surplus tags are recorded as DataIssues by getXML(); an
    incomplete file is then skipped without creating anything.

    filename -- bare file name (no directory part) of the description file.
    """
    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
    with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f:
        contents = f.read()
    # Leading space: getXML() appends this string directly to its warning text.
    context = " in file %s" % filename
    entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context)
    if len(entrancecontentslist) != 1:
        return
    entrancecontents = entrancecontentslist[0]

    non_public = getXML(entrancecontents, "non_public", maxItems=1, context=context)
    name = getXML(entrancecontents, "name", maxItems=1, context=context)
    slugs = getXML(entrancecontents, "slug", context=context)

    # Tags that must occur exactly once and whose text is stored unchanged in
    # the Entrance field of the same name. The order is the order in which
    # getXML() is called, and therefore the order of any recorded warnings.
    single_fields = (
        "entrance_description",
        "explorers",
        "map_description",
        "location_description",
        "approach",
        "underground_description",
        "photo",
        "marking",
        "marking_comment",
        "findability",
        "findability_description",
        "alt",
        "northing",
        "easting",
        "tag_station",
        "exact_station",
        "other_station",
        "other_description",
        "bearings",
        "url",
    )
    values = {
        tag: getXML(entrancecontents, tag, maxItems=1, context=context)
        for tag in single_fields
    }

    # "photo" is included in this check: it was previously read with [0]
    # without being checked, so a missing <photo> tag raised IndexError.
    complete = (
        len(non_public) == 1
        and len(slugs) >= 1
        and len(name) >= 1
        and all(len(found) == 1 for found in values.values())
    )
    if not complete:
        return

    # NOTE(review): every field is passed as a lookup argument, so an edited
    # file presumably creates a new row rather than updating the old one -
    # confirm whether a narrower key with defaults= is wanted.
    e, _created = models_caves.Entrance.objects.update_or_create(
        name=name[0],
        non_public={"True": True, "False": False, "true": True, "false": False}[non_public[0]],
        filename=filename,
        cached_primary_slug=slugs[0],
        **{tag: found[0] for tag, found in values.items()}
    )

    for index, slug in enumerate(slugs):
        primary = index == 0
        try:
            models_caves.EntranceSlug.objects.update_or_create(entrance=e,
                                                               slug=slug,
                                                               primary=primary)
        except Exception:
            # need to cope with duplicates
            print(" ! FAILED to get only one ENTRANCE when updating using: "+filename)
            kents = models_caves.EntranceSlug.objects.all().filter(entrance=e,
                                                                   slug=slug,
                                                                   primary=primary)
            for k in kents:
                # slug is a field on EntranceSlug (it is a filter keyword
                # above), so it is read as an attribute, not called.
                message = " ! - DUPLICATE in db. entrance:" + str(k.entrance) + ", slug:" + str(k.slug)
                models.DataIssue.objects.create(parser='caves', message=message)
                print(message)
            # The earlier code also announced "OVERWRITING" and prepended text
            # to k.notes here, but the object was never saved or used again,
            # so only the DataIssue above is kept.

def readcave(filename):
    """Import one cave description file into the database.

    Reads ``filename`` from settings.CAVEDESCRIPTIONS, extracts the single
    ``<cave>`` element and its child tags with getXML() and, only when every
    expected tag is present, stores a Cave and links it to its areas, its
    CaveSlug(s) and, through CaveAndEntrance, to the entrances it lists.

    Missing or surplus tags are recorded as DataIssues by getXML(); an
    incomplete file is then skipped. Failures while linking slugs or
    entrances are recorded as DataIssues and the import carries on.

    filename -- bare file name (no directory part) of the description file.
    """
    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
    with open(os.path.join(settings.CAVEDESCRIPTIONS, filename)) as f:
        contents = f.read()
    context = " in file %s" % filename
    cavecontentslist = getXML(contents, "cave", maxItems=1, context=context)
    if len(cavecontentslist) != 1:
        return
    cavecontents = cavecontentslist[0]

    non_public = getXML(cavecontents, "non_public", maxItems=1, context=context)
    slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context)
    official_name = getXML(cavecontents, "official_name", maxItems=1, context=context)
    areas = getXML(cavecontents, "area", context=context)

    # Tags that must occur exactly once and whose text is stored unchanged in
    # the Cave field of the same name. The order is the order in which
    # getXML() is called, and therefore the order of any recorded warnings.
    single_fields = (
        "kataster_code",
        "kataster_number",
        "unofficial_number",
        "explorers",
        "underground_description",
        "equipment",
        "references",
        "survey",
        "kataster_status",
        "underground_centre_line",
        "notes",
        "length",
        "depth",
        "extent",
        "survex_file",
        "description_file",
        "url",
    )
    values = {
        tag: getXML(cavecontents, tag, maxItems=1, context=context)
        for tag in single_fields
    }
    entrances = getXML(cavecontents, "entrance", context=context)

    complete = (
        len(non_public) == 1
        and len(slugs) >= 1
        and len(official_name) == 1
        and len(areas) >= 1
        and len(entrances) >= 1
        and all(len(found) == 1 for found in values.values())
    )
    if not complete:
        return

    # Stays None if neither the create/update nor the duplicate search below
    # yields a cave; previously that case hit a NameError further down.
    c = None
    try:
        c, _created = models_caves.Cave.objects.update_or_create(
            non_public={"True": True, "False": False, "true": True, "false": False}[non_public[0]],
            official_name=official_name[0],
            filename=filename,
            **{tag: found[0] for tag, found in values.items()}
        )
    except Exception:
        # need to cope with duplicates
        print(" ! FAILED to get only one CAVE when updating using: "+filename)
        kaves = models_caves.Cave.objects.all().filter(kataster_number=values["kataster_number"][0])
        for k in kaves:
            message = " ! - DUPLICATES in db. kataster:"+ str(k.kataster_number) + ", slug:" + str(k.slug())
            models.DataIssue.objects.create(parser='caves', message=message)
            print(message)
        for k in kaves:
            if k.slug() is not None:
                print(" ! - OVERWRITING this one: slug:"+ str(k.slug()))
                # NOTE(review): this change to notes is only in memory; no
                # save() follows in this function - confirm that is intended.
                k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes
                c = k

    if c is None:
        message = " ! No cave could be created or matched, skipping" + context
        models.DataIssue.objects.create(parser='caves', message=message)
        print(message)
        return

    for area_slug in areas:
        area = models_caves.Area.objects.filter(short_name=area_slug)
        if area:
            newArea = area[0]
        else:
            # Areas not seen before are created as children of "1623".
            newArea = models_caves.Area(short_name=area_slug, parent=models_caves.Area.objects.get(short_name="1623"))
            newArea.save()
        c.area.add(newArea)

    for index, slug in enumerate(slugs):
        primary = index == 0
        try:
            models_caves.CaveSlug.objects.update_or_create(cave=c,
                                                           slug=slug,
                                                           primary=primary)
        except Exception:
            message = " ! Cave update/create failure: %s, skipping file %s" % (slug, context)
            models.DataIssue.objects.create(parser='caves', message=message)
            print(message)

    for entrance_xml in entrances:
        entrance_slugs = getXML(entrance_xml, "entranceslug", maxItems=1, context=context)
        letters = getXML(entrance_xml, "letter", maxItems=1, context=context)
        if not entrance_slugs or not letters:
            # getXML() has already recorded the missing tag as a DataIssue;
            # indexing the empty list here used to abort the whole import.
            continue
        slug = entrance_slugs[0]
        letter = letters[0]
        try:
            entrance = models_caves.Entrance.objects.get(entranceslug__slug=slug)
            models_caves.CaveAndEntrance.objects.update_or_create(cave=c, entrance_letter=letter, entrance=entrance)
        except Exception:
            message = " ! Entrance setting failure, slug: %s letter: %s" % (slug, letter)
            models.DataIssue.objects.create(parser='caves', message=message)
            print(message)
                

def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):
    """Return the contents of every ``<itemname>...</itemname>`` pair in text.

    Matching is non-greedy and spans newlines. The list of matched contents
    is always returned, whatever the count.

    When printwarnings is true, a count below minItems, or above maxItems
    (if maxItems is not None), is stored as a DataIssue for the 'caves'
    parser and printed; context is appended verbatim to that message.
    """
    def report(message):
        # Store the problem for later display and echo it to the console.
        models.DataIssue.objects.create(parser='caves', message=message)
        print(message)

    pattern = "<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}
    items = re.findall(pattern, text, re.S)
    found = len(items)

    if found < minItems and printwarnings:
        too_few = " ! %(count)i %(itemname)s found, at least %(min)i expected" % dict(
            count=found, itemname=itemname, min=minItems)
        report(too_few + context)

    if maxItems is not None and found > maxItems and printwarnings:
        too_many = " ! %(count)i %(itemname)s found, no more than %(max)i expected" % dict(
            count=found, itemname=itemname, max=maxItems)
        report(too_many + context)

    return items
New parser for new cave format 2012-06-10 16:56:12 +01:00			`# -- coding: utf-8 --`
			`import os`
			`import re`

Convert codebase for python3 usage 2020-05-24 01:57:06 +01:00			`from django.conf import settings`

			`import troggle.core.models as models`
Moved classes to models_caves and fixed imports 2020-05-28 04:54:53 +01:00			`import troggle.core.models_caves as models_caves`
New parser for new cave format 2012-06-10 16:56:12 +01:00
			`def readcaves():`
Updating caves and entrances is no longer nuclear! Big overhaul of people processing, fullname added to the model lastname is now names -1 unless you only have one (yes you Wookey) this allows for Jon Arne Toft and Wookey to live it the same DB names can now have html chars in them, this should be real unicode but that can only happen when we go to Python 3! 2019-04-19 22:52:54 +01:00			`# Clear the cave data issues as we are reloading`
attempt to simplify wnt horribly wrong 2020-06-06 22:51:55 +01:00			`# Note: these are HTML files in the EXPOWEB repo, not from the loser repo.`
Updating caves and entrances is no longer nuclear! Big overhaul of people processing, fullname added to the model lastname is now names -1 unless you only have one (yes you Wookey) this allows for Jon Arne Toft and Wookey to live it the same DB names can now have html chars in them, this should be real unicode but that can only happen when we go to Python 3! 2019-04-19 22:52:54 +01:00			`models.DataIssue.objects.filter(parser='caves').delete()`

Moved classes to models_caves and fixed imports 2020-05-28 04:54:53 +01:00			`area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None)`
			`area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None)`
Updating caves and entrances is no longer nuclear! Big overhaul of people processing, fullname added to the model lastname is now names -1 unless you only have one (yes you Wookey) this allows for Jon Arne Toft and Wookey to live it the same DB names can now have html chars in them, this should be real unicode but that can only happen when we go to Python 3! 2019-04-19 22:52:54 +01:00			`print(" - Reading Entrances")`
Add some exception checking to parsers/caves.py so that missing entrance slugs don't blow up the import. Also reduce the noise, so you just get a warning about missing slugs printed out 2012-09-24 23:23:38 +01:00			`#print "list of <Slug> <Filename>"`
Convert codebase for python3 usage 2020-05-24 01:57:06 +01:00			`for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files`
Try and ignore files that don't end .html (We really need to change to .xml) eg .html.orig!! Change the index on troggle to move on with the year 2015-01-19 21:28:35 +00:00			`if filename.endswith('.html'):`
Make sure that cave parser only reads .html files in cave_data dir (to stop foo~ causing 'duplicate cave' error) 2018-06-18 23:17:05 +01:00			`readentrance(filename)`
Updating caves and entrances is no longer nuclear! Big overhaul of people processing, fullname added to the model lastname is now names -1 unless you only have one (yes you Wookey) this allows for Jon Arne Toft and Wookey to live it the same DB names can now have html chars in them, this should be real unicode but that can only happen when we go to Python 3! 2019-04-19 22:52:54 +01:00			`print (" - Reading Caves")`
Convert codebase for python3 usage 2020-05-24 01:57:06 +01:00			`for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files`
Make sure that cave parser only reads .html files in cave_data dir (to stop foo~ causing 'duplicate cave' error) 2018-06-18 23:17:05 +01:00			`if filename.endswith('.html'):`
			`readcave(filename)`
New parser for new cave format 2012-06-10 16:56:12 +01:00
Moved classes to models_caves and fixed imports 2020-05-28 04:54:53 +01:00
New parser for new cave format 2012-06-10 16:56:12 +01:00			`def readentrance(filename):`
attempt to simplify wnt horribly wrong 2020-06-06 22:51:55 +01:00			`# Note: these are HTML files in the EXPOWEB repo, not from the loser repo.`
New parser for new cave format 2012-06-10 16:56:12 +01:00			`with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f:`
			`contents = f.read()`
Add some exception checking to parsers/caves.py so that missing entrance slugs don't blow up the import. Also reduce the noise, so you just get a warning about missing slugs printed out 2012-09-24 23:23:38 +01:00			`context = "in file %s" % filename`
import fixes & statistics table 2020-06-12 18:10:07 +01:00			`#print("Reading file ENTRANCE {} / {}".format(settings.ENTRANCEDESCRIPTIONS, filename))`
New parser for new cave format 2012-06-10 16:56:12 +01:00			`entrancecontentslist = getXML(contents, "entrance", maxItems = 1, context = context)`
			`if len(entrancecontentslist) == 1:`
			`entrancecontents = entrancecontentslist[0]`
			`non_public = getXML(entrancecontents, "non_public", maxItems = 1, context = context)`
			`name = getXML(entrancecontents, "name", maxItems = 1, context = context)`
			`slugs = getXML(entrancecontents, "slug", context = context)`
			`entrance_description = getXML(entrancecontents, "entrance_description", maxItems = 1, context = context)`
			`explorers = getXML(entrancecontents, "explorers", maxItems = 1, context = context)`
			`map_description = getXML(entrancecontents, "map_description", maxItems = 1, context = context)`
			`location_description = getXML(entrancecontents, "location_description", maxItems = 1, context = context)`
			`approach = getXML(entrancecontents, "approach", maxItems = 1, context = context)`
			`underground_description = getXML(entrancecontents, "underground_description", maxItems = 1, context = context)`
			`photo = getXML(entrancecontents, "photo", maxItems = 1, context = context)`
			`marking = getXML(entrancecontents, "marking", maxItems = 1, context = context)`
			`marking_comment = getXML(entrancecontents, "marking_comment", maxItems = 1, context = context)`
			`findability = getXML(entrancecontents, "findability", maxItems = 1, context = context)`
			`findability_description = getXML(entrancecontents, "findability_description", maxItems = 1, context = context)`
			`alt = getXML(entrancecontents, "alt", maxItems = 1, context = context)`
			`northing = getXML(entrancecontents, "northing", maxItems = 1, context = context)`
			`easting = getXML(entrancecontents, "easting", maxItems = 1, context = context)`
			`tag_station = getXML(entrancecontents, "tag_station", maxItems = 1, context = context)`
			`exact_station = getXML(entrancecontents, "exact_station", maxItems = 1, context = context)`
			`other_station = getXML(entrancecontents, "other_station", maxItems = 1, context = context)`
			`other_description = getXML(entrancecontents, "other_description", maxItems = 1, context = context)`
			`bearings = getXML(entrancecontents, "bearings", maxItems = 1, context = context)`
			`url = getXML(entrancecontents, "url", maxItems = 1, context = context)`
			if len(non_public) == 1 and len(slugs) >= 1 and len(name) >= 1 and len(entrance_description) == 1 and len(explorers) == 1 and len(map_description) == 1 and len(location_description) == 1 and len(approach) == 1 and len(underground_description) == 1 and len(marking) == 1 and len(marking_comment) == 1 and len(findability) == 1 and len(findability_description) == 1 and len(alt) == 1 and len(northing) == 1 and len(easting) == 1 and len(tag_station) == 1 and len(exact_station) == 1 and len(other_station) == 1 and len(other_description) == 1 and len(bearings) == 1 and len(url) == 1:
Moved classes to models_caves and fixed imports 2020-05-28 04:54:53 +01:00			`e, state = models_caves.Entrance.objects.update_or_create(name = name[0],`
New parser for new cave format 2012-06-10 16:56:12 +01:00			`non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],`
			`entrance_description = entrance_description[0],`
			`explorers = explorers[0],`
			`map_description = map_description[0],`
			`location_description = location_description[0],`
			`approach = approach[0],`
			`underground_description = underground_description[0],`
			`photo = photo[0],`
			`marking = marking[0],`
			`marking_comment = marking_comment[0],`
			`findability = findability[0],`
			`findability_description = findability_description[0],`
			`alt = alt[0],`
			`northing = northing[0],`
			`easting = easting[0],`
			`tag_station = tag_station[0],`
			`exact_station = exact_station[0],`
			`other_station = other_station[0],`
			`other_description = other_description[0],`
			`bearings = bearings[0],`
			`url = url[0],`
			`filename = filename,`
			`cached_primary_slug = slugs[0])`
			`primary = True`
			`for slug in slugs:`
import fixes & statistics table 2020-06-12 18:10:07 +01:00			`#print("entrance slug:{} filename:{}".format(slug, filename))`
			`try:`
			`cs = models_caves.EntranceSlug.objects.update_or_create(entrance = e,`
			`slug = slug,`
			`primary = primary)`
			`except:`
			`# need to cope with duplicates`
			`print(" ! FAILED to get only one ENTRANCE when updating using: "+filename)`
			`kents = models_caves.EntranceSlug.objects.all().filter(entrance = e,`
			`slug = slug,`
			`primary = primary)`
			`for k in kents:`
			`message = " ! - DUPLICATE in db. entrance:"+ str(k.entrance) + ", slug:" + str(k.slug())`
			`models.DataIssue.objects.create(parser='caves', message=message)`
			`print(message)`
bugfix returning multiple object catch 2020-06-13 01:26:28 +01:00			`for k in kents:`
import fixes & statistics table 2020-06-12 18:10:07 +01:00			`if k.slug() != None:`
			`print(" ! - OVERWRITING this one: slug:"+ str(k.slug()))`
			`k.notes = "DUPLICATE entrance found on import. Please fix\n" + k.notes`
			`c = k`
New parser for new cave format 2012-06-10 16:56:12 +01:00			`primary = False`

			`def readcave(filename):`
attempt to simplify wnt horribly wrong 2020-06-06 22:51:55 +01:00			`# Note: these are HTML files in the EXPOWEB repo, not from the loser repo.`
New parser for new cave format 2012-06-10 16:56:12 +01:00			`with open(os.path.join(settings.CAVEDESCRIPTIONS, filename)) as f:`
			`contents = f.read()`
			`context = " in file %s" % filename`
import fixes & statistics table 2020-06-12 18:10:07 +01:00			`#print("Reading file CAVE {}".format(filename))`
New parser for new cave format 2012-06-10 16:56:12 +01:00			`cavecontentslist = getXML(contents, "cave", maxItems = 1, context = context)`
			`#print cavecontentslist`
			`if len(cavecontentslist) == 1:`
			`cavecontents = cavecontentslist[0]`
			`non_public = getXML(cavecontents, "non_public", maxItems = 1, context = context)`
			`slugs = getXML(cavecontents, "caveslug", maxItems = 1, context = context)`
			`official_name = getXML(cavecontents, "official_name", maxItems = 1, context = context)`
			`areas = getXML(cavecontents, "area", context = context)`
			`kataster_code = getXML(cavecontents, "kataster_code", maxItems = 1, context = context)`
			`kataster_number = getXML(cavecontents, "kataster_number", maxItems = 1, context = context)`
			`unofficial_number = getXML(cavecontents, "unofficial_number", maxItems = 1, context = context)`
			`explorers = getXML(cavecontents, "explorers", maxItems = 1, context = context)`
			`underground_description = getXML(cavecontents, "underground_description", maxItems = 1, context = context)`
			`equipment = getXML(cavecontents, "equipment", maxItems = 1, context = context)`
			`references = getXML(cavecontents, "references", maxItems = 1, context = context)`
			`survey = getXML(cavecontents, "survey", maxItems = 1, context = context)`
			`kataster_status = getXML(cavecontents, "kataster_status", maxItems = 1, context = context)`
			`underground_centre_line = getXML(cavecontents, "underground_centre_line", maxItems = 1, context = context)`
			`notes = getXML(cavecontents, "notes", maxItems = 1, context = context)`
			`length = getXML(cavecontents, "length", maxItems = 1, context = context)`
			`depth = getXML(cavecontents, "depth", maxItems = 1, context = context)`
			`extent = getXML(cavecontents, "extent", maxItems = 1, context = context)`
			`survex_file = getXML(cavecontents, "survex_file", maxItems = 1, context = context)`
			`description_file = getXML(cavecontents, "description_file", maxItems = 1, context = context)`
			`url = getXML(cavecontents, "url", maxItems = 1, context = context)`
			`entrances = getXML(cavecontents, "entrance", context = context)`
			if len(non_public) == 1 and len(slugs) >= 1 and len(official_name) == 1 and len(areas) >= 1 and len(kataster_code) == 1 and len(kataster_number) == 1 and len(unofficial_number) == 1 and len(explorers) == 1 and len(underground_description) == 1 and len(equipment) == 1 and len(references) == 1 and len(survey) == 1 and len(kataster_status) == 1 and len(underground_centre_line) == 1 and len(notes) == 1 and len(length) == 1 and len(depth) == 1 and len(extent) == 1 and len(survex_file) == 1 and len(description_file ) == 1 and len(url) == 1 and len(entrances) >= 1:
Make import robust against duplicate kataster numbers 2020-06-07 17:49:58 +01:00			`try:`
			`c, state = models_caves.Cave.objects.update_or_create(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],`
			`official_name = official_name[0],`
			`kataster_code = kataster_code[0],`
			`kataster_number = kataster_number[0],`
			`unofficial_number = unofficial_number[0],`
			`explorers = explorers[0],`
			`underground_description = underground_description[0],`
			`equipment = equipment[0],`
			`references = references[0],`
			`survey = survey[0],`
			`kataster_status = kataster_status[0],`
			`underground_centre_line = underground_centre_line[0],`
			`notes = notes[0],`
			`length = length[0],`
			`depth = depth[0],`
			`extent = extent[0],`
			`survex_file = survex_file[0],`
			`description_file = description_file[0],`
			`url = url[0],`
			`filename = filename)`
			`except:`
			`# need to cope with duplicates`
import fixes & statistics table 2020-06-12 18:10:07 +01:00			`print(" ! FAILED to get only one CAVE when updating using: "+filename)`
Make import robust against duplicate kataster numbers 2020-06-07 17:49:58 +01:00			`kaves = models_caves.Cave.objects.all().filter(kataster_number=kataster_number[0])`
			`for k in kaves:`
			`message = " ! - DUPLICATES in db. kataster:"+ str(k.kataster_number) + ", slug:" + str(k.slug())`
			`models.DataIssue.objects.create(parser='caves', message=message)`
			`print(message)`
			`for k in kaves:`
			`if k.slug() != None:`
			`print(" ! - OVERWRITING this one: slug:"+ str(k.slug()))`
			`k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes`
			`c = k`

New parser for new cave format 2012-06-10 16:56:12 +01:00			`for area_slug in areas:`
Moved classes to models_caves and fixed imports 2020-05-28 04:54:53 +01:00			`area = models_caves.Area.objects.filter(short_name = area_slug)`
New parser for new cave format 2012-06-10 16:56:12 +01:00			`if area:`
			`newArea = area[0]`
			`else:`
Moved classes to models_caves and fixed imports 2020-05-28 04:54:53 +01:00			`newArea = models_caves.Area(short_name = area_slug, parent = models_caves.Area.objects.get(short_name = "1623"))`
New parser for new cave format 2012-06-10 16:56:12 +01:00			`newArea.save()`
			`c.area.add(newArea)`
			`primary = True`
			`for slug in slugs:`
Add some exception checking to parsers/caves.py so that missing entrance slugs don't blow up the import. Also reduce the noise, so you just get a warning about missing slugs printed out 2012-09-24 23:23:38 +01:00			`try:`
Moved classes to models_caves and fixed imports 2020-05-28 04:54:53 +01:00			`cs = models_caves.CaveSlug.objects.update_or_create(cave = c,`
New parser for new cave format 2012-06-10 16:56:12 +01:00			`slug = slug,`
			`primary = primary)`
Add some exception checking to parsers/caves.py so that missing entrance slugs don't blow up the import. Also reduce the noise, so you just get a warning about missing slugs printed out 2012-09-24 23:23:38 +01:00			`except:`
Make import robust against duplicate kataster numbers 2020-06-07 17:49:58 +01:00			`message = " ! Cave update/create failure: %s, skipping file %s" % (slug, context)`
Updating caves and entrances is no longer nuclear! Big overhaul of people processing, fullname added to the model lastname is now names -1 unless you only have one (yes you Wookey) this allows for Jon Arne Toft and Wookey to live it the same DB names can now have html chars in them, this should be real unicode but that can only happen when we go to Python 3! 2019-04-19 22:52:54 +01:00			`models.DataIssue.objects.create(parser='caves', message=message)`
			`print(message)`
Add some exception checking to parsers/caves.py so that missing entrance slugs don't blow up the import. Also reduce the noise, so you just get a warning about missing slugs printed out 2012-09-24 23:23:38 +01:00
New parser for new cave format 2012-06-10 16:56:12 +01:00			`primary = False`
			`for entrance in entrances:`
			`slug = getXML(entrance, "entranceslug", maxItems = 1, context = context)[0]`
			`letter = getXML(entrance, "letter", maxItems = 1, context = context)[0]`
Add some exception checking to parsers/caves.py so that missing entrance slugs don't blow up the import. Also reduce the noise, so you just get a warning about missing slugs printed out 2012-09-24 23:23:38 +01:00			`try:`
Moved classes to models_caves and fixed imports 2020-05-28 04:54:53 +01:00			`entrance = models_caves.Entrance.objects.get(entranceslug__slug = slug)`
			`ce = models_caves.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)`
Add some exception checking to parsers/caves.py so that missing entrance slugs don't blow up the import. Also reduce the noise, so you just get a warning about missing slugs printed out 2012-09-24 23:23:38 +01:00			`except:`
Make import robust against duplicate kataster numbers 2020-06-07 17:49:58 +01:00			`message = " ! Entrance setting failure, slug: %s letter: %s" % (slug, letter)`
Updating caves and entrances is no longer nuclear! Big overhaul of people processing, fullname added to the model lastname is now names -1 unless you only have one (yes you Wookey) this allows for Jon Arne Toft and Wookey to live it the same DB names can now have html chars in them, this should be real unicode but that can only happen when we go to Python 3! 2019-04-19 22:52:54 +01:00			`models.DataIssue.objects.create(parser='caves', message=message)`
			`print(message)`
Add some exception checking to parsers/caves.py so that missing entrance slugs don't blow up the import. Also reduce the noise, so you just get a warning about missing slugs printed out 2012-09-24 23:23:38 +01:00
New parser for new cave format 2012-06-10 16:56:12 +01:00
			`def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):`
			`items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)`
			`if len(items) < minItems and printwarnings:`
Thorough spring clean and profiling 2020-04-27 23:51:41 +01:00			`message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),`
New parser for new cave format 2012-06-10 16:56:12 +01:00			`"itemname": itemname,`
Fix CSRF issues in svx form Set date formats Add DataIssue model and add errors to it to allow us to give people a list of stuff to fix 2019-04-14 22:45:31 +01:00			`"min": minItems} + context`
			`models.DataIssue.objects.create(parser='caves', message=message)`
			`print(message)`

New parser for new cave format 2012-06-10 16:56:12 +01:00			`if maxItems is not None and len(items) > maxItems and printwarnings:`
Thorough spring clean and profiling 2020-04-27 23:51:41 +01:00			`message = " ! %(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items),`
New parser for new cave format 2012-06-10 16:56:12 +01:00			`"itemname": itemname,`
Fix CSRF issues in svx form Set date formats Add DataIssue model and add errors to it to allow us to give people a list of stuff to fix 2019-04-14 22:45:31 +01:00			`"max": maxItems} + context`
			`models.DataIssue.objects.create(parser='caves', message=message)`
			`print(message)`
Moved classes to models_caves and fixed imports 2020-05-28 04:54:53 +01:00			`return items`