diff --git a/parsers/caves.py b/parsers/caves.py index 5389a8e..e1ec8dc 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -1,5 +1,7 @@ import os import re + + from pathlib import Path from django.conf import settings @@ -426,6 +428,14 @@ def read_cave(filename, cave=None): If not called as part of initial import, then the global lists will not be correct but this is OK, a search will find them in the db. + + Attempted to use standard python3.11 xml library but fails on HTML entities (2023-04-23) + import xml.etree.ElementTree as ET + tree = ET.parse(fn) + xml_root = tree.getroot() + for t in ["html", "head", "body", "cave","non_public", "caveslug", "official_name","entrance"]: + elements = xml_root.findall(t) + """ def getXMLmax1(field): return getXML(cavecontents, field, maxItems=1, context=context) @@ -492,6 +502,8 @@ def read_cave(filename, cave=None): # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. fn = settings.CAVEDESCRIPTIONS / filename + context = filename + # print(f" - Reading Cave from cave descriptions file {fn}") if not fn.exists(): message = f" ! Cave_data file reading problem filename:'cave_data/{filename}'" @@ -501,7 +513,6 @@ def read_cave(filename, cave=None): with open(fn) as f: contents = f.read() - context = filename cavecontentslist = getXML(contents, "cave", maxItems=1, context=context) if len(cavecontentslist) != 1: @@ -580,7 +591,7 @@ def read_cave(filename, cave=None): cave.url=url[0] areas = getXML(cavecontents, "area", context=context) - # cave.area_set.clear() # Need to find correct syntax. Does not delete previously loaded areas.. WARNING + cave.area.clear() # Deletes all links to areas in db for area_slug in areas: if area_slug in areas_xslug: newArea = areas_xslug[area_slug] @@ -630,70 +641,6 @@ def read_cave(filename, cave=None): cave.save() return cave - -# ChatGPT replacement attempt 2023-04-21. 
Obviously very incomplete, but some useful ideas -# import os -# import xml.etree.ElementTree as ET - -# class BadCaveException(Exception): - # pass - -# class FailedCaveUpdateException(Exception): - # pass - -# def read_cave_chatgpt(filename, entrances_xslug, caves_xslug, areas_xslug): - # """Reads an entrance description from the .html file and updates the corresponding Cave object""" - # tree = ET.parse(os.path.join(CAVEDESCRIPTIONS, filename)) - # root = tree.getroot() - - # cavecontents = root.find("cave") - # if cavecontents is None: - # raise BadCaveException(f'! BAD CAVE at "{filename}"') - - # non_public = cavecontents.findtext("non_public") - # slugs = cavecontents.findtext("caveslug") - # official_name = cavecontents.findtext("official_name") - # kataster_code = cavecontents.findtext("kataster_code") - # kataster_number = cavecontents.findtext("kataster_number") - # unofficial_number = cavecontents.findtext("unofficial_number") - # explorers = cavecontents.findtext("explorers") - # underground_description = cavecontents.findtext("underground_description") - # equipment = cavecontents.findtext("equipment") - # references = cavecontents.findtext("references") - # survey = cavecontents.findtext("survey") - # kataster_status = cavecontents.findtext("kataster_status") - # underground_centre_line = cavecontents.findtext("underground_centre_line") - # notes = cavecontents.findtext("notes") - # length = cavecontents.findtext("length") - # depth = cavecontents.findtext("depth") - # extent = cavecontents.findtext("extent") - # survex_file = cavecontents.findtext("survex_file") - # description_file = cavecontents.findtext("description_file") - # url = cavecontents.findtext("url") - - # areas = cavecontents.findall("area") - # entrances = cavecontents.findall("entrance") - - # if ( - # non_public is not None - # # etc. 
- # # wrong, some of these should be ==1 and some >=1 - # ): - # try: - # cave = caves_xslug.get(kataster_number) - # if cave is None: - # cave = Cave.objects.create( - # non_public={ - # "True": True, - # "False": False, - # "true": True, - # "false": False, - # }[non_public], - # official_name=official_name, - # # kataster [truncated] - - - def readcaves(): """Called from databaseReset mass importer. Reads the xml-format HTML 'cave' files in the EXPOWEB repo, the survex files from the loser repo. @@ -732,7 +679,6 @@ def readcaves(): with transaction.atomic(): area = get_area("1623") - print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS) print(" - Reading Entrances from entrance descriptions xml files") for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files read_entrance(filename) diff --git a/templates/dataformat/cave.xml b/templates/dataformat/cave.xml index 022e50e..dc66f14 100644 --- a/templates/dataformat/cave.xml +++ b/templates/dataformat/cave.xml @@ -12,7 +12,7 @@ though, you do not need to do a data import as it happens automatically -->
This file is generated by troggle on {{date}} UTC using the form documented at -handbook/survey/caveentry.html +handbook/survey/caveentry.html