xml parser attempt retracted

2023-04-22 23:15:50 +01:00 · 2023-04-22 23:15:50 +01:00 · c5a9bdc724
commit c5a9bdc724
parent 30ef427b90
3 changed files with 15 additions and 69 deletions
--- a/parsers/caves.py
+++ b/parsers/caves.py
@ -1,5 +1,7 @@
 import os
 import re
 from pathlib import Path
 from django.conf import settings
@ -426,6 +428,14 @@ def read_cave(filename, cave=None):
    If not called as part of initial import, then the global lists will not be correct
    but this is OK, a search will find them in the db.
    Attempted to use standard python3.11 xml library but fails on HTML entities (2023-04-23)
    import xml.etree.ElementTree as ET
    tree = ET.parse(fn)
    xml_root = tree.getroot()
    for t in ["html", "head", "body", "cave","non_public", "caveslug", "official_name","entrance"]:
        elements = xml_root.findall(t)
    """
    def getXMLmax1(field):
        return getXML(cavecontents, field, maxItems=1, context=context)
@ -492,6 +502,8 @@ def read_cave(filename, cave=None):
    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
    fn = settings.CAVEDESCRIPTIONS / filename
    context = filename
    # print(f" - Reading Cave from cave descriptions file {fn}")
    if not fn.exists():
        message = f" ! Cave_data file reading problem filename:'cave_data/{filename}'"
@ -501,7 +513,6 @@ def read_cave(filename, cave=None):
    with open(fn) as f:
        contents = f.read()
    context = filename
    cavecontentslist = getXML(contents, "cave", maxItems=1, context=context)
    if len(cavecontentslist) != 1:
@ -580,7 +591,7 @@ def read_cave(filename, cave=None):
    cave.url=url[0]
    areas = getXML(cavecontents, "area", context=context)
-    # cave.area_set.clear() # Need to find correct syntax. Does not delete previously loaded areas.. WARNING
+    cave.area.clear() # Deletes all links to areas in db
    for area_slug in areas:
        if area_slug in areas_xslug:
            newArea = areas_xslug[area_slug]
@ -630,70 +641,6 @@ def read_cave(filename, cave=None):
    cave.save()
    return cave
 # ChatGPT replacement attempt 2023-04-21. Obviously very incomplete, but some useful ideas
 # import os
 # import xml.etree.ElementTree as ET
 # class BadCaveException(Exception):
    # pass
 # class FailedCaveUpdateException(Exception):
    # pass
 # def read_cave_chatgpt(filename, entrances_xslug, caves_xslug, areas_xslug):
    # """Reads an entrance description from the .html file and updates the corresponding Cave object"""
    # tree = ET.parse(os.path.join(CAVEDESCRIPTIONS, filename))
    # root = tree.getroot()
    # cavecontents = root.find("cave")
    # if cavecontents is None:
        # raise BadCaveException(f'! BAD CAVE at "{filename}"')
    # non_public = cavecontents.findtext("non_public")
    # slugs = cavecontents.findtext("caveslug")
    # official_name = cavecontents.findtext("official_name")
    # kataster_code = cavecontents.findtext("kataster_code")
    # kataster_number = cavecontents.findtext("kataster_number")
    # unofficial_number = cavecontents.findtext("unofficial_number")
    # explorers = cavecontents.findtext("explorers")
    # underground_description = cavecontents.findtext("underground_description")
    # equipment = cavecontents.findtext("equipment")
    # references = cavecontents.findtext("references")
    # survey = cavecontents.findtext("survey")
    # kataster_status = cavecontents.findtext("kataster_status")
    # underground_centre_line = cavecontents.findtext("underground_centre_line")
    # notes = cavecontents.findtext("notes")
    # length = cavecontents.findtext("length")
    # depth = cavecontents.findtext("depth")
    # extent = cavecontents.findtext("extent")
    # survex_file = cavecontents.findtext("survex_file")
    # description_file = cavecontents.findtext("description_file")
    # url = cavecontents.findtext("url")
    # areas = cavecontents.findall("area")
    # entrances = cavecontents.findall("entrance")
    # if (
        # non_public is not None
 # # etc.
 # # wrong, some of these should be ==1 and some >=1 
    # ):
        # try:
            # cave = caves_xslug.get(kataster_number)
            # if cave is None:
                # cave = Cave.objects.create(
                    # non_public={
                        # "True": True,
                        # "False": False,
                        # "true": True,
                        # "false": False,
                    # }[non_public],
                    # official_name=official_name,
                    # # kataster [truncated]
 def readcaves():
    """Called from databaseReset mass importer.
    Reads the xml-format HTML 'cave' files in the EXPOWEB repo, the survex files from the loser repo.
@ -732,7 +679,6 @@ def readcaves():
    with transaction.atomic():
        area = get_area("1623")
        print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS)
        print(" - Reading Entrances from entrance descriptions xml files")
        for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
            read_entrance(filename)
--- a/templates/dataformat/cave.xml
+++ b/templates/dataformat/cave.xml
@ -12,7 +12,7 @@ though, you do not need to do a data import as it happens automatically -->
 <body>
 <b>This file is generated by troggle</b> on {{date}} UTC using the form documented at
 the form documented at 
-<a ="/handbook/survey/caveentry.html">handbook/survey/caveentry.html</a>
+<a href="/handbook/survey/caveentry.html">handbook/survey/caveentry.html</a>
 <br>
 <cave>
--- a/templates/dataformat/entrance.xml
+++ b/templates/dataformat/entrance.xml
@ -15,7 +15,7 @@ though, you do not need to do a data import as it happens automatically -->
 </head>
 <body>
 <b>This file is generated by troggle</b> on {{date}} UTC using the form documented at 
-<a ="/handbook/survey/ententry.html">handbook/survey/ententry.html</a>
+<a href="/handbook/survey/ententry.html">handbook/survey/ententry.html</a>
 <br>
 <entrance>
 <non_public>{{ entrance.non_public }}</non_public>