edit cave reads from HTML file not just db

2025-12-14 19:47:12 +00:00 · 2023-04-22 01:24:32 +01:00
parent 116cfc7c6e
commit 2ed66fe3d0
3 changed files with 342 additions and 197 deletions
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -10,7 +10,12 @@ from troggle.core.models.logbooks import CaveSlug
 from troggle.core.models.troggle import DataIssue
 from troggle.settings import CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS, EXPOWEB, SURVEX_DATA

-"""Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html )
+"""Reads all the cave description data and entrance description data 
+by parsing the xml files stored as e.g. 
+:EXPOWEB:/cave_data/1623-161.html 
+or
+:EXPOWEB:/entrance_data/1623-161g.html 
+
 and creating the various Cave, Entrance and necessary Area objects.

 This is the first import that happens after the database is reinitialised. 
@@ -272,8 +277,41 @@ def do_pending_cave(k, url, area):
            print(message)
    return cave

-def readentrance(filename):
+def getXML(text, itemname, minItems=1, maxItems=None, context=""):
+    """Reads a single XML tag
+    Should throw exception rather than producing error message here,
+    then handle exception in calling routine where it has the context.
+    """
+    items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
+    if len(items) < minItems:
+        message = (
+            " ! %(count)i x %(itemname)s found, at least %(min)i expected. Load ABORT. "
+            % {"count": len(items), "itemname": itemname, "min": minItems}
+            + " in file "
+            + context
+        )
+        DataIssue.objects.create(parser="caves", message=message, url="" + context)
+        print(message)
+
+    if maxItems is not None and len(items) > maxItems:
+        message = (
+            " ! %(count)i x %(itemname)s found, no more than %(max)i expected in this XML unit. Load ABORT. "
+            % {"count": len(items), "itemname": itemname, "max": maxItems}
+            + " in file "
+            + context
+        )
+        DataIssue.objects.create(parser="caves", message=message)
+        print(message)
+    if minItems == 0:
+        if not items:
+            items = [""]
+    return items
+
+def readentrance(filename, ent=None):
    """Reads an entrance description from the .html file
+    
+    If not called as part of initial import, then the global lists will not be correct
+    but this is OK, a search will find them in the db.
    """
    def getXMLmax1(field):
        return getXML(entrancecontents, field, maxItems=1, context=context)
@@ -362,17 +400,50 @@ def readentrance(filename):
        )


-def readcave(filename):
+def readcave(filename, cave=None):
    """Reads an entrance description from the .html file
-    Convoluted. Sorry.This is as I inherited it and I haven't fiddled with it. Needs rewriting
+    Convoluted. Sorry. Needs rewriting
    Assumes any area it hasn't seen before is a subarea of 1623
+    
+    If not called as part of initial import, then the global lists will not be correct
+    but this is OK, a search will find them in the db.
    """
+    def do_entrances():
+        for entrance in entrances:
+            eslug = getXML(entrance, "entranceslug", maxItems=1, context=context)[0]
+            letter = getXML(entrance, "letter", maxItems=1, context=context)[0]
+            if len(entrances) == 1 and not eslug:  # may be empty: <entranceslug></entranceslug>
+                set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrance slug read from file")
+            else:
+                try:
+                    if eslug in entrances_xslug:
+                        entrance = entrances_xslug[eslug]
+                    else:
+                        # entrance = Entrance.objects.get(entranceslug__slug=eslug)
+                        entrance = Entrance.objects.get(slug=eslug)
+                        entrances_xslug[eslug] = entrance
+                    CaveAndEntrance.objects.update_or_create(
+                        cave=c, entrance_letter=letter, entrance=entrance
+                    )
+                except:
+                    message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {entrance} letter:"{letter}" cave:"{c}" filename:"cave_data/{filename}"'
+                    DataIssue.objects.create(parser="entrances", message=message, url=f"{c.url}_edit/")
+                    print(message)        
+                    
    global entrances_xslug
    global caves_xslug
    global areas_xslug

    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
-    with open(os.path.join(CAVEDESCRIPTIONS, filename)) as f:
+    fn = settings.CAVEDESCRIPTIONS / filename
+    # print(f" - Reading Cave from cave descriptions file {fn}")
+    if not fn.exists():
+        message = f" ! Cave_data file reading problem filename:'cave_data/{filename}'"
+        DataIssue.objects.create(parser="caves", message=message, url=None)
+        print(message)
+        return
+
+    with open(fn) as f:
        contents = f.read()
    context = filename
    cavecontentslist = getXML(contents, "cave", maxItems=1, context=context)
@@ -380,204 +451,271 @@ def readcave(filename):
        message = f'! BAD CAVE at "{filename}"'
        DataIssue.objects.create(parser="caves", message=message)
        print(message)
-    else:
-        cavecontents = cavecontentslist[0]
-        non_public = getXML(cavecontents, "non_public", maxItems=1, context=context)
-        slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context)
-        official_name = getXML(cavecontents, "official_name", maxItems=1, context=context)
-        areas = getXML(cavecontents, "area", context=context)
-        kataster_code = getXML(cavecontents, "kataster_code", maxItems=1, context=context)
-        kataster_number = getXML(cavecontents, "kataster_number", maxItems=1, context=context)
-        unofficial_number = getXML(cavecontents, "unofficial_number", maxItems=1, context=context)
-        explorers = getXML(cavecontents, "explorers", maxItems=1, context=context)
-        underground_description = getXML(cavecontents, "underground_description", maxItems=1, context=context)
-        equipment = getXML(cavecontents, "equipment", maxItems=1, context=context)
-        references = getXML(cavecontents, "references", maxItems=1, context=context)
-        survey = getXML(cavecontents, "survey", maxItems=1, context=context)
-        kataster_status = getXML(cavecontents, "kataster_status", maxItems=1, context=context)
-        underground_centre_line = getXML(cavecontents, "underground_centre_line", maxItems=1, context=context)
-        notes = getXML(cavecontents, "notes", maxItems=1, context=context)
-        length = getXML(cavecontents, "length", maxItems=1, context=context)
-        depth = getXML(cavecontents, "depth", maxItems=1, context=context)
-        extent = getXML(cavecontents, "extent", maxItems=1, context=context)
-        survex_file = getXML(cavecontents, "survex_file", maxItems=1, context=context)
-        description_file = getXML(cavecontents, "description_file", maxItems=1, context=context)
-        url = getXML(cavecontents, "url", maxItems=1, context=context)
-        entrances = getXML(cavecontents, "entrance", context=context)
+        return
+        
+    cavecontents = cavecontentslist[0]
+    non_public = getXML(cavecontents, "non_public", maxItems=1, context=context)
+    slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context)
+    official_name = getXML(cavecontents, "official_name", maxItems=1, context=context)
+    areas = getXML(cavecontents, "area", context=context)
+    kataster_code = getXML(cavecontents, "kataster_code", maxItems=1, context=context)
+    kataster_number = getXML(cavecontents, "kataster_number", maxItems=1, context=context)
+    unofficial_number = getXML(cavecontents, "unofficial_number", maxItems=1, context=context)
+    explorers = getXML(cavecontents, "explorers", maxItems=1, context=context)
+    underground_description = getXML(cavecontents, "underground_description", maxItems=1, context=context)
+    equipment = getXML(cavecontents, "equipment", maxItems=1, context=context)
+    references = getXML(cavecontents, "references", maxItems=1, context=context)
+    survey = getXML(cavecontents, "survey", maxItems=1, context=context)
+    kataster_status = getXML(cavecontents, "kataster_status", maxItems=1, context=context)
+    underground_centre_line = getXML(cavecontents, "underground_centre_line", maxItems=1, context=context)
+    notes = getXML(cavecontents, "notes", maxItems=1, context=context)
+    length = getXML(cavecontents, "length", maxItems=1, context=context)
+    depth = getXML(cavecontents, "depth", maxItems=1, context=context)
+    extent = getXML(cavecontents, "extent", maxItems=1, context=context)
+    survex_file = getXML(cavecontents, "survex_file", maxItems=1, context=context)
+    description_file = getXML(cavecontents, "description_file", maxItems=1, context=context)
+    url = getXML(cavecontents, "url", maxItems=1, context=context)
+    entrances = getXML(cavecontents, "entrance", context=context)

-        if (
-            len(non_public) == 1
-            and len(slugs) >= 1
-            and len(official_name) == 1
-            and len(areas) >= 1
-            and len(kataster_code) == 1
-            and len(kataster_number) == 1
-            and len(unofficial_number) == 1
-            and len(explorers) == 1
-            and len(underground_description) == 1
-            and len(equipment) == 1
-            and len(references) == 1
-            and len(survey) == 1
-            and len(kataster_status) == 1
-            and len(underground_centre_line) == 1
-            and len(notes) == 1
-            and len(length) == 1
-            and len(depth) == 1
-            and len(extent) == 1
-            and len(survex_file) == 1
-            and len(description_file) == 1
-            and len(url) == 1
-        ):
-            try:
-                c, state = Cave.objects.update_or_create(
-                    non_public={
-                        "True": True,
-                        "False": False,
-                        "true": True,
-                        "false": False,
-                    }[non_public[0]],
-                    official_name=official_name[0],
-                    kataster_code=kataster_code[0],
-                    kataster_number=kataster_number[0],
-                    unofficial_number=unofficial_number[0],
-                    explorers=explorers[0],
-                    underground_description=underground_description[0],
-                    equipment=equipment[0],
-                    references=references[0],
-                    survey=survey[0],
-                    kataster_status=kataster_status[0],
-                    underground_centre_line=underground_centre_line[0],
-                    notes=notes[0],
-                    length=length[0],
-                    depth=depth[0],
-                    extent=extent[0],
-                    survex_file=survex_file[0],
-                    description_file=description_file[0],
-                    url=url[0],
-                    filename=filename,
-                )
-            except:
-                print(" ! FAILED to get only one CAVE when updating using: " + filename)
-                kaves = Cave.objects.all().filter(kataster_number=kataster_number[0])
-                for k in kaves:
-                    message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
-                    DataIssue.objects.create(parser="caves", message=message)
-                    print(message)
-                for k in kaves:
-                    if k.slug() is not None:
-                        print(" ! - OVERWRITING this one: slug:" + str(k.slug()))
-                        k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes
-                        c = k
-
-            for area_slug in areas:
-                if area_slug in areas_xslug:
-                    newArea = areas_xslug[area_slug]
-                else:
-                    area = Area.objects.filter(short_name=area_slug)
-                    if area:
-                        newArea = area[0]
-                    else:
-                        newArea = Area(short_name=area_slug, super=Area.objects.get(short_name="1623"))
-                        newArea.save()
-                    areas_xslug[area_slug] = newArea
-                c.area.add(newArea)
-            primary = True # this sets the first thing we find to be primary=True and all the others =False
-            for slug in slugs:
-                if slug in caves_xslug:
-                    cs = caves_xslug[slug]
-                else:
-                    try:  # we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it
-                        cs = CaveSlug.objects.update_or_create(cave=c, slug=slug, primary=primary)
-                        caves_xslug[slug] = cs
-                    except Exception as ex:
-                        # This fails to do an update! It just crashes.. to be fixed
-                        message = f" ! Cave update/create failure : {slug}, skipping file cave_data/{context} with exception\nException: {ex.__class__}"
-                        DataIssue.objects.create(parser="caves", message=message)
-                        print(message)
-
-                primary = False
-
-            if not entrances or len(entrances) < 1:
-                # missing entrance link in cave_data/1623-* .html file
-                set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrances")
-            else:
-                for entrance in entrances:
-                    eslug = getXML(entrance, "entranceslug", maxItems=1, context=context)[0]
-                    letter = getXML(entrance, "letter", maxItems=1, context=context)[0]
-                    if len(entrances) == 1 and not eslug:  # may be empty: <entranceslug></entranceslug>
-                        set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrance slug read from file")
-                    else:
-                        try:
-                            if eslug in entrances_xslug:
-                                entrance = entrances_xslug[eslug]
-                            else:
-                                # entrance = Entrance.objects.get(entranceslug__slug=eslug)
-                                entrance = Entrance.objects.get(slug=eslug)
-                                entrances_xslug[eslug] = entrance
-                            CaveAndEntrance.objects.update_or_create(
-                                cave=c, entrance_letter=letter, entrance=entrance
-                            )
-                        except:
-                            message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {entrance} letter:"{letter}" cave:"{c}" filename:"cave_data/{filename}"'
-                            DataIssue.objects.create(parser="entrances", message=message, url=f"{c.url}_edit/")
-                            print(message)
-
-            if survex_file[0]:
-                if not (Path(SURVEX_DATA) / survex_file[0]).is_file():
-                    message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"'
-                    DataIssue.objects.create(parser="caves", message=message, url=f"/{slug[0:4]}/{slug}_cave_edit/")
-                    print(message)
-
-            if description_file[0]:  # if not an empty string
-                message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
-                DataIssue.objects.create(parser="caves ok", message=message, url=f"/{slug}_cave_edit/")
-                print(message)
-
-                if not (Path(EXPOWEB) / description_file[0]).is_file():
-                    message = f' ! {slug:12} description filename  "{EXPOWEB}/{description_file[0]}" does not refer to a real file'
-                    DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
-                    print(message)
-                    # c.description_file="" # done only once, to clear out cruft.
-                    # c.save()
-        else:  # more than one item in long list
-            message = f' ! ABORT loading this cave. in "{filename}"'
-            DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
+    if not (
+        len(non_public) == 1
+        and len(slugs) >= 1 # is this really correct ?
+        and len(official_name) == 1
+        and len(areas) >= 1 # we want to stop using the sub-areas in 2023
+        and len(kataster_code) == 1
+        and len(kataster_number) == 1
+        and len(unofficial_number) == 1
+        and len(explorers) == 1
+        and len(underground_description) == 1
+        and len(equipment) == 1
+        and len(references) == 1
+        and len(survey) == 1
+        and len(kataster_status) == 1
+        and len(underground_centre_line) == 1
+        and len(notes) == 1
+        and len(length) == 1
+        and len(depth) == 1
+        and len(extent) == 1
+        and len(survex_file) == 1
+        and len(description_file) == 1
+        and len(url) == 1
+    ):
+        # at least one field in the long list above does not have the expected number of items
+        message = f' ! ABORT loading this cave. in "{filename}"'
+        DataIssue.objects.create(parser="caves", message=message, url=f"/{slugs}_cave_edit/")
+        print(message)
+        return
+        
+    if cave:
+        # this is a re-load prior to editing and we already know the cave id
+        cave.non_public={
+            "True": True,
+            "False": False,
+            "true": True,
+            "false": False}[non_public[0]]
+        cave.official_name=official_name[0]
+        cave.kataster_code=kataster_code[0]
+        cave.kataster_number=kataster_number[0]
+        cave.unofficial_number=unofficial_number[0]
+        cave.explorers=explorers[0]
+        cave.underground_description=underground_description[0]
+        cave.equipment=equipment[0]
+        cave.references=references[0]
+        cave.survey=survey[0]
+        cave.kataster_status=kataster_status[0]
+        cave.underground_centre_line=underground_centre_line[0]
+        cave.notes=notes[0]
+        cave.length=length[0]
+        cave.depth=depth[0]
+        cave.extent=extent[0]
+        cave.survex_file=survex_file[0]
+        cave.description_file=description_file[0]
+        cave.url=url[0]
+                
+        if len(slugs) > 1:
+            message = f" ! Cave edit failure due to more than one slug: {slugs}, skipping this field edit. "
+            DataIssue.objects.create(parser="caves", message=message)
            print(message)

+        cave.areas = None
+        cave.save()
+        for area_slug in areas:
+            a = Area.objects.filter(short_name=area_slug)
+            if a:
+                cave.area.add(a[0]) 
+            else:
+                message = f" ! Cave edit failure due to unrecognised Area: {a}, skipping this field edit. "
+                DataIssue.objects.create(parser="caves", message=message)
+                print(message)
+            
+        c = cave
+        do_entrances()
+        cave.save()
+    else:
+        try:
+            c, state = Cave.objects.update_or_create(
+                non_public={
+                    "True": True,
+                    "False": False,
+                    "true": True,
+                    "false": False,
+                }[non_public[0]],
+                official_name=official_name[0],
+                kataster_code=kataster_code[0],
+                kataster_number=kataster_number[0],
+                unofficial_number=unofficial_number[0],
+                explorers=explorers[0],
+                underground_description=underground_description[0],
+                equipment=equipment[0],
+                references=references[0],
+                survey=survey[0],
+                kataster_status=kataster_status[0],
+                underground_centre_line=underground_centre_line[0],
+                notes=notes[0],
+                length=length[0],
+                depth=depth[0],
+                extent=extent[0],
+                survex_file=survex_file[0],
+                description_file=description_file[0],
+                url=url[0],
+                filename=filename,
+            )
+        except:
+            print(" ! FAILED to get only one CAVE when updating using: " + filename)
+            kaves = Cave.objects.all().filter(kataster_number=kataster_number[0])
+            for k in kaves:
+                message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
+                DataIssue.objects.create(parser="caves", message=message)
+                print(message)
+            for k in kaves:
+                if k.slug() is not None:
+                    print(" ! - OVERWRITING this one: slug:" + str(k.slug()))
+                    k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes
+                    c = k
+                    
+        for area_slug in areas:
+            if area_slug in areas_xslug:
+                newArea = areas_xslug[area_slug]
+            else:
+                area = Area.objects.filter(short_name=area_slug)
+                if area:
+                    newArea = area[0]
+                else:
+                    newArea = Area(short_name=area_slug, super=Area.objects.get(short_name="1623"))
+                    newArea.save()
+                areas_xslug[area_slug] = newArea
+            c.area.add(newArea)
+            
+        primary = True # this sets the first thing we find to be primary=True and all the others =False
+        for slug in slugs:
+            if slug in caves_xslug:
+                cs = caves_xslug[slug]
+            else:
+               try:  # we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it
+                    cs = CaveSlug.objects.update_or_create(cave=c, slug=slug, primary=primary)
+                    caves_xslug[slug] = cs
+               except Exception as ex:
+                    #raise
+                    # This fails to do an update! It just crashes.. to be fixed
+                    message = f" ! Cave update/create failure : {slug}, skipping file cave_data/{context} with exception\nException: {ex.__class__}"
+                    DataIssue.objects.create(parser="caves", message=message)
+                    print(message)

-def getXML(text, itemname, minItems=1, maxItems=None, context=""):
-    """Reads a single XML tag
-    Should throw exception rather than producing error message here,
-    then handle exception in calling routine where it has the context.
-    """
-    items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
-    if len(items) < minItems:
-        message = (
-            " ! %(count)i x %(itemname)s found, at least %(min)i expected. Load ABORT. "
-            % {"count": len(items), "itemname": itemname, "min": minItems}
-            + " in file "
-            + context
-        )
-        DataIssue.objects.create(parser="caves", message=message, url="" + context)
+            primary = False
+
+        if not entrances or len(entrances) < 1:
+            # missing entrance link in cave_data/1623-* .html file
+            set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrances")
+        else:
+            do_entrances()
+
+    # From here on the code applies to both edited and newly-imported caves
+    if survex_file[0]:
+        if not (Path(SURVEX_DATA) / survex_file[0]).is_file():
+            message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"'
+            DataIssue.objects.create(parser="caves", message=message, url=f"/{slug[0:4]}/{slug}_cave_edit/")
+            print(message)
+
+    if description_file[0]:  # if not an empty string
+        message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
+        DataIssue.objects.create(parser="caves ok", message=message, url=f"/{slug}_cave_edit/")
        print(message)

-    if maxItems is not None and len(items) > maxItems:
-        message = (
-            " ! %(count)i x %(itemname)s found, no more than %(max)i expected in this XML unit. Load ABORT. "
-            % {"count": len(items), "itemname": itemname, "max": maxItems}
-            + " in file "
-            + context
-        )
-        DataIssue.objects.create(parser="caves", message=message)
-        print(message)
-    if minItems == 0:
-        if not items:
-            items = [""]
-    return items
+        if not (Path(EXPOWEB) / description_file[0]).is_file():
+            message = f' ! {slug:12} description filename  "{EXPOWEB}/{description_file[0]}" does not refer to a real file'
+            DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
+            print(message)
+            # c.description_file="" # done only once, to clear out cruft.
+    c.save()
+
+
+# ChatGPT replacement attempt 2023-04-21. Obviously very incomplete, but some useful ideas
+# import os
+# import xml.etree.ElementTree as ET
+
+# class BadCaveException(Exception):
+    # pass
+
+# class FailedCaveUpdateException(Exception):
+    # pass
+
+# def readcave_chatgpt(filename, entrances_xslug, caves_xslug, areas_xslug):
+    # """Reads an entrance description from the .html file and updates the corresponding Cave object"""
+    # tree = ET.parse(os.path.join(CAVEDESCRIPTIONS, filename))
+    # root = tree.getroot()
+
+    # cavecontents = root.find("cave")
+    # if cavecontents is None:
+        # raise BadCaveException(f'! BAD CAVE at "{filename}"')
+
+    # non_public = cavecontents.findtext("non_public")
+    # slugs = cavecontents.findtext("caveslug")
+    # official_name = cavecontents.findtext("official_name")
+    # kataster_code = cavecontents.findtext("kataster_code")
+    # kataster_number = cavecontents.findtext("kataster_number")
+    # unofficial_number = cavecontents.findtext("unofficial_number")
+    # explorers = cavecontents.findtext("explorers")
+    # underground_description = cavecontents.findtext("underground_description")
+    # equipment = cavecontents.findtext("equipment")
+    # references = cavecontents.findtext("references")
+    # survey = cavecontents.findtext("survey")
+    # kataster_status = cavecontents.findtext("kataster_status")
+    # underground_centre_line = cavecontents.findtext("underground_centre_line")
+    # notes = cavecontents.findtext("notes")
+    # length = cavecontents.findtext("length")
+    # depth = cavecontents.findtext("depth")
+    # extent = cavecontents.findtext("extent")
+    # survex_file = cavecontents.findtext("survex_file")
+    # description_file = cavecontents.findtext("description_file")
+    # url = cavecontents.findtext("url")
+
+    # areas = cavecontents.findall("area")
+    # entrances = cavecontents.findall("entrance")
+
+    # if (
+        # non_public is not None
+ # # etc.
+ # # wrong, some of these should be ==1 and some >=1 
+    # ):
+        # try:
+            # cave = caves_xslug.get(kataster_number)
+            # if cave is None:
+                # cave = Cave.objects.create(
+                    # non_public={
+                        # "True": True,
+                        # "False": False,
+                        # "true": True,
+                        # "false": False,
+                    # }[non_public],
+                    # official_name=official_name,
+                    # # kataster [truncated]
+


 def readcaves():
-    """Reads the xml-format HTML files in the EXPOWEB repo, not from the loser repo."""
+    """Called from databaseReset mass importer.
+    Reads the xml-format HTML 'cave' files in the EXPOWEB repo, the survex files from the loser repo.
+    """
    # Pending is for those caves which do not have cave_data/1623-xxx.html XML files even though 
    # they exist and have surveys. 
    pending = set()