edit cave reads from HTML file not just db

2023-04-22 01:24:32 +01:00
parent 116cfc7c6e
commit 2ed66fe3d0
3 changed files with 342 additions and 197 deletions
--- a/core/utils.py
+++ b/core/utils.py
@@ -117,7 +117,7 @@ def write_and_commit(files, message):
                kwargs = {}
            try:
                with open(filepath, mode, **kwargs) as f:
-                    print(f"WRITING{cwd}---{filename} ")
+                    print(f"WRITING {cwd}---{filename} ")
                    # as the wsgi process www-data, we have group write-access but are not owner, so cannot chmod.
                    # os.chmod(filepath, 0o664) # set file permissions to rw-rw-r--
                    f.write(content)
--- a/core/views/caves.py
+++ b/core/views/caves.py
@@ -14,6 +14,9 @@ from troggle.core.models.caves import Cave, CaveAndEntrance, Entrance, GetCaveLo
 from troggle.core.models.logbooks import CaveSlug, QM
 from troggle.core.utils import write_and_commit
 from troggle.core.views import expo
 from troggle.settings import CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS
 from troggle.parsers.caves import readcave, readentrance
 from .auth import login_required_if_public
@@ -333,7 +336,7 @@ def edit_cave(request, path="", slug=None):
    The format for the file being saved is in templates/dataformat/cave.xml
    Warning. This uses Django deep magic.
-    It does save the data into into the database directly, not by parsing the file.
+    It saves the data into the database and into the html file, which it then commits to git.
    """
    message = ""
    if slug is not None:
@@ -373,7 +376,7 @@ def edit_cave(request, path="", slug=None):
                ceinst.save()
            try:
                cave_file = cave.file_output()
-                print(cave_file)
+                # print(cave_file)
                write_and_commit([cave_file], f"Online edit of {cave}")
                # leave other exceptions unhandled so that they bubble up to user interface
            except PermissionError:
@@ -388,6 +391,10 @@ def edit_cave(request, path="", slug=None):
            message = f"! POST data is INVALID {cave}"
            print(message)
    else:
        # re-read cave data from file.
        filename = str(cave.slug() +".html")
        readcave(filename, cave=cave)
        form = CaveForm(instance=cave)
        ceFormSet = CaveAndEntranceFormSet(queryset=cave.caveandentrance_set.all())
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -10,7 +10,12 @@ from troggle.core.models.logbooks import CaveSlug
 from troggle.core.models.troggle import DataIssue
 from troggle.settings import CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS, EXPOWEB, SURVEX_DATA
-"""Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html )
+"""Reads all the cave description data and entrance description data 
 by parsing the xml files stored as e.g. 
 :EXPOWEB:/cave_data/1623-161.html 
 or
 :EXPOWEB:/entrance_data/1623-161g.html 
 and creating the various Cave, Entrance and necessary Area objects.
 This is the first import that happens after the database is reinitialised. 
@@ -272,8 +277,41 @@ def do_pending_cave(k, url, area):
            print(message)
    return cave
-def readentrance(filename):
+def getXML(text, itemname, minItems=1, maxItems=None, context=""):
    """Return the contents of every <itemname>...</itemname> element found in text.

    Matching uses a non-greedy regular expression with re.S, so the content
    of an element may span several lines. itemname is interpolated into the
    pattern as-is (it is not regex-escaped).

    text     -- the string to search
    itemname -- the tag name to look for
    minItems -- fewer matches than this is recorded as a DataIssue and printed
    maxItems -- if not None, more matches than this is recorded as a DataIssue and printed
    context  -- appended to the messages after " in file "; also used as the
                url of the "too few items" DataIssue

    Returns the list of matched contents. When minItems is 0 and nothing
    matched, returns [""] instead of an empty list.

    Note: despite the "Load ABORT" wording in the messages, nothing is
    aborted or raised here: the problem is recorded and printed, and the
    list of matches is still returned to the caller.

    TODO: Should throw exception rather than producing error message here,
    then handle exception in calling routine where it has the context.
    """
    # (.*?) is non-greedy so that repeated elements give separate matches; re.S lets "." match newlines
    items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
    if len(items) < minItems:
        # too few: report it, with context as the url of the DataIssue, and carry on
        message = (
            " ! %(count)i x %(itemname)s found, at least %(min)i expected. Load ABORT. "
            % {"count": len(items), "itemname": itemname, "min": minItems}
            + " in file "
            + context
        )
        DataIssue.objects.create(parser="caves", message=message, url="" + context)
        print(message)
    if maxItems is not None and len(items) > maxItems:
        # too many: report it and carry on; unlike the case above, no url is attached to this DataIssue
        message = (
            " ! %(count)i x %(itemname)s found, no more than %(max)i expected in this XML unit. Load ABORT. "
            % {"count": len(items), "itemname": itemname, "max": maxItems}
            + " in file "
            + context
        )
        DataIssue.objects.create(parser="caves", message=message)
        print(message)
    if minItems == 0:
        # optional tag that is absent: return a single empty string,
        # presumably so that callers can still index [0] - confirm against callers
        if not items:
            items = [""]
    return items
 def readentrance(filename, ent=None):
    """Reads an entrance description from the .html file
    If not called as part of initial import, then the global lists will not be correct
    but this is OK, a search will find them in the db.
    """
    def getXMLmax1(field):
        return getXML(entrancecontents, field, maxItems=1, context=context)
@@ -362,17 +400,50 @@ def readentrance(filename):
        )
-def readcave(filename):
+def readcave(filename, cave=None):
    """Reads a cave description from the .html file
-    Convoluted. Sorry.This is as I inherited it and I haven't fiddled with it. Needs rewriting
+    Convoluted. Sorry. Needs rewriting
    Assumes any area it hasn't seen before is a subarea of 1623
    If not called as part of initial import, then the global lists will not be correct
    but this is OK, a search will find them in the db.
    """
    def do_entrances():
        for entrance in entrances:
            eslug = getXML(entrance, "entranceslug", maxItems=1, context=context)[0]
            letter = getXML(entrance, "letter", maxItems=1, context=context)[0]
            if len(entrances) == 1 and not eslug:  # may be empty: <entranceslug></entranceslug>
                set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrance slug read from file")
            else:
                try:
                    if eslug in entrances_xslug:
                        entrance = entrances_xslug[eslug]
                    else:
                        # entrance = Entrance.objects.get(entranceslug__slug=eslug)
                        entrance = Entrance.objects.get(slug=eslug)
                        entrances_xslug[eslug] = entrance
                    CaveAndEntrance.objects.update_or_create(
                        cave=c, entrance_letter=letter, entrance=entrance
                    )
                except:
                    message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {entrance} letter:"{letter}" cave:"{c}" filename:"cave_data/{filename}"'
                    DataIssue.objects.create(parser="entrances", message=message, url=f"{c.url}_edit/")
                    print(message)        
    global entrances_xslug
    global caves_xslug
    global areas_xslug
    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
-    with open(os.path.join(CAVEDESCRIPTIONS, filename)) as f:
+    fn = settings.CAVEDESCRIPTIONS / filename
    # print(f" - Reading Cave from cave descriptions file {fn}")
    if not fn.exists():
        message = f" ! Cave_data file reading problem filename:'cave_data/{filename}'"
        DataIssue.objects.create(parser="caves", message=message, url=None)
        print(message)
        return
    with open(fn) as f:
        contents = f.read()
    context = filename
    cavecontentslist = getXML(contents, "cave", maxItems=1, context=context)
@@ -380,204 +451,271 @@ def readcave(filename):
        message = f'! BAD CAVE at "{filename}"'
        DataIssue.objects.create(parser="caves", message=message)
        print(message)
-    else:
+        return
-        cavecontents = cavecontentslist[0]
+        
-        non_public = getXML(cavecontents, "non_public", maxItems=1, context=context)
+    cavecontents = cavecontentslist[0]
-        slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context)
+    non_public = getXML(cavecontents, "non_public", maxItems=1, context=context)
-        official_name = getXML(cavecontents, "official_name", maxItems=1, context=context)
+    slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context)
-        areas = getXML(cavecontents, "area", context=context)
+    official_name = getXML(cavecontents, "official_name", maxItems=1, context=context)
-        kataster_code = getXML(cavecontents, "kataster_code", maxItems=1, context=context)
+    areas = getXML(cavecontents, "area", context=context)
-        kataster_number = getXML(cavecontents, "kataster_number", maxItems=1, context=context)
+    kataster_code = getXML(cavecontents, "kataster_code", maxItems=1, context=context)
-        unofficial_number = getXML(cavecontents, "unofficial_number", maxItems=1, context=context)
+    kataster_number = getXML(cavecontents, "kataster_number", maxItems=1, context=context)
-        explorers = getXML(cavecontents, "explorers", maxItems=1, context=context)
+    unofficial_number = getXML(cavecontents, "unofficial_number", maxItems=1, context=context)
-        underground_description = getXML(cavecontents, "underground_description", maxItems=1, context=context)
+    explorers = getXML(cavecontents, "explorers", maxItems=1, context=context)
-        equipment = getXML(cavecontents, "equipment", maxItems=1, context=context)
+    underground_description = getXML(cavecontents, "underground_description", maxItems=1, context=context)
-        references = getXML(cavecontents, "references", maxItems=1, context=context)
+    equipment = getXML(cavecontents, "equipment", maxItems=1, context=context)
-        survey = getXML(cavecontents, "survey", maxItems=1, context=context)
+    references = getXML(cavecontents, "references", maxItems=1, context=context)
-        kataster_status = getXML(cavecontents, "kataster_status", maxItems=1, context=context)
+    survey = getXML(cavecontents, "survey", maxItems=1, context=context)
-        underground_centre_line = getXML(cavecontents, "underground_centre_line", maxItems=1, context=context)
+    kataster_status = getXML(cavecontents, "kataster_status", maxItems=1, context=context)
-        notes = getXML(cavecontents, "notes", maxItems=1, context=context)
+    underground_centre_line = getXML(cavecontents, "underground_centre_line", maxItems=1, context=context)
-        length = getXML(cavecontents, "length", maxItems=1, context=context)
+    notes = getXML(cavecontents, "notes", maxItems=1, context=context)
-        depth = getXML(cavecontents, "depth", maxItems=1, context=context)
+    length = getXML(cavecontents, "length", maxItems=1, context=context)
-        extent = getXML(cavecontents, "extent", maxItems=1, context=context)
+    depth = getXML(cavecontents, "depth", maxItems=1, context=context)
-        survex_file = getXML(cavecontents, "survex_file", maxItems=1, context=context)
+    extent = getXML(cavecontents, "extent", maxItems=1, context=context)
-        description_file = getXML(cavecontents, "description_file", maxItems=1, context=context)
+    survex_file = getXML(cavecontents, "survex_file", maxItems=1, context=context)
-        url = getXML(cavecontents, "url", maxItems=1, context=context)
+    description_file = getXML(cavecontents, "description_file", maxItems=1, context=context)
-        entrances = getXML(cavecontents, "entrance", context=context)
+    url = getXML(cavecontents, "url", maxItems=1, context=context)
    entrances = getXML(cavecontents, "entrance", context=context)
-        if (
+    if not (
-            len(non_public) == 1
+        len(non_public) == 1
-            and len(slugs) >= 1
+        and len(slugs) >= 1 # is this really correct ?
-            and len(official_name) == 1
+        and len(official_name) == 1
-            and len(areas) >= 1
+        and len(areas) >= 1 # we want to stop using the sub-areas in 2023
-            and len(kataster_code) == 1
+        and len(kataster_code) == 1
-            and len(kataster_number) == 1
+        and len(kataster_number) == 1
-            and len(unofficial_number) == 1
+        and len(unofficial_number) == 1
-            and len(explorers) == 1
+        and len(explorers) == 1
-            and len(underground_description) == 1
+        and len(underground_description) == 1
-            and len(equipment) == 1
+        and len(equipment) == 1
-            and len(references) == 1
+        and len(references) == 1
-            and len(survey) == 1
+        and len(survey) == 1
-            and len(kataster_status) == 1
+        and len(kataster_status) == 1
-            and len(underground_centre_line) == 1
+        and len(underground_centre_line) == 1
-            and len(notes) == 1
+        and len(notes) == 1
-            and len(length) == 1
+        and len(length) == 1
-            and len(depth) == 1
+        and len(depth) == 1
-            and len(extent) == 1
+        and len(extent) == 1
-            and len(survex_file) == 1
+        and len(survex_file) == 1
-            and len(description_file) == 1
+        and len(description_file) == 1
-            and len(url) == 1
+        and len(url) == 1
-        ):
+    ):
-            try:
+        # more than one item in long list
-                c, state = Cave.objects.update_or_create(
+        message = f' ! ABORT loading this cave. in "{filename}"'
-                    non_public={
+        DataIssue.objects.create(parser="caves", message=message, url=f"/{slugs}_cave_edit/")
-                        "True": True,
+        print(message)
-                        "False": False,
+        return
-                        "true": True,
+        
-                        "false": False,
+    if cave:
-                    }[non_public[0]],
+        # this is a re-load prior to editing and we already know the cave id
-                    official_name=official_name[0],
+        cave.non_public={
-                    kataster_code=kataster_code[0],
+            "True": True,
-                    kataster_number=kataster_number[0],
+            "False": False,
-                    unofficial_number=unofficial_number[0],
+            "true": True,
-                    explorers=explorers[0],
+            "false": False}[non_public[0]]
-                    underground_description=underground_description[0],
+        cave.official_name=official_name[0]
-                    equipment=equipment[0],
+        cave.kataster_code=kataster_code[0]
-                    references=references[0],
+        cave.kataster_number=kataster_number[0]
-                    survey=survey[0],
+        cave.unofficial_number=unofficial_number[0]
-                    kataster_status=kataster_status[0],
+        cave.explorers=explorers[0]
-                    underground_centre_line=underground_centre_line[0],
+        cave.underground_description=underground_description[0]
-                    notes=notes[0],
+        cave.equipment=equipment[0]
-                    length=length[0],
+        cave.references=references[0]
-                    depth=depth[0],
+        cave.survey=survey[0]
-                    extent=extent[0],
+        cave.kataster_status=kataster_status[0]
-                    survex_file=survex_file[0],
+        cave.underground_centre_line=underground_centre_line[0]
-                    description_file=description_file[0],
+        cave.notes=notes[0]
-                    url=url[0],
+        cave.length=length[0]
-                    filename=filename,
+        cave.depth=depth[0]
-                )
+        cave.extent=extent[0]
-            except:
+        cave.survex_file=survex_file[0]
-                print(" ! FAILED to get only one CAVE when updating using: " + filename)
+        cave.description_file=description_file[0]
-                kaves = Cave.objects.all().filter(kataster_number=kataster_number[0])
+        cave.url=url[0]
-                for k in kaves:
+                
-                    message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
+        if len(slugs) > 1:
-                    DataIssue.objects.create(parser="caves", message=message)
+            message = f" ! Cave edit failure due to more than one slug: {slugs}, skipping this field edit. "
-                    print(message)
+            DataIssue.objects.create(parser="caves", message=message)
                for k in kaves:
                    if k.slug() is not None:
                        print(" ! - OVERWRITING this one: slug:" + str(k.slug()))
                        k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes
                        c = k
            for area_slug in areas:
                if area_slug in areas_xslug:
                    newArea = areas_xslug[area_slug]
                else:
                    area = Area.objects.filter(short_name=area_slug)
                    if area:
                        newArea = area[0]
                    else:
                        newArea = Area(short_name=area_slug, super=Area.objects.get(short_name="1623"))
                        newArea.save()
                    areas_xslug[area_slug] = newArea
                c.area.add(newArea)
            primary = True # this sets the first thing we find to be primary=True and all the others =False
            for slug in slugs:
                if slug in caves_xslug:
                    cs = caves_xslug[slug]
                else:
                    try:  # we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it
                        cs = CaveSlug.objects.update_or_create(cave=c, slug=slug, primary=primary)
                        caves_xslug[slug] = cs
                    except Exception as ex:
                        # This fails to do an update! It just crashes.. to be fixed
                        message = f" ! Cave update/create failure : {slug}, skipping file cave_data/{context} with exception\nException: {ex.__class__}"
                        DataIssue.objects.create(parser="caves", message=message)
                        print(message)
                primary = False
            if not entrances or len(entrances) < 1:
                # missing entrance link in cave_data/1623-* .html file
                set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrances")
            else:
                for entrance in entrances:
                    eslug = getXML(entrance, "entranceslug", maxItems=1, context=context)[0]
                    letter = getXML(entrance, "letter", maxItems=1, context=context)[0]
                    if len(entrances) == 1 and not eslug:  # may be empty: <entranceslug></entranceslug>
                        set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrance slug read from file")
                    else:
                        try:
                            if eslug in entrances_xslug:
                                entrance = entrances_xslug[eslug]
                            else:
                                # entrance = Entrance.objects.get(entranceslug__slug=eslug)
                                entrance = Entrance.objects.get(slug=eslug)
                                entrances_xslug[eslug] = entrance
                            CaveAndEntrance.objects.update_or_create(
                                cave=c, entrance_letter=letter, entrance=entrance
                            )
                        except:
                            message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {entrance} letter:"{letter}" cave:"{c}" filename:"cave_data/{filename}"'
                            DataIssue.objects.create(parser="entrances", message=message, url=f"{c.url}_edit/")
                            print(message)
            if survex_file[0]:
                if not (Path(SURVEX_DATA) / survex_file[0]).is_file():
                    message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"'
                    DataIssue.objects.create(parser="caves", message=message, url=f"/{slug[0:4]}/{slug}_cave_edit/")
                    print(message)
            if description_file[0]:  # if not an empty string
                message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
                DataIssue.objects.create(parser="caves ok", message=message, url=f"/{slug}_cave_edit/")
                print(message)
                if not (Path(EXPOWEB) / description_file[0]).is_file():
                    message = f' ! {slug:12} description filename  "{EXPOWEB}/{description_file[0]}" does not refer to a real file'
                    DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
                    print(message)
                    # c.description_file="" # done only once, to clear out cruft.
                    # c.save()
        else:  # more than one item in long list
            message = f' ! ABORT loading this cave. in "{filename}"'
            DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
            print(message)
        cave.areas = None
        cave.save()
        for area_slug in areas:
            a = Area.objects.filter(short_name=area_slug)
            if a:
                cave.area.add(a[0]) 
            else:
                message = f" ! Cave edit failure due to unrecognised Area: {a}, skipping this field edit. "
                DataIssue.objects.create(parser="caves", message=message)
                print(message)
        c = cave
        do_entrances()
        cave.save()
    else:
        try:
            c, state = Cave.objects.update_or_create(
                non_public={
                    "True": True,
                    "False": False,
                    "true": True,
                    "false": False,
                }[non_public[0]],
                official_name=official_name[0],
                kataster_code=kataster_code[0],
                kataster_number=kataster_number[0],
                unofficial_number=unofficial_number[0],
                explorers=explorers[0],
                underground_description=underground_description[0],
                equipment=equipment[0],
                references=references[0],
                survey=survey[0],
                kataster_status=kataster_status[0],
                underground_centre_line=underground_centre_line[0],
                notes=notes[0],
                length=length[0],
                depth=depth[0],
                extent=extent[0],
                survex_file=survex_file[0],
                description_file=description_file[0],
                url=url[0],
                filename=filename,
            )
        except:
            print(" ! FAILED to get only one CAVE when updating using: " + filename)
            kaves = Cave.objects.all().filter(kataster_number=kataster_number[0])
            for k in kaves:
                message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
                DataIssue.objects.create(parser="caves", message=message)
                print(message)
            for k in kaves:
                if k.slug() is not None:
                    print(" ! - OVERWRITING this one: slug:" + str(k.slug()))
                    k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes
                    c = k
        for area_slug in areas:
            if area_slug in areas_xslug:
                newArea = areas_xslug[area_slug]
            else:
                area = Area.objects.filter(short_name=area_slug)
                if area:
                    newArea = area[0]
                else:
                    newArea = Area(short_name=area_slug, super=Area.objects.get(short_name="1623"))
                    newArea.save()
                areas_xslug[area_slug] = newArea
            c.area.add(newArea)
        primary = True # this sets the first thing we find to be primary=True and all the others =False
        for slug in slugs:
            if slug in caves_xslug:
                cs = caves_xslug[slug]
            else:
               try:  # we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it
                    cs = CaveSlug.objects.update_or_create(cave=c, slug=slug, primary=primary)
                    caves_xslug[slug] = cs
               except Exception as ex:
                    #raise
                    # This fails to do an update! It just crashes.. to be fixed
                    message = f" ! Cave update/create failure : {slug}, skipping file cave_data/{context} with exception\nException: {ex.__class__}"
                    DataIssue.objects.create(parser="caves", message=message)
                    print(message)
-def getXML(text, itemname, minItems=1, maxItems=None, context=""):
+            primary = False
-    """Reads a single XML tag
+
-    Should throw exception rather than producing error message here,
+        if not entrances or len(entrances) < 1:
-    then handle exception in calling routine where it has the context.
+            # missing entrance link in cave_data/1623-* .html file
-    """
+            set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrances")
-    items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
+        else:
-    if len(items) < minItems:
+            do_entrances()
-        message = (
+
-            " ! %(count)i x %(itemname)s found, at least %(min)i expected. Load ABORT. "
+    # From here on the code applies to both edited and newly-imported caves
-            % {"count": len(items), "itemname": itemname, "min": minItems}
+    if survex_file[0]:
-            + " in file "
+        if not (Path(SURVEX_DATA) / survex_file[0]).is_file():
-            + context
+            message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"'
-        )
+            DataIssue.objects.create(parser="caves", message=message, url=f"/{slug[0:4]}/{slug}_cave_edit/")
-        DataIssue.objects.create(parser="caves", message=message, url="" + context)
+            print(message)
    if description_file[0]:  # if not an empty string
        message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
        DataIssue.objects.create(parser="caves ok", message=message, url=f"/{slug}_cave_edit/")
        print(message)
-    if maxItems is not None and len(items) > maxItems:
+        if not (Path(EXPOWEB) / description_file[0]).is_file():
-        message = (
+            message = f' ! {slug:12} description filename  "{EXPOWEB}/{description_file[0]}" does not refer to a real file'
-            " ! %(count)i x %(itemname)s found, no more than %(max)i expected in this XML unit. Load ABORT. "
+            DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
-            % {"count": len(items), "itemname": itemname, "max": maxItems}
+            print(message)
-            + " in file "
+            # c.description_file="" # done only once, to clear out cruft.
-            + context
+    c.save()
-        )
+
-        DataIssue.objects.create(parser="caves", message=message)
+
-        print(message)
+# ChatGPT replacement attempt 2023-04-21. Obviously very incomplete, but some useful ideas
-    if minItems == 0:
+# import os
-        if not items:
+# import xml.etree.ElementTree as ET
-            items = [""]
+
-    return items
+# class BadCaveException(Exception):
    # pass
 # class FailedCaveUpdateException(Exception):
    # pass
 # def readcave_chatgpt(filename, entrances_xslug, caves_xslug, areas_xslug):
    # """Reads an entrance description from the .html file and updates the corresponding Cave object"""
    # tree = ET.parse(os.path.join(CAVEDESCRIPTIONS, filename))
    # root = tree.getroot()
    # cavecontents = root.find("cave")
    # if cavecontents is None:
        # raise BadCaveException(f'! BAD CAVE at "{filename}"')
    # non_public = cavecontents.findtext("non_public")
    # slugs = cavecontents.findtext("caveslug")
    # official_name = cavecontents.findtext("official_name")
    # kataster_code = cavecontents.findtext("kataster_code")
    # kataster_number = cavecontents.findtext("kataster_number")
    # unofficial_number = cavecontents.findtext("unofficial_number")
    # explorers = cavecontents.findtext("explorers")
    # underground_description = cavecontents.findtext("underground_description")
    # equipment = cavecontents.findtext("equipment")
    # references = cavecontents.findtext("references")
    # survey = cavecontents.findtext("survey")
    # kataster_status = cavecontents.findtext("kataster_status")
    # underground_centre_line = cavecontents.findtext("underground_centre_line")
    # notes = cavecontents.findtext("notes")
    # length = cavecontents.findtext("length")
    # depth = cavecontents.findtext("depth")
    # extent = cavecontents.findtext("extent")
    # survex_file = cavecontents.findtext("survex_file")
    # description_file = cavecontents.findtext("description_file")
    # url = cavecontents.findtext("url")
    # areas = cavecontents.findall("area")
    # entrances = cavecontents.findall("entrance")
    # if (
        # non_public is not None
 # # etc.
 # # wrong, some of these should be ==1 and some >=1 
    # ):
        # try:
            # cave = caves_xslug.get(kataster_number)
            # if cave is None:
                # cave = Cave.objects.create(
                    # non_public={
                        # "True": True,
                        # "False": False,
                        # "true": True,
                        # "false": False,
                    # }[non_public],
                    # official_name=official_name,
                    # # kataster [truncated]
 def readcaves():
-    """Reads the xml-format HTML files in the EXPOWEB repo, not from the loser repo."""
+    """Called from databaseReset mass importer.
    Reads the xml-format HTML 'cave' files in the EXPOWEB repo, the survex files from the loser repo.
    """
    # Pending is for those caves which do not have cave_data/1623-xxx.html XML files even though 
    # they exist and have surveys. 
    pending = set()