debugging Cave page links..

2025-12-15 12:27:05 +00:00 · 2023-09-11 20:38:14 +03:00
parent 47db19f1a2
commit d323ff2700
9 changed files with 76 additions and 86 deletions
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -27,6 +27,9 @@ So is the first thing that creates tables.
 """

 todo = """  
+- When reading cave data, to start off with we do not know the cave id (slug) so we can't give a useful url in
+  the error message, but we do have the filename. Systematize this, and the same thing with reading entrance files.
+  
 - Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file.
   So we will need a separate file-editing capability just for this configuration file ?!
   
@@ -35,15 +38,15 @@ todo = """
 - rewrite archaic regex
  re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
  in modern form and pre-compile it. 
-
- Semi-automagically import all the 1627- pending caves and create HTML files for them to be
-  edited individually. (These are caves we only know about because we have German survex files.)
   
 - crashes on MariaDB in databasereset.py on server when deleting Caves and complains Area needs a 
  non null parent, But this is not true.   The only solution we have found is to let it crash, then 
  stop and restart MariaDB (requires a logon able to sudo)   and then restart the databasereset.py 
-  again. (status as of July 2022)
+  again. (status as of July 2022). May not happen now that class Area is removed (Sept.2023).
 """
+AREACODES = {"1623", "1624", "1626", "1627"} # NB set not dict
+ARGEAREAS = {"1626", "1627"}
+
 entrances_xslug = {}
 caves_xslug = {}
 areas_xslug = {}
@@ -105,7 +108,7 @@ def create_new_cave(svxpath, msg=None):
    :loser: repo which is not recognised as a known cave.
    ALSO called by survex parser when it finds a cave it doesn't recognise
    """
-    # e.g. svxpath = "caves-1623/666/antig"
+    # e.g. svxpath = "caves-1623/666/beast" (given without the ".svx" extension, which is appended below)
    print(f"Create new cave at {svxpath} - {msg}")
    #
    survex_file = svxpath + ".svx"
@@ -117,16 +120,13 @@ def create_new_cave(svxpath, msg=None):
    if a[0:3] == "162":
        areacode = a[0:4]
        url = f"{areacode}/{caveid}.html"  # Note we are appending the .html as we are believe in backwards compatability.
-        #url = f"{areacode}/{a[5:]}.html"  # This is original code, but a above is only defined as being 4 characters long, so it did not make sense and produced non unique urls
    else: 
        print(f"WARNING: parsers/caves/create_new_cave called with svxpath '{svxpath}'.  Surely it should start 'caves-162*'? {msg}")
        areacode = "1623"
        url = f"1623/{caveid}.html"
-        #url = f"1623/{k}.html" # This is original code, but a above is only defined as being 4 characters long, so it did not make sense and produced non unique urls
-        
+         
    k = f"{areacode}-{caveid}"

-
    caves = Cave.objects.filter(unofficial_number=caveid, areacode =areacode) 
    if caves:
        message = f" ! Already exists, caveid:{k} in areacode {areacode} {caves} - {msg}"
@@ -149,14 +149,15 @@ def create_new_cave(svxpath, msg=None):
    cave.save()
    return cave

-def do_ARGE_cave(slug, caveid, url, areacode, svxid):
-    """Only called by survex parser. 
+def do_ARGE_cave(slug, caveid, svxurl, areacode, svxid):
+    """Only called by survex parser, NOT the cave parser.
    Creates a new Cave object, but with abbreviated data as the survex file (from ARGE) is all we have.
    We already know the survex file.
-    We already know that it doesn't exist... though there are bugs..
+    We already know that the cave doesn't exist... though there are bugs..
    """
    
    default_note = "This is an ARGE cave where we only have the survex file and no other information"
+    url = f"{areacode}/{caveid}.html"

    urltest = Cave.objects.filter(url=url)
    if urltest:
@@ -171,11 +172,21 @@ def do_ARGE_cave(slug, caveid, url, areacode, svxid):
        DataIssue.objects.create(parser="caves", message=message, url=url)
        print(message)
        return numtest[0]
-
+    
+    sv = Path(settings.SURVEX_DATA, svxid + ".svx")
+    if sv.is_file:
+        with open(sv, "r") as s:
+            line1 = s.readline()
+            line2 = s.readline()
+            line3 = s.readline()
+    else:
+        print(f"not correct {sv}", file=sys.stderr)
+        
    cave = Cave( 
-    underground_description="ARGE cave.",
-    survex_file= f"{svxid}.svx", # or is this svxurl ?!
-    # url=url, No, the url spplied is that of the survexfile not of the cave file, e.g. /1626/254/254
+    underground_description="ARGE cave.\n3 lines of the survexfile:\n" + line1 +line2 +line3,
+    unofficial_number="ARGE",
+    survex_file= svxurl,
+    url=url, 
    notes=default_note,
    areacode=areacode,
    )
@@ -427,7 +438,7 @@ def read_entrance(filename, ent=None):
        DataIssue.objects.create(parser="entrances", message=message, url=f"/cave/{slug}/edit/")
        print(message)
        
-    lastvisit = getXML(entrancecontents, "lastvisit", maxItems=1, minItems=0, context=context)
+    lastvisit = getXML(entrancecontents, "lastvisit", maxItems=1, minItems=0, context=f"/cave/{slug}/edit/")

    alt = getXMLmax1("alt")
    approach = getXMLmax1("approach")
@@ -611,12 +622,12 @@ def read_cave(filename, cave=None):
    
    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
    fn = settings.CAVEDESCRIPTIONS / filename
-    context = filename
+    context = f"/cave_data/{filename}_edit"
    
    # print(f" - Reading Cave from cave descriptions file {fn}")
    if not fn.exists():
        message = f" ! Cave_data file reading problem filename:'cave_data/{filename}'"
-        DataIssue.objects.create(parser="caves", message=message, url=f"/cave_data/{filename}_edit")
+        DataIssue.objects.create(parser="caves", message=message, url=context)
        print(message)
        return None

@@ -626,7 +637,7 @@ def read_cave(filename, cave=None):

    if len(cavecontentslist) != 1:
        message = f'! BAD CAVE DATA in "{filename}". More than one cave. Edit file manually, click.'
-        DataIssue.objects.create(parser="caves", message=message, url=f"/cave_data/{filename}_edit")
+        DataIssue.objects.create(parser="caves", message=message, url=context)
        print(message)
        return None
        
@@ -634,10 +645,11 @@ def read_cave(filename, cave=None):
    slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context)
    if len(slugs) > 1:
        message = f" ! - More than one slug for a cave: {cave}, slugs: {slugs}. Ignoring all except first."
-        DataIssue.objects.create(parser="caves", message=message, url=f"{cave.url}_edit/")
+        DataIssue.objects.create(parser="caves", message=message, url=context)
        print(message)
    slug = slugs[0]
-
+    context = url=f"/{slug[0:4]}/{slug}_cave_edit/"
+ 
    non_public = getXMLmax1("non_public")
    official_name = getXMLmax1("official_name")
    kataster_code = getXMLmax1("kataster_code")
@@ -668,7 +680,7 @@ def read_cave(filename, cave=None):
            kaves = Cave.objects.all().filter(filename=filename) # replace with slug when CaveSlug tidied up
            for k in kaves:
                message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
-                DataIssue.objects.create(parser="caves", message=message)
+                DataIssue.objects.create(parser="caves", message=message, url=context)
                print(message)
            for k in kaves:
                if k.slug() is not None:
@@ -679,6 +691,7 @@ def read_cave(filename, cave=None):
    # From here on the code applies to both edited and newly-imported caves (mostly!)
    do_caveslugstuff() # needs cave!=None
    
+    
    cave.non_public=boolify(non_public)
    cave.official_name=official_name[0]
    cave.kataster_code=kataster_code[0]
@@ -701,11 +714,14 @@ def read_cave(filename, cave=None):
            
    areas = getXML(cavecontents, "area", context=context) # can be multiple <area> tags
    for area_slug in areas:
-        if area_slug in ["1623", "1624", "1626", "1627"]: # ignore sub areas which are in another <area> tag
+        if area_slug in AREACODES: # ignore sub areas which are in another <area> tag
            cave.areacode = area_slug 
        else:
            cave.subarea = area_slug
- 
+    if not cave.areacode:
+         if slug[0:4] in AREACODES:
+            cave.areacode = slug[0:4]
+            
    entrances = getXML(cavecontents, "entrance", context=context)
    do_entrances()
    # print(f"- {entrances_xslug=}")