From fab7adf07989c03289336c6f2117696129f82f19 Mon Sep 17 00:00:00 2001
From: Philip Sargent
Date: Wed, 26 Jul 2023 00:14:46 +0300
Subject: [PATCH] Cleaning up entrance importing robustly

---
 parsers/caves.py     | 40 ++++++++++++++++++++++++--
 parsers/locations.py | 68 +++++++++++++++++++++++++-------------------
 parsers/survex.py    |  1 -
 3 files changed, 76 insertions(+), 33 deletions(-)

diff --git a/parsers/caves.py b/parsers/caves.py
index f8b39e9..93d5cff 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -91,9 +91,12 @@ def set_dummy_entrance(id, slug, cave, msg="DUMMY"):
         # I am not sure if we want entrances automagically created.
         # Therefore I have commented it out. MJG
-        # entrance = dummy_entrance(id, slug, msg="DUMMY")
-        # entrances_xslug[slug] = entrance
-        # CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
+        # Automagic restored 2023-07-25, as disabling it creates a load of error messages in the DataIssue log,
+        # pending doing this properly, i.e. actually creating an XML file for each of these pending caves.
+
+        entrance = dummy_entrance(id, slug, msg="DUMMY")
+        entrances_xslug[slug] = entrance
+        CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
         pass
     except:
         message = f' ! Entrance Dummy setting failure, slug:"{slug}" cave id :"{id}" '
@@ -343,6 +346,22 @@ def boolify(boolstrs):
         "False": False,
         "true": True,
         "false": False}[boolstrs[0]]
+
+def validate_station(station):
+    """It is possible to break troggle entirely by getting this wrong.
+    These station identifiers are matched against other stations using .endswith()
+    in parsers/locations.py,
+    so a simple number here will match hundreds of SurvexStation objects.
+    It should be e.g. "1623.p240".
+    """
+    if station == "":
+        return True
+    dot = station.find(".")
+    if dot == -1:
+        # No full stop found: bad station identifier.
+        raise ValueError(f"Bad station identifier '{station}': no full stop")
+    else:
+        return True
 
 def read_entrance(filename, ent=None):
     """Reads an entrance description from the .html file.
@@ -440,6 +459,17 @@ def read_entrance(filename, ent=None):
             ent.underground_description=underground_description[0]
             ent.url=url[0]
 
+        for st in [ent.exact_station, ent.other_station, ent.tag_station]:
+            try:
+                validate_station(st)
+            except:
+                message = f" ! BAD ENTRANCE TAG '{st}' in '{filename}'. Must be in the format '1623.p204'. Edit the file manually (click the edit link)."
+                # http://localhost:8000/1623/2023-EBH-01/1623-2023-EBH-01:1623-2023-EBH-01_entrance_edit
+                DataIssue.objects.create(parser="entrances", message=message, url=f"/1623/{slug}/{slug}:{slug}_entrance_edit")
+                print(message)
+                # ent_issues = DataIssue.objects.filter(parser="entrances")
+                # print(f".. We now have {len(ent_issues)} entrance DataIssues")
+                return None
         ent.save()
         return ent
@@ -704,6 +734,10 @@ def readcaves():
         print(" - Reading Entrances from entrance descriptions xml files")
         for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
             read_entrance(filename)
+
+        # Why is this needed? Without it, we lose these DataIssues!
+        ent_issues = DataIssue.objects.filter(parser="entrances")
+        print(f"__ We now have {len(ent_issues)} entrance DataIssues")
 
         print(" - Reading Caves from cave descriptions xml files")
         for filename in next(os.walk(CAVEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
diff --git a/parsers/locations.py b/parsers/locations.py
index b3a318c..8299a28 100644
--- a/parsers/locations.py
+++ b/parsers/locations.py
@@ -53,15 +53,16 @@ class MapLocations(object):
                 k = ent.caveandentrance_set.all()[0].cave
             except:
                 message = f" ! Failed to get Cave linked to Entrance:{ent.name} from:{ent.filename} best:{ent.best_station()} {ent.caveandentrance_set.all()}"
-                stash_data_issue(parser="entrances", message=message)
+                stash_data_issue(parser="positions", message=message)
                 print(message)
                 continue  # skip this entrance
             try:
                 areaName = k.getArea().short_name
             except:
                 message = f" ! Failed to get Area on cave '{k}' linked to Entrance:{ent.name} from:{ent.filename} best:{ent.best_station()}"
-                stash_data_issue(parser="entrances", message=message)
+                stash_data_issue(parser="positions", message=message)
                 print(message)
+                store_data_issues()
                 raise
             self.p.append((ent.best_station(), f"{areaName}-{str(ent)[5:]}", ent.needs_surface_work(), str(ent)))
         message = f" - {len(self.p)} entrances linked to caves."
@@ -83,6 +84,9 @@ def LoadPositions():
     """
     svx_t = 0
     d3d_t = 0
+
+    DataIssue.objects.filter(parser="positions").delete()
+
     def runcavern3d():
         outputdir = Path(str(f"{topdata}.svx")).parent
@@ -100,7 +104,7 @@ def LoadPositions():
            )  # check=False means exception not raised
            if sp.returncode != 0:
                message = f" ! Error: cavern: creating {file3d} in runcavern3()"
-               stash_data_issue(parser="entrances", message=message)
+               stash_data_issue(parser="positions", message=message)
                print(message)
 
                # find the errors in the 1623.log file
                sp = subprocess.run(
@@ -108,17 +112,17 @@ def LoadPositions():
                    ["grep", "error:", f"{topdata}.log"], capture_output=True, check=False, text=True
                )  # check=False means exception not raised
                message = f" ! Error: cavern: {sp.stdout} creating {file3d} "
-               stash_data_issue(parser="entrances", message=message)
+               stash_data_issue(parser="positions", message=message)
                print(message)
 
        except:
            message = f" ! CalledProcessError 'cavern' in runcavern3() at {topdata}."
-           stash_data_issue(parser="entrances", message=message)
+           stash_data_issue(parser="positions", message=message)
            print(message)
 
            if file3d.is_file():
                message = f" ! CalledProcessError. File permissions {file3d.stat().st_mode} on {str(file3d)}"
-               stash_data_issue(parser="entrances", message=message)
+               stash_data_issue(parser="positions", message=message)
                print(message)
 
        if file3d.is_file():  # might be an old one though
@@ -140,11 +144,11 @@ def LoadPositions():
                )
            except:
                message = f" ! CalledProcessError 'survexport' in runcavern3() at {file3d}."
-               stash_data_issue(parser="entrances", message=message)
+               stash_data_issue(parser="positions", message=message)
                print(message)
        else:
            message = f" ! Failed to find {file3d} so aborting generation of new .pos, using old one if present"
-           stash_data_issue(parser="entrances", message=message)
+           stash_data_issue(parser="positions", message=message)
            print(message)
 
    topdata = os.fspath(Path(settings.SURVEX_DATA) / settings.SURVEX_TOPNAME)
@@ -183,10 +187,13 @@ def LoadPositions():
    for pt in MapLocations().points():
        svxid, number, point_type, label = pt
        mappoints[svxid] = True
+       if svxid == "1":
+           print(f"BOGUS {pt}")  # this is now checked for when importing the entrance tags in parsers/caves.py
 
    if not Path(pospath).is_file():
        message = f" ! Failed to find {pospath} so aborting generation of entrance locations. "
-       stash_data_issue(parser="entrances", message=message)
+       # DataIssue.objects.create(parser="positions", message=message, url=f"/entrance_data/{pospath}_edit")
+       stash_data_issue(parser="positions", message=message)
        print(message)
        return
@@ -202,7 +209,7 @@ def LoadPositions():
    # except:
    #     message = " ! FAILED to find root SurvexBlock"
    #     print(message)
-   #     stash_data_issue(parser="entrances", message=message)
+   #     stash_data_issue(parser="positions", message=message)
    #     raise
    sbdict = {}
    dups = 0
@@ -216,35 +223,37 @@ def LoadPositions():
                dups += 1
                message = f" ! DUPLICATE SurvexBlock identifier in .pos file '{sbid}'\n{sbs[sbid]}\n{lineno} / {line}"
                print(message)
-               stash_data_issue(parser="entrances", message=message)
+               stash_data_issue(parser="positions", message=message)
            else:
                sbdict[sbid] = lineno
                for sid in mappoints:
                    if sbid.endswith(sid):
                        blockpath = "." + sbid[: -len(sid)].strip(".")  # only the most recent one that is mappoints
+                       # print(f"# match {sid} {sbid} {blockpath}")
+
                        # But why are we doing this? Why do we want the survexblock id for each of these ?
                        # ..because mostly they don't actually appear in any SVX file. We should match them up
                        # via the cave data, not by this half-arsed syntactic match which almost never works. PMS.
                        # We are reading the .pos file so we only know the SurvexFile not the SurvexBlock.
-                       # ghastly.
-                       if False:
-                           try:
-                               sbqs = SurvexBlock.objects.filter(survexpath=blockpath)
-                               if len(sbqs) == 1:
-                                   sbqs[0]
-                               if len(sbqs) > 1:
-                                   message = f" ! MULTIPLE {len(sbqs):3} SurvexBlocks '{blockpath}' from survex files mention Entrance point '{sbid}' (line {lineno})"
-                                   print(message)
-                                   stash_data_issue(parser="entrances", message=message)
-                                   for b in sbqs:
-                                       print(f" - {b}")
-                                   sbqs[0]
-                           except:
-                               message = f" ! {lineno} FAIL in getting SurvexBlock matching Entrance point {blockpath} {sid}"
-                               print(message)
-                               stash_data_issue(parser="entrances", message=message)
+
+                       # if False:
+                       #     try:
+                       #         sbqs = SurvexBlock.objects.filter(survexpath=blockpath)
+                       #         if len(sbqs) == 1:
+                       #             sbqs[0]
+                       #         if len(sbqs) > 1:
+                       #             message = f" ! MULTIPLE {len(sbqs):3} SurvexBlocks '{blockpath}' from survex files mention Entrance point '{sbid}' (line {lineno})"
+                       #             print(message)
+                       #             stash_data_issue(parser="positions", message=message)
+                       #             for b in sbqs:
+                       #                 print(f" - {b}")
+                       #             sbqs[0]
+                       #     except:
+                       #         message = f" ! {lineno} FAIL in getting SurvexBlock matching Entrance point {blockpath} {sid}"
+                       #         print(message)
+                       #         stash_data_issue(parser="positions", message=message)
                        try:
                            ss = SurvexStation(name=sbid)
                            ss.x = float(x)
@@ -255,7 +264,8 @@ def LoadPositions():
                        except:
                            message = f" ! {lineno} FAIL to create SurvexStation Entrance point {blockpath} {sid}"
                            print(message)
-                           stash_data_issue(parser="entrances", message=message)
+                           stash_data_issue(parser="positions", message=message)
+                           store_data_issues()
                            raise
    print(f" - {found} SurvexStation entrances found.")
    print(f" - {dups} Duplicated SurvexStation entrances found")
diff --git a/parsers/survex.py b/parsers/survex.py
index 2be2217..4ee5f0e 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -2558,7 +2558,6 @@ def LoadSurvexBlocks():
     DataIssue.objects.filter(parser="survexleg").delete()
     DataIssue.objects.filter(parser="survexunits").delete()
     DataIssue.objects.filter(parser="survex team").delete()
-    DataIssue.objects.filter(parser="entrances").delete()
     DataIssue.objects.filter(parser="xEntrances").delete()
     print(" - survex Data Issues flushed")
     mem1 = get_process_memory()
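
Note on validate_station(): a quick, runnable illustration (not part of the
patch) of why a bare number is rejected. Station matching in
parsers/locations.py pairs entrance tags with .pos identifiers using
.endswith(), so an identifier without the "area." prefix matches far too
many stations. The pos_ids values below are invented sample data, not real
survey stations.

    # Sketch only: demonstrates the .endswith() matching hazard that
    # validate_station() guards against. pos_ids is made-up sample data.
    pos_ids = ["1623.p240", "1623.101", "1624.p81", "1623.31"]

    def matches(tag):
        # the same kind of suffix test used when pairing entrance tags
        # with SurvexStation identifiers read from the .pos file
        return [sid for sid in pos_ids if sid.endswith(tag)]

    print(matches("1"))          # ['1623.101', '1624.p81', '1623.31'] - far too loose
    print(matches("1623.p240"))  # ['1623.p240'] - exactly one station, as intended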