Cleaning up entrance importing robustly

2023-07-26 00:14:46 +03:00 · 2023-07-26 00:14:46 +03:00 · fab7adf079
commit fab7adf079
parent 72a6b091e6
3 changed files with 76 additions and 33 deletions
--- a/parsers/caves.py
+++ b/parsers/caves.py
@ -91,9 +91,12 @@ def set_dummy_entrance(id, slug, cave, msg="DUMMY"):
        # I am not sure if we want  entrances automagically created.          
        # Therefore I have commented it out. MJG
-        # entrance = dummy_entrance(id, slug, msg="DUMMY")
+        # Automagic  restored, as it creates a load of error message in the DataIssue log. 2023-07-25
-        # entrances_xslug[slug] = entrance
+        # pending doing this properly, ie. properly creating an XML file for each of these pending caves.
-        # CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
+        
        entrance = dummy_entrance(id, slug, msg="DUMMY")
        entrances_xslug[slug] = entrance
        CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
        pass
    except:
        message = f' ! Entrance Dummy setting failure, slug:"{slug}" cave id :"{id}" '
@ -343,6 +346,22 @@ def boolify(boolstrs):
            "False": False,
            "true": True,
            "false": False}[boolstrs[0]]
 def validate_station(station):
    """Check that an entrance station identifier is specific enough to be safe.

    It is possible to break troggle entirely by getting this wrong.
    These station identifiers are matched against other stations using .endswith()
    in parsers/locations.py
    so a simple number here will match hundreds of SurvexStation objects.
    It should be, e.g. "1623.p240"

    An empty string (no station set) is accepted.

    Returns True when the identifier is empty or contains a full stop.
    Raises ValueError when a non-empty identifier contains no full stop.
    """
    if station == "":
        return True
    if "." not in station:
        # no full stop found. Bad station identifier.
        # Raise an explicit exception: a bare `raise` with no exception being handled
        # here yields an unrelated RuntimeError, or re-raises whatever exception a
        # caller further up the stack happens to be handling at the time.
        raise ValueError(
            f"Bad station identifier '{station}': no full stop found, must format like '1623.p240'"
        )
    return True
 def read_entrance(filename, ent=None):
    """Reads an entrance description from the .html file.
@ -440,6 +459,17 @@ def read_entrance(filename, ent=None):
    ent.underground_description=underground_description[0]
    ent.url=url[0]
    for st in [ent.exact_station, ent.other_station, ent.tag_station]:
        try:
            validate_station(st)
        except:
            message = f" ! BAD ENTRANCE TAG '{st}' in '{filename}'. Must format like '1623.p204'. Edit file manually, click."
            #http://localhost:8000/1623/2023-EBH-01/1623-2023-EBH-01:1623-2023-EBH-01_entrance_edit
            DataIssue.objects.create(parser="entrances", message=message, url=f"/1623/{slug}/{slug}:{slug}_entrance_edit")
            print(message)
            # ent_issues = DataIssue.objects.filter(parser="entrances")
            # print(f".. We now have  {len(ent_issues)} entrance DataIssues")
            return None
    ent.save()
    return ent
@ -704,6 +734,10 @@ def readcaves():
        print(" - Reading Entrances from entrance descriptions xml files")
        for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
            read_entrance(filename)
        # Why is this needed? Without it, we lose these DataIssues!
        ent_issues = DataIssue.objects.filter(parser="entrances")
        print(f"__ We now have  {len(ent_issues)} entrance DataIssues")
        print(" - Reading Caves from cave descriptions xml files")
        for filename in next(os.walk(CAVEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
--- a/parsers/locations.py
+++ b/parsers/locations.py
@ -53,15 +53,16 @@ class MapLocations(object):
                    k = ent.caveandentrance_set.all()[0].cave
                except:
                    message = f" ! Failed to get Cave linked to Entrance:{ent.name} from:{ent.filename} best:{ent.best_station()} {ent.caveandentrance_set.all()}"
-                    stash_data_issue(parser="entrances", message=message)
+                    stash_data_issue(parser="positions", message=message)
                    print(message)
                    continue  # skip this entrance
                try:
                    areaName = k.getArea().short_name
                except:
                    message = f" ! Failed to get Area on cave '{k}' linked to Entrance:{ent.name} from:{ent.filename} best:{ent.best_station()}"
-                    stash_data_issue(parser="entrances", message=message)
+                    stash_data_issue(parser="positions", message=message)
                    print(message)
                    store_data_issues()
                    raise
                self.p.append((ent.best_station(), f"{areaName}-{str(ent)[5:]}", ent.needs_surface_work(), str(ent)))
        message = f" -  {len(self.p)} entrances linked to caves."
@ -83,6 +84,9 @@ def LoadPositions():
    """
    svx_t = 0
    d3d_t = 0
    DataIssue.objects.filter(parser="positions").delete()
    def runcavern3d():
        outputdir = Path(str(f"{topdata}.svx")).parent
@ -100,7 +104,7 @@ def LoadPositions():
            )  # check=False means exception not raised
            if sp.returncode != 0:
                message = f" ! Error: cavern: creating {file3d} in runcavern3()"
-                stash_data_issue(parser="entrances", message=message)
+                stash_data_issue(parser="positions", message=message)
                print(message)
                # find the errors in the 1623.log file
@ -108,17 +112,17 @@ def LoadPositions():
                    ["grep", "error:", f"{topdata}.log"], capture_output=True, check=False, text=True
                )  # check=False means exception not raised
                message = f" ! Error: cavern: {sp.stdout} creating {file3d} "
-                stash_data_issue(parser="entrances", message=message)
+                stash_data_issue(parser="positions", message=message)
                print(message)
        except:
            message = f" ! CalledProcessError 'cavern' in runcavern3() at {topdata}."
-            stash_data_issue(parser="entrances", message=message)
+            stash_data_issue(parser="positions", message=message)
            print(message)
            if file3d.is_file():
                message = f" ! CalledProcessError. File permissions {file3d.stat().st_mode} on {str(file3d)}"
-                stash_data_issue(parser="entrances", message=message)
+                stash_data_issue(parser="positions", message=message)
                print(message)
        if file3d.is_file():  # might be an old one though
@ -140,11 +144,11 @@ def LoadPositions():
                    )
            except:
                message = f" ! CalledProcessError 'survexport' in runcavern3() at {file3d}."
-                stash_data_issue(parser="entrances", message=message)
+                stash_data_issue(parser="positions", message=message)
                print(message)
        else:
            message = f" ! Failed to find {file3d} so aborting generation of new .pos, using old one if present"
-            stash_data_issue(parser="entrances", message=message)
+            stash_data_issue(parser="positions", message=message)
            print(message)
    topdata = os.fspath(Path(settings.SURVEX_DATA) / settings.SURVEX_TOPNAME)
@ -183,10 +187,13 @@ def LoadPositions():
    for pt in MapLocations().points():
        svxid, number, point_type, label = pt
        mappoints[svxid] = True
        if svxid =="1":
            print(f"BOGUS {pt}") # this is now checked for when importing the entrance tags in parsers/caves.py
    if not Path(pospath).is_file():
        message = f" ! Failed to find {pospath} so aborting generation of entrance locations. "
-        stash_data_issue(parser="entrances", message=message)
+        # DataIssue.objects.create(parser="positions", message=message, url=f"/entrance_data/{pospath}_edit")
        stash_data_issue(parser="positions", message=message)
        print(message)
        return
@ -202,7 +209,7 @@ def LoadPositions():
        # except:
            # message = " ! FAILED to find root SurvexBlock"
            # print(message)
-            # stash_data_issue(parser="entrances", message=message)
+            # stash_data_issue(parser="positions", message=message)
            # raise
    sbdict = {}
    dups = 0
@ -216,35 +223,37 @@ def LoadPositions():
                dups += 1
                message = f" ! DUPLICATE SurvexBlock identifier in .pos file '{sbid}'\n{sbs[sbid]}\n{lineno} / {line}"
                print(message)
-                stash_data_issue(parser="entrances", message=message)
+                stash_data_issue(parser="positions", message=message)
            else:
                sbdict[sbid] = lineno
            for sid in mappoints:
                if sbid.endswith(sid):
                    blockpath = "." + sbid[: -len(sid)].strip(".") # only the most recent one that is mappoints
                    # print(f"# match {sid} {sbid} {blockpath}")
                    # But why are we doing this? Why do we want the survexblock id for each of these ?
                    # ..because mostly they don't actually appear in any SVX file. We should match them up
                    # via the cave data, not by this half-arsed syntactic match which almost never works. PMS.
                    # We are reading the .pos file so we only know the SurvexFile not the SurvexBlock.
-                    # ghastly.
+                    
-                    if False:
+                    # if False:
-                        try:
+                        # try:
-                            sbqs = SurvexBlock.objects.filter(survexpath=blockpath)
+                            # sbqs = SurvexBlock.objects.filter(survexpath=blockpath)
-                            if len(sbqs) == 1:
+                            # if len(sbqs) == 1:
-                                sbqs[0]
+                                # sbqs[0]
-                            if len(sbqs) > 1:
+                            # if len(sbqs) > 1:
-                                message = f" ! MULTIPLE {len(sbqs):3} SurvexBlocks '{blockpath}' from survex files mention Entrance point  '{sbid}' (line {lineno})"
+                                # message = f" ! MULTIPLE {len(sbqs):3} SurvexBlocks '{blockpath}' from survex files mention Entrance point  '{sbid}' (line {lineno})"
-                                print(message)
+                                # print(message)
-                                stash_data_issue(parser="entrances", message=message)
+                                # stash_data_issue(parser="positions", message=message)
-                                for b in sbqs:
+                                # for b in sbqs:
-                                    print(f" - {b}")
+                                    # print(f" - {b}")
-                                sbqs[0]
+                                # sbqs[0]
-                        except:
+                        # except:
-                            message = f" ! {lineno} FAIL in getting SurvexBlock matching Entrance point {blockpath} {sid}"
+                            # message = f" ! {lineno} FAIL in getting SurvexBlock matching Entrance point {blockpath} {sid}"
-                            print(message)
+                            # print(message)
-                            stash_data_issue(parser="entrances", message=message)
+                            # stash_data_issue(parser="positions", message=message)
                    try:
                        ss = SurvexStation(name=sbid)
                        ss.x = float(x)
@ -255,7 +264,8 @@ def LoadPositions():
                    except:
                        message = f" ! {lineno} FAIL to create SurvexStation Entrance point {blockpath} {sid}"
                        print(message)
-                        stash_data_issue(parser="entrances", message=message)
+                        stash_data_issue(parser="positions", message=message)
                        store_data_issues()
                        raise
    print(f" -  {found} SurvexStation entrances found.")
    print(f" -  {dups} Duplicated SurvexStation entrances found")
--- a/parsers/survex.py
+++ b/parsers/survex.py
@ -2558,7 +2558,6 @@ def LoadSurvexBlocks():
    DataIssue.objects.filter(parser="survexleg").delete()
    DataIssue.objects.filter(parser="survexunits").delete()
    DataIssue.objects.filter(parser="survex team").delete()
    DataIssue.objects.filter(parser="entrances").delete()
    DataIssue.objects.filter(parser="xEntrances").delete()
    print("  - survex Data Issues flushed")
    mem1 = get_process_memory()