rearrange ref and comment detection

2024-11-25 16:51:54 +00:00 · 2020-06-24 22:46:18 +01:00 · 2020-06-24 22:46:18 +01:00 · 04f14c91f0
commit 04f14c91f0
parent 664c18ebbe
1 changed files with 191 additions and 175 deletions
--- a/parsers/survex.py
+++ b/parsers/survex.py
@ -38,7 +38,6 @@ class LoadSurvex():
    stardatadefault = {"type":"normal", "t":"leg", "from":0, "to":1, "tape":2, "compass":3, "clino":4}
    stardataparamconvert = {"length":"tape", "bearing":"compass", "gradient":"clino"}

-    rx_braskets= re.compile(r"[()]")
    rx_linelen = re.compile(r"[\d\-+.]+$")
    rx_team    = re.compile(r"(?i)(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$")
    rx_person  = re.compile(r"(?i) and | / |, | & | \+ |^both$|^none$")
@ -46,21 +45,15 @@ class LoadSurvex():
 #   remember there is also QM_PATTERN used in views_other and set in settings.py

    rx_comment = re.compile(r"([^;]*?)\s*(?:;\s*(.*))?\n?$")
-    rx_ref     = re.compile(r'.*?ref.*?(\d+)\s*#\s*(X)?\s*(\d+)')
+    rx_ref     = re.compile(r'^\s*ref[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)')
    rx_star    = re.compile(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$')
-    # years from 1960 to 2039
    rx_starref = re.compile(r'(?i)^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$')
-    # rx_starref = re.compile("""?x   # VERBOSE mode - can't get this to work
-    # ^\s*\*ref       # look for *ref at start of line
-    # [\s.:]*         # some spaces, stops or colons
-    # ((?:19[6789]\d)|(?:20[0123]\d)) # a date from 1960 to 2039 - captured as one field
-    # \s*#            # spaces then hash separator 
-    # ?\s*(X)         # optional X - captured
-    # ?\s*(.*?\d+.*?) # maybe a space, then at least one digit in the string - captured
-    # $(?i)""", re.X) # the end  (do the whole thing case insensitively)
+    rx_argsref = re.compile(r'(?i)^[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$')

    survexlegsalllength = 0.0
    survexlegsnumber = 0
+    depthbegin = 0
+    lineno = 0
    insp = ""
    callcount = 0
    stardata ={}
@ -112,16 +105,21 @@ class LoadSurvex():
                survexblock.expeditionday = survexblock.expedition.get_expedition_day(survexblock.date)
                survexblock.save()

-    def LoadSurvexLineLeg(self, survexblock, stardata, sline, comment):
+    def LoadSurvexLineLeg(self, survexblock, svxline, sline, comment):
        """This reads compass, clino and tape data but only keeps the tape lengths,
        the rest is discarded after error-checking.
        """
-        ls = sline.lower().split()
+        stardata = self.stardata
        survexleg = SurvexLeg()
+
+        ls = sline.lower().split()
        # this next fails for two surface survey svx files which use / for decimal point 
        # e.g. '29/09' in the tape measurement, or use decimals but in brackets, e.g. (06.05)
-        if stardata["type"] == "normal":
-            tape = self.rx_braskets.sub("",ls[stardata["tape"]])
+        if stardata["type"] == "normal": # should use current flags setting for this
+            # print(" !! lineno '{}'\n !! svxline '{}'\n !! sline '{}'\n !! ls '{}'\n !! stardata {}".format(self.lineno, svxline, sline, ls,stardata))
+            tape = ls[stardata["tape"]]
+            tape = tape.replace("(",".")
+            tape = tape.replace(")",".")
            tape = tape.replace("/",".")
            try:
                survexleg.tape = float(tape)
@ -133,6 +131,13 @@ class LoadSurvex():
                message = ' ! Value Error: Tape misread in line %s in %s' % (ls, survexblock.survexfile.path)
                models.DataIssue.objects.create(parser='survex', message=message)
                survexleg.tape = 0
+            try:
+                survexblock.totalleglength += survexleg.tape
+                self.survexlegsalllength   += survexleg.tape
+            except ValueError:
+                message = ' ! Value Error: Tape length not added  %s in %s' % (ls, survexblock.survexfile.path)
+                models.DataIssue.objects.create(parser='survex', message=message)
+
            try:
                lclino = ls[stardata["clino"]]
            except:
@ -142,6 +147,7 @@ class LoadSurvex():
                message = ' ! Value Error: Clino misread in line %s in %s' % (ls, survexblock.survexfile.path)
                models.DataIssue.objects.create(parser='survex', message=message)
                lclino = error
+
            try:
                lcompass = ls[stardata["compass"]]
            except:
@ -151,6 +157,7 @@ class LoadSurvex():
                message = ' ! Value Error: Compass misread in line %s in %s' % (ls, survexblock.survexfile.path)
                models.DataIssue.objects.create(parser='survex', message=message)
                lcompass = error
+
            if lclino == "up":
                survexleg.compass = 0.0
                survexleg.clino = 90.0
@ -176,22 +183,23 @@ class LoadSurvex():
            # delete the object so that django autosaving doesn't save it.
            survexleg = None

-        itape = stardata.get("tape")
-        if itape:
-            try:
-                survexblock.totalleglength += float(ls[itape])
-                self.survexlegsalllength += float(ls[itape])
-            except ValueError:
-                print("! Length not added")
+    def LoadSurvexRef(self, survexblock, args):
+        # Handles *REF commands and also ";ref" comments; wallet years run from 1960 to 2039
+        if len(args)< 4:
+            message = " ! Empty or BAD *REF command '{}' at {}".format(args, survexblock.survexfile.path)
+            print((self.insp+message))
+            models.DataIssue.objects.create(parser='survex', message=message)
+            return

+        argsgps = self.rx_argsref.match(args)
+        if argsgps:
+            yr, letterx, wallet = argsgps.groups()
+        else:
+            message = " ! BAD *REF command '{}' at {}".format(args, survexblock.survexfile.path)
+            print((self.insp+message))
+            models.DataIssue.objects.create(parser='survex', message=message)
+            return

-    def LoadSurvexLinePassage(self, survexblock, stardata, sline, comment):
-        # do not import this: *data passage.. data which is LRUD not tape/compass/clino
-        pass
-        
-    def LoadSurvexRef(self, survexblock, mstar):
-        # *REF but also ; Ref
-        yr,letterx,wallet = mstar.groups()
        if not letterx:
            letterx = ""
        else:
@ -199,23 +207,32 @@ class LoadSurvex():
        if len(wallet)<2:
            wallet = "0" + wallet
        assert (int(yr)>1960 and int(yr)<2039), "Wallet year out of bounds: %s" % yr
-        assert (int(wallet)<100), "Wallet number more than 100: %s" % wallet
        refscan = "%s#%s%s" % (yr, letterx, wallet)
+        try:
+            if int(wallet)>100:
+                message = " ! Wallet *REF {} - too big {}".format(refscan, survexblock.survexfile.path)
+                print((self.insp+message))
+                models.DataIssue.objects.create(parser='survex', message=message)
+        except:
+            message = " ! Wallet *REF {} - not numeric {}".format(refscan, survexblock.survexfile.path)
+            print((self.insp+message))
+            models.DataIssue.objects.create(parser='survex', message=message)
        manyscansfolders = models_survex.ScansFolder.objects.filter(walletname=refscan)
        if manyscansfolders:
            survexblock.scansfolder = manyscansfolders[0]
            survexblock.save()
            if len(manyscansfolders) > 1:
-                message = ' ! Wallet *REF {} - multiple scan folders found {}'.format(refscan, survexblock.survexfile.path)
+                message = " ! Wallet *REF {} - multiple scan folders found {}".format(refscan, survexblock.survexfile.path)
                print((self.insp+message))
                models.DataIssue.objects.create(parser='survex', message=message)
        else:
-            message = ' ! Wallet *REF {} - NOT found in manyscansfolders {}'.format(refscan, survexblock.survexfile.path)
+            message = " ! Wallet *REF {} - NOT found in manyscansfolders {}".format(refscan, survexblock.survexfile.path)
            print((self.insp+message))
            models.DataIssue.objects.create(parser='survex', message=message)


-    def LoadSurvexQM(self, insp, survexblock, qmline):
+    def LoadSurvexQM(self, survexblock, qmline):
+        insp = self.insp
        qm_no = qmline.group(1)
        qm_grade = qmline.group(2)
        qm_from_section = qmline.group(3)
@ -261,28 +278,33 @@ class LoadSurvex():
            models.DataIssue.objects.create(parser='survex', message=message)
            pass

+    def LoadSurvexComment(self, survexblock, comment):
+        # ignore all comments except ;ref and ;QM
+        refline = self.rx_ref.match(comment)
+        if refline:
+            comment = comment.replace("ref","").strip()
+            self.LoadSurvexRef(survexblock, comment)

-    def RecursiveLoad(self,survexblock, survexfile, fin):
-        """Follows the *include links in all the survex files from the root file 1623.svx
-        and reads in the survex blocks, other data and the wallet references (scansfolder) as it
-        goes. This part of the data import process is where the maximum memory is used and where it
-        crashes on memory-constrained machines. Begin-end blocks may also be nested.
-        """
-        iblankbegins = 0
-        stardata = self.stardatadefault
-        insp =self.insp
+        qmline = self.rx_qm.match(comment)
+        if qmline:
+            self.LoadSurvexQM(survexblock, qmline)
+
+    def LoadSurvexFlags(self, line, cmd):
+        # Here we could set on/off 'splay', 'not splay', 'surface', 'not surface', or 'duplicate'
+        # but this data is only used for sense-checking not to actually calculate anything important
+        pass
+
+    def LoadSurvexSetup(self,survexblock, survexfile):
+        self.depthbegin = 0
+        self.stardata = self.stardatadefault
        blocklegs = self.survexlegsnumber
-
-        print(insp+"  - MEM:{:.3f} Reading. parent:{}  <> {} ".format(models.get_process_memory(),survexblock.survexfile.path,survexfile.path))
-        stamp = datetime.now()
-        lineno = 0
-        
+        print(self.insp+"  - MEM:{:.3f} Reading. parent:{}  <> {} ".format(models.get_process_memory(),survexblock.survexfile.path, survexfile.path))
+        self.lineno = 0
        sys.stderr.flush();
        self.callcount +=1
        if self.callcount >=10:
            self.callcount=0
            print(".", file=sys.stderr,end='')
-
        # Try to find the cave in the DB if not use the string as before
        path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path)
        if path_match:
@ -290,150 +312,144 @@ class LoadSurvex():
            cave = models_caves.getCaveByReference(pos_cave)
            if cave:
                survexfile.cave = cave
-        svxlines = ''
+
+    def RecursiveLoad(self, survexblock, survexfile, fin):
+        """Follows the *include links in all the survex files from the root file 1623.svx
+        and reads in the survex blocks, other data and the wallet references (scansfolder) as it
+        goes. This part of the data import process is where the maximum memory is used and where it
+        crashes on memory-constrained machines. Begin-end blocks may also be nested.
+        """
+        self.LoadSurvexSetup(survexblock, survexfile)
+        insp =self.insp
+        previousnlegs = 0
+
        svxlines = fin.read().splitlines()
-        # cannot close file now as it may be recursively called with the same file id fin if nested *begin
-        # occurs. 
+        # cannot close file now as it may be recursively called with the same fin if nested *begin-end
+        
        for svxline in svxlines:
-            lineno += 1
-            # break the line at the comment
+            self.lineno += 1
            sline, comment = self.rx_comment.match(svxline.strip()).groups()
-            mref = comment and self.rx_ref.match(comment)
-            if mref:
-                self.LoadSurvexRef(survexblock, mref)
-
-            qmline = comment and self.rx_qm.match(comment)
-            if qmline:
-                self.LoadSurvexQM(insp, survexblock, qmline)
-
+            if comment:
+                self.LoadSurvexComment(survexblock, comment)
            if not sline:
-                continue
-
-            # detect the star ref command 
-            rstar = self.rx_starref.match(sline)
-            if rstar:
-                self.LoadSurvexRef(survexblock, rstar)
+                continue # skip blank lines

            # detect the star command
            mstar = self.rx_star.match(sline)
-            if not mstar:
-                if "from" in stardata:
-                    self.LoadSurvexLineLeg(survexblock, stardata, sline, comment)
-                    pass
-                elif stardata["type"] == "passage":
-                    pass
-                    #self.LoadSurvexLinePassage(survexblock, stardata, sline, comment)
-                    #Missing "station" in stardata.
-                continue
-
-            # detect the star command
-            cmd, line = mstar.groups()
-            cmd = cmd.lower()
-            if re.match("include$(?i)", cmd):
-                includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line)))
-                print((insp+'   - Include path found, including - ' + includepath))
-                # Try to find the cave in the DB. if not, use the string as before
-                path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
-                if path_match:
-                    pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
-                    print((insp+'    - Match in DB (i) for cave {}.'.format(pos_cave)))
-                    cave = models_caves.getCaveByReference(pos_cave)
-                    if cave:
-                        survexfile.cave = cave
-                else:
-                    print((insp+'    - NO Match in DB (i) for a cave for {}'.format(includepath)))
-
-                includesurvexfile = models_survex.SurvexFile(path=includepath)
-                includesurvexfile.save()
-                includesurvexfile.SetDirectory()
-                if includesurvexfile.exists():
-                    survexblock.save()
-                    fininclude = includesurvexfile.OpenFile()
-                    self.survexlegsnumber = blocklegs
-                    self.insp += "> "
-                    self.RecursiveLoad(survexblock, includesurvexfile, fininclude)
-                    #--------------------------------------------------------
-                    fininclude.close()
-                    self.insp = self.insp[2:]
-                    insp = self.insp
-                    blocklegs = self.survexlegsnumber
-                else:
-                    print((insp+'    ! ERROR *include file not found for %s' % includesurvexfile))
-
-            elif re.match("begin$(?i)", cmd):
-                # On a *begin statement we start a new survexblock.
-                # There should not be any *include inside  a begin-end block, so this is a simple
-                # load not a recursive fileload. But there may be many blocks nested to any depth in one file.
-                if line:
-                    newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line))
-                    # Try to find the cave in the DB if not use the string as before
-                    path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath)
+            if mstar: # yes we are reading a *cmd
+                cmd, args = mstar.groups()
+                cmd = cmd.lower()
+                if re.match("include$(?i)", cmd):
+                    includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args)))
+                    print((insp+'   - INCLUDE-go path found, including - ' + includepath))
+                    # Try to find the cave in the DB. if not, use the string as before
+                    path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
                    if path_match:
                        pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
-                        # print(insp+pos_cave)
+                        print((insp+'    - Match in DB (i) for cave {}.'.format(pos_cave)))
                        cave = models_caves.getCaveByReference(pos_cave)
                        if cave:
                            survexfile.cave = cave
                    else:
-                        print((insp+'    - No match (b) for %s' % newsvxpath))
+                        print((insp+'    - NO Match in DB (i) for a cave for {}'.format(includepath)))

-                    previousnlegs = blocklegs
-                    name = line.lower()
-                    print(insp+'   - Begin found for:{}, creating new SurvexBlock '.format(name))
-                    survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, 
-                            survexpath=survexblock.survexpath+"."+name, 
-                            cave=survexfile.cave, survexfile=survexfile, 
-                            legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
-                    survexblockdown.save()
-                    survexblock.save()
-                    survexblock = survexblockdown
-                    print(insp+"   - ENTERING nested *begin/*end block: {}".format(name))
-                    self.survexlegsnumber = blocklegs
-                    self.insp += "> "
-                    self.RecursiveLoad(survexblockdown, survexfile, fin)
-                    #--------------------------------------------------------
-                    # do not close the file as there may be more blocks in this one
-                    # and it is re-read afresh with every nested begin-end block.
-                    self.insp = self.insp[2:]
-                    insp = self.insp
-                    blocklegs = self.survexlegsnumber
-                else:
-                    iblankbegins += 1
+                    includesurvexfile = models_survex.SurvexFile(path=includepath)
+                    includesurvexfile.save()
+                    includesurvexfile.SetDirectory()
+                    if includesurvexfile.exists():
+                        survexblock.save()
+                        self.insp += "> "
+                        #--------------------------------------------------------
+                        fininclude = includesurvexfile.OpenFile()
+                        self.RecursiveLoad(survexblock, includesurvexfile, fininclude)
+                        fininclude.close()
+                        #--------------------------------------------------------
+                        self.insp = self.insp[2:]
+                        insp = self.insp
+                        print((insp+'   - INCLUDE-return from include - ' + includepath))
+                    else:
+                        print((insp+'    ! ERROR *include file not found for %s' % includesurvexfile))

-            elif re.match("end$(?i)", cmd):
-                if iblankbegins:
-                    print(insp+"   - RETURNING from nested *begin/*end block: {}".format(line))
-                    iblankbegins -= 1
+                elif re.match("begin$(?i)", cmd):
+                    # On a *begin statement we start a new survexblock.
+                    # There should not be any *include inside  a begin-end block, so this is a simple
+                    # load not a recursive fileload. But there may be many blocks nested to any depth in one file.
+                    if args:
+                        newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args))
+                        # Try to find the cave in the DB; if not, use the string as before
+                        path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath)
+                        if path_match:
+                            pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
+                            # print(insp+pos_cave)
+                            cave = models_caves.getCaveByReference(pos_cave)
+                            if cave:
+                                survexfile.cave = cave
+                        else:
+                            print((insp+'    - No match (b) for %s' % newsvxpath))
+
+                        previousnlegs = self.survexlegsnumber
+                        name = args.lower()
+                        print(insp+'   - Begin found for:{}, creating new SurvexBlock '.format(name))
+                        # the recursive call re-reads the entire file. This is wasteful. We should pass in only 
+                        # the un-parsed part of the file.
+                        survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, 
+                                survexpath=survexblock.survexpath+"."+name, 
+                                cave=survexfile.cave, survexfile=survexfile, 
+                                legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
+                        survexblockdown.save()
+                        survexblock.save()
+                        survexblock = survexblockdown
+                        print(insp+"   - BLOCK-enter nested *begin/*end block: '{}'".format(name))
+                        self.insp += "> "
+                        #--------------------------------------------------------
+                        self.RecursiveLoad(survexblockdown, survexfile, fin)
+                        #--------------------------------------------------------
+                        # do not close the file as there may be more blocks in this one
+                        # and it is re-read afresh with every nested begin-end block.
+                        self.insp = self.insp[2:]
+                        insp = self.insp
+                    else:
+                        self.depthbegin += 1
+
+                elif re.match("end$(?i)", cmd):
+                    if self.depthbegin:
+                        print(insp+"   - BLOCK-return from nested *begin/*end block: '{}'".format(args))
+                        self.depthbegin -= 1
+                    else:
+                        legsinblock = self.survexlegsnumber - previousnlegs
+                        print(insp+"  - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber))
+                        survexblock.legsall = legsinblock
+                        survexblock.save()
+                        return
+                elif cmd == "ref":
+                    self.LoadSurvexRef(survexblock, args)
+                elif cmd == "flags":
+                    self.LoadSurvexFlags(args, cmd)
+                elif cmd == "data":
+                    ls = args.lower().split()
+                    stardata = { "type":ls[0] }
+                    for i in range(0, len(ls)):
+                        stardata[self.stardataparamconvert.get(ls[i], ls[i])] = i - 1
+                        self.stardata = stardata
+                    if ls[0] in ["normal", "cartesian", "nosurvey"]:
+                        assert (("from" in stardata and "to" in stardata) or "station" in stardata), args
+                    elif ls[0] == "default":
+                        stardata = self.stardatadefault
+                    else:
+                        assert ls[0] == "passage", args
+                elif cmd == "set" and re.match("names(?i)", args):
+                    pass
+                elif re.match("date$(?i)", cmd):
+                    self.LoadSurvexDate(survexblock, args)
+                elif re.match("team$(?i)", cmd):
+                    self.LoadSurvexTeam(survexblock, args)
                else:
-                    legsinblock = self.survexlegsnumber - previousnlegs
-                    print(insp+"  - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber))
-                    survexblock.legsall = legsinblock
-                    survexblock.save()
-                    return
-            elif cmd == "flags":
-                # Here we could set on/off 'splay', 'not splay', 'surface', 'not surface', or 'duplicate'
-                # but this data is only used for sense-checking not to actually calculate anything important
-                pass
-            elif cmd == "data":
-                ls = line.lower().split()
-                stardata = { "type":ls[0] }
-                for i in range(0, len(ls)):
-                    stardata[self.stardataparamconvert.get(ls[i], ls[i])] = i - 1
-                    self.stardata = stardata
-                if ls[0] in ["normal", "cartesian", "nosurvey"]:
-                    assert (("from" in stardata and "to" in stardata) or "station" in stardata), line
-                elif ls[0] == "default":
-                    stardata = self.stardatadefault
+                    self.LoadSurvexIgnore(survexblock, args, cmd)
+            else: # not a *cmd so we are reading data OR rx_comment failed
+                if "from" in self.stardata: # only interested in survey legs
+                    self.LoadSurvexLineLeg(survexblock, svxline, sline, comment)
                else:
-                    assert ls[0] == "passage", line
-            elif cmd == "set" and re.match("names(?i)", line):
-                pass
-            elif re.match("date$(?i)", cmd):
-                self.LoadSurvexDate(survexblock, line)
-            elif re.match("team$(?i)", cmd):
-                self.LoadSurvexTeam(survexblock, line)
-            else:
-                self.LoadSurvexIgnore(survexblock, line, cmd)
+                    pass # ignore all other sorts of data


 def FindAndLoadAllSurvex(survexblockroot, survexfileroot):