From 53e03d54624dabf5b609e4758df5351712178aa0 Mon Sep 17 00:00:00 2001
From: "Philip Sargent @ okchai" <philip.sargent@gmail.com>
Date: Tue, 26 Aug 2025 19:13:34 +0100
Subject: [PATCH] Much regex engineering, used co pilot

---
 parsers/survex.py | 62 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 45 insertions(+), 17 deletions(-)

diff --git a/parsers/survex.py b/parsers/survex.py
index c4ba3bb..74d085d 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -309,10 +309,47 @@ class LoadingSurvex:
     #   remember there is also QM_PATTERN used in views.other and set in settings.py    
 
     rx_qm = re.compile(
-        r"^\s*QM(\d+)\s+([A-DVXa-dvx?])\s+([\w\-\_]+\.)?(([\w\.\-]+))\s*(\-|([\w\-]+)(\.([\w\.\-]+))?)(\s+(.*))$"
+		r"^\s*QM(\d+)\s+([A-DVXa-dvx?])\s+([\w\-_]+\.[\w.\-]+)\s*(\-|[\w\-]+\.[\w.\-]+)(\s+.*)?$"
     )    
-    # This regex matches a QM survey line where station identifiers and locations may be optional or placeholders.
+    r"""
+    Regex explanation for:
+	    r"^\s*QM(\d+)\s+([A-DVXa-dvx?])\s+([\w\-_]+\.[\w.\-]+)\s*(\-|[\w\-]+\.[\w.\-]+)(\s+.*)?$"
 
+    Purpose:
+    Matches a structured line beginning with "QM", followed by a numeric ID, a status/type code,
+    two instances of a survey station identifier (with different roles), and optional trailing content.
+
+    Capturing Groups:
+    (1) (\d+)  
+	    - One or more digits following "QM" — the QM serial number; it is NOT unique across multiple survex files.
+
+    (2) ([A-DVXa-dvx?])  
+	    - A single character from the set A-D, V, X, a-d, v, x, or '?' — the grade.
+
+    (3) ([\w\-_]+\.[\w.\-]+)  
+	    - **Survey Station Identifier** — a compound name with a dot, such as "alpha.1" or "zone-3.site".
+	    - Represents the **primary station** involved in the record.
+
+    (4) (\-|[\w\-]+\.[\w.\-]+)  
+	    - Either a single hyphen "-" (indicating an UNTICKED QM), or another **Survey Station Identifier**.
+	    - Represents the **secondary station** which is the continuation of the passage as a ticked-off QM.
+	    - Example: "beta.1", "ref-1.zone".
+
+    (5) (\s+.*)?  
+	    - Optional trailing content preceded by whitespace.
+	    - Captures any additional notes, comments, or metadata.
+
+    Anchors and Whitespace:
+    ^       : Start of line  
+    \s*     : Optional whitespace (leading, and also between the two station fields)  
+    \s+     : Required whitespace between the other tokens  
+    $       : End of line  
+
+    Example Match:
+    "QM1    B    arcticenema.5   -    ongoing crawl"
+    """
+
+	# rx_starref
     """
     Regular expression explanation for rx_starref (MS CoPilot) - Not actually USED any more ?!
 
@@ -1488,7 +1525,8 @@ class LoadingSurvex:
         self.currentsurvexfile = self.stacksvxfiles.pop()
         
     def TickSurvexQM(self, survexblock, qmtick):
-        """Interpret the specially formatted comment which is a QM TICKED statement"""
+        """Interpret the specially formatted comment which is a QM TICKED statement
+        This is no longer used; we have abandoned this experiment."""
         # Now we need to find the correct QM object. It will be in the same block and have the same number.
 
         try:
@@ -1515,10 +1553,6 @@ class LoadingSurvex:
 
     def LoadSurvexQM(self, survexblock, qmline):
         """Interpret the specially formatted comment which is a QM definition"""
-#       r"(?i)^\s*QM(\d+)\s+(.+)\s+([\w\-\_]+)\.([\w\.\-]+)\s+(([\w\-]+)\.([\w\.\-]+)|\-)\s+(.+)$"
-#       r"(?i)^\s*QM(\d+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+(.+)$"
-#       rx_qm_tick QMnn TICK date comment
-#       (r"(?i)^\s*QM(\d+)\s+TICK\s([\d\-]+)\s(.*)$")
 
         insp = self.insp
         # create a short, hopefully-unique name for this block to be used in the QM id
@@ -1531,7 +1565,7 @@ class LoadingSurvex:
         
         qm_no = qmline.group(1)  # this is NOT unique across multiple survex files
         qm_grade = qmline.group(2).strip().upper() #  TICK or [a-dA-DvVxX?]
-        if qm_grade == "TICK":
+        if qm_grade == "TICK": # not used now
             self.TickSurvexQM(survexblock, qmline)
             return
 
@@ -1542,25 +1576,19 @@ class LoadingSurvex:
                 parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
             )
         qm_nearest = qmline.group(3)
-        # if qmline.group(3):  # usual closest survey station
-            # qm_nearest = qmline.group(3)
-            # if qmline.group(4):
-                # qm_nearest = qm_nearest + "." + qmline.group(4)
 
         resolution_station_name = qmline.group(4)
         if (resolution_station_name=="-"): 
             pass
         else:
             qm_ticked = True
-            # print(f"{survexblock.survexfile.cave} {survexblock}:{qm_no}{qm_grade} {qmline.group(4)}", file=sys.stderr)
+            # print(f"{survexblock.survexfile.cave} {survexblock}:{qm_no=}{qm_grade=} {qmline.group(4)=}", file=sys.stderr)
             
         qm_notes = qmline.group(5)
-        # qm_notes = qmline.group(8)
-
         # Spec of QM in SVX files:
         # ;Serial number   grade(A/B/C/D/V/X)  nearest-station  resolution-station description
         # ;QM1	a	hobnob_hallway_2.42	hobnob-hallway_3.42	junction of keyhole passage
-        # ;QM1	a	hobnob_hallway_2.42	-	junction of keyhole passage
+        # ;QM1	a	hobnob_hallway_2.42	-	                junction of keyhole passage
         
         #;QM1    A    B6 - see plan drawing there is definitely a QM 
 
@@ -1568,7 +1596,7 @@ class LoadingSurvex:
         # we would have to create one. But that is not obligatory and no QMs loaded from CSVs have one
 
         # Older troggle/CSV assumes a logbook entry 'found_by' for each QM, with a date.
-        # We don't need this anymore so we don't need to create a placeholder logbook entry.
+        # We don't use this anymore.
     
     
         if survexblock.survexfile.cave: