From 53e03d54624dabf5b609e4758df5351712178aa0 Mon Sep 17 00:00:00 2001 From: "Philip Sargent @ okchai" Date: Tue, 26 Aug 2025 19:13:34 +0100 Subject: [PATCH] Much regex engineering, used co pilot --- parsers/survex.py | 62 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/parsers/survex.py b/parsers/survex.py index c4ba3bb..74d085d 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -309,10 +309,47 @@ class LoadingSurvex: # remember there is also QM_PATTERN used in views.other and set in settings.py rx_qm = re.compile( - r"^\s*QM(\d+)\s+([A-DVXa-dvx?])\s+([\w\-\_]+\.)?(([\w\.\-]+))\s*(\-|([\w\-]+)(\.([\w\.\-]+))?)(\s+(.*))$" + r"^\s*QM(\d+)\s+([A-DVXa-dvx?])\s+([\w\-_]+\.[\w.\-]+)\s*(\-|[\w\-]+\.[\w.\-]+)(\s+.*)?$" ) - # This regex matches a QM survey line where station identifiers and locations may be optional or placeholders. + """ + Regex explanation for: + r"^\s*QM(\d+)\s+([A-DVXa-dvx?])\s+([\w\-_]+\.[\w.\-]+)\s*(\-|[\w\-]+\.[\w.\-]+)(\s+.*)?$" + Purpose: + Matches a structured line beginning with "QM", followed by a numeric ID, a status/type code, + two instances of a survey station identifier (with different roles), and optional trailing content. + + Capturing Groups: + (1) (\d+) + - One or more digits following "QM" — typically a unique identifier or record number. + + (2) ([A-DVXa-dvx?]) + - A single character from the set A-D, V, X, a-d, v, x, or '?' — the grade. + + (3) ([\w\-_]+\.[\w.\-]+) + - **Survey Station Identifier** — a compound name with a dot, such as "alpha.1" or "zone-3.site". + - Represents the **primary station** involved in the record. + + (4) (\-|[\w\-]+\.[\w.\-]+) + - Either a single hyphen "-" (indicating an UNTICKED QM), or another **Survey Station Identifier**. + - Represents the **secondary station** which is the continuation of the passage as a ticked-off QM. + - Example: "beta.1", "ref-1.zone". + + (5) (\s+.*)? + - Optional trailing content preceded by whitespace. + - Captures any additional notes, comments, or metadata. + + Anchors and Whitespace: + ^ : Start of line + \s* : Optional leading whitespace + \s+ : Required spacing between tokens + $ : End of line + + Example Match: + "QM1 B arcticenema.5 - ongoing crawl" + """ + + # rx_starref """ Regular expression explanation for rx_starref (MS CoPilot) - Not actually USED any more ?! @@ -1488,7 +1525,8 @@ class LoadingSurvex: self.currentsurvexfile = self.stacksvxfiles.pop() def TickSurvexQM(self, survexblock, qmtick): - """Interpret the specially formatted comment which is a QM TICKED statement""" + """Interpret the specially formatted comment which is a QM TICKED statement + This is now not used, we have abandoned this experiment.""" # Now we need to find the correct QM object. It will be in the same block and have the same number. try: @@ -1515,10 +1553,6 @@ class LoadingSurvex: def LoadSurvexQM(self, survexblock, qmline): """Interpret the specially formatted comment which is a QM definition""" -# r"(?i)^\s*QM(\d+)\s+(.+)\s+([\w\-\_]+)\.([\w\.\-]+)\s+(([\w\-]+)\.([\w\.\-]+)|\-)\s+(.+)$" -# r"(?i)^\s*QM(\d+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+(.+)$" -# rx_qm_tick QMnn TICK date comment -# (r"(?i)^\s*QM(\d+)\s+TICK\s([\d\-]+)\s(.*)$") insp = self.insp # create a short, hopefully-unique name for this block to be used in the QM id @@ -1531,7 +1565,7 @@ class LoadingSurvex: qm_no = qmline.group(1) # this is NOT unique across multiple survex files qm_grade = qmline.group(2).strip().upper() # TICK or [a-dA-DvVxX?] - if qm_grade == "TICK": + if qm_grade == "TICK": # not used now self.TickSurvexQM(survexblock, qmline) return @@ -1542,25 +1576,19 @@ class LoadingSurvex: parser="survex", message=message, url=None, sb=(survexblock.survexfile.path) ) qm_nearest = qmline.group(3) - # if qmline.group(3): # usual closest survey station - # qm_nearest = qmline.group(3) - # if qmline.group(4): - # qm_nearest = qm_nearest + "." + qmline.group(4) resolution_station_name = qmline.group(4) if (resolution_station_name=="-"): pass else: qm_ticked = True - # print(f"{survexblock.survexfile.cave} {survexblock}:{qm_no}{qm_grade} {qmline.group(4)}", file=sys.stderr) + # print(f"{survexblock.survexfile.cave} {survexblock}:{qm_no=}{qm_grade=} {qmline.group(4)=}", file=sys.stderr) qm_notes = qmline.group(5) - # qm_notes = qmline.group(8) - # Spec of QM in SVX files: # ;Serial number grade(A/B/C/D/V/X) nearest-station resolution-station description # ;QM1 a hobnob_hallway_2.42 hobnob-hallway_3.42 junction of keyhole passage - # ;QM1 a hobnob_hallway_2.42 - junction of keyhole passage + # ;QM1 a hobnob_hallway_2.42 - junction of keyhole passage #;QM1 A B6 - see plan drawing there is definitely a QM @@ -1568,7 +1596,7 @@ class LoadingSurvex: # we would have to create one. But that is not obligatory and no QMs loaded from CSVs have one # Older troggle/CSV assumes a logbook entry 'found_by' for each QM, with a date. - # We don't need this anymore so we don't need to create a placeholder logbook entry. + # We don't use this anymore. if survexblock.survexfile.cave: