AI comments on regexes

2025-12-17 09:47:24 +00:00 · 2025-01-09 21:59:27 +00:00
parent 5b97cd83dd
commit 219b8b792e
2 changed files with 66 additions and 0 deletions
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -139,6 +139,18 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
    # print(f'# {tid}')
    # print(f" -  {tid} '{trippeople}'  ")

+    r"""
+    re.split(r",|\+|&amp;|&(?!\w+;)| and ", trippeople)
+
+    ,             : The comma character
+    \+            : The plus sign (+); escaped to treat as a literal character
+    &amp;         : The literal string "&amp;" (HTML-encoded ampersand)
+    &(?!\w+;)     : An ampersand (&) not followed by one or more word characters (\w+) and a semicolon (;)
+                    : Uses negative lookahead assertion (?!...) to ensure it's not part of an HTML entity like "&nbsp;"
+     and          : The literal string " and " (with spaces before and after)
+
+    This will split the 'trippeople' string at any of these delimiters.
+    """
    for tripperson in re.split(r",|\+|&amp;|&(?!\w+;)| and ", trippeople):
        tripperson = tripperson.strip()
        # author_u = re.match(r"(?i)<u>(.*?)</u>$", tripperson)
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -298,7 +298,61 @@ class LoadingSurvex:
    rx_commteam = re.compile(r"(?i)\s*(Messteam|Zeichner)\s*[:]?(.*)")
    rx_quotedtitle = re.compile(r'(?i)^"(.*)"$')
    
+    r"""
+    Regular expression explanation for rx_starref (MS CoPilot)

+    (?i)        : Case-insensitive flag for the regex
+    ^           : Asserts the position at the start of a line
+    \s*         : Matches zero or more whitespace characters
+    \*ref       : Matches the literal string "*ref"
+    [\s.:]*     : Matches zero or more whitespace characters, periods, or colons
+
+    ((?:19[6789]\d)|(?:20[0123]\d))
+                 : Capturing group that matches a year in the 1960s-1990s or 2000s-2030s
+                 : (?:...) is a non-capturing group
+                 : 19[6789]\d matches years from 1960 to 1999
+                 : 20[0123]\d matches years from 2000 to 2039
+
+    \s*         : Matches zero or more whitespace characters
+    #?          : Matches zero or one "#" character
+    \s*         : Matches zero or more whitespace characters
+
+    (X)?        : Capturing group that optionally matches the character "X"
+    \s*         : Matches zero or more whitespace characters
+
+    (.*?\d+.*?) : Capturing group that matches any character sequence containing at least one digit
+                 : .*? matches any character (except newline), as few times as possible (non-greedy)
+                 : \d+ matches one or more digits
+                 : .*? matches any character (except newline), as few times as possible (non-greedy)
+
+    $           : Asserts the position at the end of a line
+
+    Regular expression explanation for rx_argsref
+
+    (?i)        : Case-insensitive flag for the regex
+    ^           : Asserts the position at the start of a line
+    [\s.:]*     : Matches zero or more whitespace characters, periods, or colons
+
+    ((?:19[6789]\d)|(?:20[012345]\d))
+                 : Capturing group that matches a year in the 1960s-1990s or 2000s-2050s
+                 : (?:...) is a non-capturing group
+                 : 19[6789]\d matches years from 1960 to 1999
+                 : 20[012345]\d matches years from 2000 to 2059
+
+    \s*         : Matches zero or more whitespace characters
+    #?          : Matches zero or one "#" character
+    \s*         : Matches zero or more whitespace characters
+
+    (X)?        : Capturing group that optionally matches the character "X"
+    \s*         : Matches zero or more whitespace characters
+
+    (.*?\d+.*?) : Capturing group that matches any character sequence containing at least one digit
+                 : .*? matches any character (except newline), as few times as possible (non-greedy)
+                 : \d+ matches one or more digits
+                 : .*? matches any character (except newline), as few times as possible (non-greedy)
+
+    $           : Asserts the position at the end of a line
+    """
    
    
    # This interprets the survex "*data normal" command which sets out the order of the fields in the data, e.g.