AI comments on regexes

2026-02-08 13:18:15 +00:00 · 2025-01-09 21:59:27 +00:00
parent 5b97cd83dd
commit 219b8b792e
2 changed files with 66 additions and 0 deletions
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -139,6 +139,18 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
    # print(f'# {tid}')
    # print(f" -  {tid} '{trippeople}'  ")
    """
    re.split(r",|\+|&amp;|&(?!\w+;)| and ", trippeople)
    ,             : The comma character
    \+            : The plus sign (+); escaped to treat as a literal character
    &amp;         : The literal string "&amp;" (HTML-encoded ampersand)
    &(?!\w+;)     : An ampersand (&) not followed by one or more word characters (\w+) and a semicolon (;)
                    : Uses negative lookahead assertion (?!...) to ensure it's not part of an HTML entity like "&nbsp;"
     and          : The literal string " and " (with spaces before and after)
    This will split the 'trippeople' string at any of these delimiters.
    """
    for tripperson in re.split(r",|\+|&amp;|&(?!\w+;)| and ", trippeople):
        tripperson = tripperson.strip()
        # author_u = re.match(r"(?i)<u>(.*?)</u>$", tripperson)
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -298,7 +298,61 @@ class LoadingSurvex:
    rx_commteam = re.compile(r"(?i)\s*(Messteam|Zeichner)\s*[:]?(.*)")
    rx_quotedtitle = re.compile(r'(?i)^"(.*)"$')
    """
    Regular expression explanation for rx_starref (MS CoPilot)
    (?i)        : Case-insensitive flag for the regex
    ^           : Asserts the position at the start of a line
    \s*         : Matches zero or more whitespace characters
    \*ref       : Matches the literal string "*ref"
    [\s.:]*     : Matches zero or more whitespace characters, periods, or colons
    ((?:19[6789]\d)|(?:20[0123]\d))
                 : Capturing group that matches a year in the 1960s-1990s or 2000s-2030s
                 : (?:...) is a non-capturing group
                 : 19[6789]\d matches years from 1960 to 1999
                 : 20[0123]\d matches years from 2000 to 2039
    \s*         : Matches zero or more whitespace characters
    #?          : Matches zero or one "#" character
    \s*         : Matches zero or more whitespace characters
    (X)?        : Capturing group that optionally matches the character "X"
    \s*         : Matches zero or more whitespace characters
    (.*?\d+.*?) : Capturing group that matches any character sequence containing at least one digit
                 : .*? matches any character (except newline), as few times as possible (non-greedy)
                 : \d+ matches one or more digits
                 : .*? matches any character (except newline), as few times as possible (non-greedy)
    $           : Asserts the position at the end of a line
    Regular expression explanation for rx_argsref
    (?i)        : Case-insensitive flag for the regex
    ^           : Asserts the position at the start of a line
    [\s.:]*     : Matches zero or more whitespace characters, periods, or colons
    ((?:19[6789]\d)|(?:20[012345]\d))
                 : Capturing group that matches a year in the 1960s-1990s or 2000s-2050s
                 : (?:...) is a non-capturing group
                 : 19[6789]\d matches years from 1960 to 1999
                 : 20[012345]\d matches years from 2000 to 2059
    \s*         : Matches zero or more whitespace characters
    #?          : Matches zero or one "#" character
    \s*         : Matches zero or more whitespace characters
    (X)?        : Capturing group that optionally matches the character "X"
    \s*         : Matches zero or more whitespace characters
    (.*?\d+.*?) : Capturing group that matches any character sequence containing at least one digit
                 : .*? matches any character (except newline), as few times as possible (non-greedy)
                 : \d+ matches one or more digits
                 : .*? matches any character (except newline), as few times as possible (non-greedy)
    $           : Asserts the position at the end of a line
    """
    # This interprets the survex "*data normal" command which sets out the order of the fields in the data, e.g.