2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-17 09:47:24 +00:00

AI comments on regexes

This commit is contained in:
2025-01-09 21:59:27 +00:00
parent 5b97cd83dd
commit 219b8b792e
2 changed files with 66 additions and 0 deletions

View File

@@ -139,6 +139,18 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
# print(f'# {tid}') # print(f'# {tid}')
# print(f" - {tid} '{trippeople}' ") # print(f" - {tid} '{trippeople}' ")
"""
re.split(r",|\+|&amp;|&(?!\w+;)| and ", trippeople)
, : The comma character
\+ : The plus sign (+); escaped to treat as a literal character
&amp; : The literal string "&amp;" (HTML-encoded ampersand)
&(?!\w+;) : An ampersand (&) not followed by one or more word characters (\w+) and a semicolon (;)
: Uses negative lookahead assertion (?!...) to ensure it's not part of an HTML entity like "&nbsp;"
and : The literal string " and " (with spaces before and after)
This will split the 'trippeople' string at any of these delimiters.
"""
for tripperson in re.split(r",|\+|&|&(?!\w+;)| and ", trippeople): for tripperson in re.split(r",|\+|&|&(?!\w+;)| and ", trippeople):
tripperson = tripperson.strip() tripperson = tripperson.strip()
# author_u = re.match(r"(?i)<u>(.*?)</u>$", tripperson) # author_u = re.match(r"(?i)<u>(.*?)</u>$", tripperson)

View File

@@ -298,7 +298,61 @@ class LoadingSurvex:
rx_commteam = re.compile(r"(?i)\s*(Messteam|Zeichner)\s*[:]?(.*)") rx_commteam = re.compile(r"(?i)\s*(Messteam|Zeichner)\s*[:]?(.*)")
rx_quotedtitle = re.compile(r'(?i)^"(.*)"$') rx_quotedtitle = re.compile(r'(?i)^"(.*)"$')
"""
Regular expression explanation for rx_starref (MS CoPilot)
(?i) : Case-insensitive flag for the regex
^ : Asserts the position at the start of a line
\s* : Matches zero or more whitespace characters
\*ref : Matches the literal string "*ref"
[\s.:]* : Matches zero or more whitespace characters, periods, or colons
((?:19[6789]\d)|(?:20[0123]\d))
: Capturing group that matches a year in the 1960s-1990s or 2000s-2030s
: (?:...) is a non-capturing group
: 19[6789]\d matches years from 1960 to 1999
: 20[0123]\d matches years from 2000 to 2039
\s* : Matches zero or more whitespace characters
#? : Matches zero or one "#" character
\s* : Matches zero or more whitespace characters
(X)? : Capturing group that optionally matches the character "X"
\s* : Matches zero or more whitespace characters
(.*?\d+.*?) : Capturing group that matches any character sequence containing at least one digit
: .*? matches any character (except newline), as few times as possible (non-greedy)
: \d+ matches one or more digits
: .*? matches any character (except newline), as few times as possible (non-greedy)
$ : Asserts the position at the end of a line
Regular expression explanation for rx_argsref
(?i) : Case-insensitive flag for the regex
^ : Asserts the position at the start of a line
[\s.:]* : Matches zero or more whitespace characters, periods, or colons
((?:19[6789]\d)|(?:20[012345]\d))
: Capturing group that matches a year in the 1960s-1990s or 2000s-2050s
: (?:...) is a non-capturing group
: 19[6789]\d matches years from 1960 to 1999
: 20[012345]\d matches years from 2000 to 2059
\s* : Matches zero or more whitespace characters
#? : Matches zero or one "#" character
\s* : Matches zero or more whitespace characters
(X)? : Capturing group that optionally matches the character "X"
\s* : Matches zero or more whitespace characters
(.*?\d+.*?) : Capturing group that matches any character sequence containing at least one digit
: .*? matches any character (except newline), as few times as possible (non-greedy)
: \d+ matches one or more digits
: .*? matches any character (except newline), as few times as possible (non-greedy)
$ : Asserts the position at the end of a line
"""
# This interprets the survex "*data normal" command which sets out the order of the fields in the data, e.g. # This interprets the survex "*data normal" command which sets out the order of the fields in the data, e.g.