diff --git a/parsers/drawings.py b/parsers/drawings.py index b3ce8c8..1ef03a9 100644 --- a/parsers/drawings.py +++ b/parsers/drawings.py @@ -18,9 +18,13 @@ from troggle.core.utils import save_carefully for tunnel and therion files ''' -todo='''Rename functions more consistently between tunnel and therion variants +todo='''- Rename functions more consistently between tunnel and therion variants + +- Recode rx_valid_ext to use profile suffix() function ''' +rx_valid_ext = re.compile(r'(?i)\.(?:png|jpg|pdf|jpeg|gif|txt)$') + def find_dwg_file(dwgfile, path): '''Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file which we have already seen when we imported all the files we could find in the surveyscans direstories @@ -54,7 +58,7 @@ def find_dwg_file(dwgfile, path): if scansfile: dwgfile.scans.add(scansfile) - elif path and not re.search(r"\.(?:png|jpg|pdf|jpeg|gif|txt)$(?i)", path): + elif path and not rx_valid_ext.search(path): name = os.path.split(path)[1] rdwgfilel = DrawingFile.objects.filter(dwgname=name) if len(rdwgfilel): diff --git a/parsers/logbooks.py b/parsers/logbooks.py index c704182..864f1b2 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -81,20 +81,23 @@ def set_trip_id(year, seq): tid= f"{year}_s{seq:02d}" return tid +rx_tripperson = re.compile(r'(?i)(.*?)$') +rx_round_bracket = re.compile(r"[\(\[].*?[\)\]]") + def GetTripPersons(trippeople, expedition, logtime_underground, tid=None): res = [ ] author = None - round_bracket_regex = re.compile(r"[\(\[].*?[\)\]]") #print(f'# {tid}') for tripperson in re.split(r",|\+|&|&(?!\w+;)| and ", trippeople): tripperson = tripperson.strip() - mul = re.match(r"(.*?)$(?i)", tripperson) + # mul = re.match(r"(?i)(.*?)$", tripperson) + mul = rx_tripperson.match(tripperson) if mul: tripperson = mul.group(1).strip() if tripperson and tripperson[0] != '*': - tripperson = re.sub(round_bracket_regex, "", tripperson).strip() + tripperson = re.sub(rx_round_bracket, "", tripperson).strip() if tripperson =="Wiggy": tripperson = "Phil Wigglesworth" @@ -134,6 +137,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_ DataIssue.objects.create(parser='logbooks', message=message) logdataissues["title"]=message print(message) + raise return if not author: diff --git a/parsers/survex.py b/parsers/survex.py index 60c0330..3754035 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -118,6 +118,8 @@ class LoadingSurvex(): other survex files. A 'scanswallet' is what we today call a "survey scans folder" or a "wallet". """ + # python regex flags (?i) means case-insentitive, (?s) means . matches newline too + # see https://docs.python.org/3/library/re.html rx_begin = re.compile(r'(?i)begin') rx_end = re.compile(r'(?i)end$') rx_title = re.compile(r'(?i)title$')