From 86a18c3ebccf737142abedfc3b3ba40296c6aecd Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Fri, 15 Jul 2022 14:09:32 +0300 Subject: [PATCH] catch nonUTF8 survex files, DataIssues url editor --- core/views/survex.py | 11 ++++++++--- parsers/survex.py | 15 +++++++++++++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/core/views/survex.py b/core/views/survex.py index 21603bc..caeaba6 100644 --- a/core/views/survex.py +++ b/core/views/survex.py @@ -119,9 +119,14 @@ class SvxForm(forms.Form): print(">>> >>> WARNING - svx file not found, showing TEMPLATE SVX",fname, flush=True) self.template = True return survextemplatefile - fin = open(fname, "r",encoding='utf8',newline='') - svxtext = fin.read() - fin.close() + try: + fin = open(fname, "r",encoding='utf8',newline='') + svxtext = fin.read() + fin.close() + except: + fin = open(fname, "r",encoding='iso-8859-1',newline='') + svxtext = fin.read() + fin.close() return svxtext def DiffCode(self, rcode): diff --git a/parsers/survex.py b/parsers/survex.py index 70ff0d3..8baa9bf 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -1140,7 +1140,18 @@ class LoadingSurvex(): return self.svxfileslist.append(path) - svxlines = fin.read().splitlines() + try: + svxlines = fin.read().splitlines() + except UnicodeDecodeError: + # some bugger put an umlaut in a non-UTF survex file ?! + message = f" ! ERROR *include file '{path}' in '{survexblock}' has UnicodeDecodeError" + print(message) + print(message,file=sys.stderr) + offendingfile = "/survexfile/" + path + ".svx" + DataIssue.objects.create(parser='survex', message=message, url=offendingfile) + return # skip this survex file and all things *included in it + + for svxline in svxlines: self.lineno += 1 thissvxline += 1 @@ -1153,7 +1164,7 @@ class LoadingSurvex(): print(message) print(message,file=sys.stderr) DataIssue.objects.create(parser='survex', message=message) - return # skip this survex file + return # skip this survex file and all things *included in it includestmt =self.rx_include.match(svxline) if not includestmt: