From 28914916b6174043bcfb1cb769b83cd5392fec47 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Fri, 24 Oct 2025 22:40:26 +0300 Subject: [PATCH] refactored to reduce duplicated code ---
NOTE(review): reviewer commentary only, not part of the change. It sits between the
"---" separator and the diffstat, where free text is conventionally not treated as
commit message or diff content.
  1. Purpose: the three per-format "stat the file, store filesize, raise a DataIssue on
     zero length" sequences are replaced by _set_filesize_and_check(), and the two
     open/read/close sequences by _read_text_file().
  2. _set_filesize_and_check() calls model_obj.save() right after setting filesize. The
     removed Therion and Tunnel lines set filesize without saving at that point; only the
     removed drawings lines called save() there. Those two paths therefore gain a save()
     call. TODO confirm this is intended (a later save is not visible in these hunks).
  3. A failing stat used to propagate out of os.stat()/Path.stat(). It is now caught by
     "except Exception", printed, recorded as a DataIssue, and the caller returns early.
  4. The zero-length message is built from parser_label.lower(). "Therion" and "Tunnel"
     give the same wording as the removed lines, but "drawings" gives
     "! Zero length drawings file ..." where the removed line said "drawing file".
  5. _read_text_file() opens with encoding="utf-8", errors="replace"; the removed lines
     used open(ff, "r") with no explicit encoding. With errors="replace", undecodable
     bytes are substituted rather than raising, so the "rb" fallback is presumably only
     reached for non-decoding failures, which are likely to fail the same way again.
  6. If both read attempts fail, _read_text_file() prints a message and returns "", and
     the callers go on to parse that empty text; no DataIssue is created for this case.
  7. This copy of the patch appears to have lost its original line breaks and
     indentation; regenerate it from the commit before trying to apply it.
 parsers/drawings.py | 69 +++++++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 25 deletions(-) diff --git a/parsers/drawings.py b/parsers/drawings.py index 08cb2e0..5ed0e4d 100644 --- a/parsers/drawings.py +++ b/parsers/drawings.py @@ -66,6 +66,41 @@ rx_wallet = re.compile(r""" $ # Anchor, ensuring the match is at the end of the string """, re.VERBOSE | re.IGNORECASE) +def _set_filesize_and_check(fullpath, model_obj, parser_label, url_prefix="/dwgdataraw"): + """Set model_obj.filesize from filesystem and create DataIssue if missing/zero. + Returns True if file exists and has size > 0, False otherwise. + """ + try: + size = Path(fullpath).stat().st_size + except Exception as e: + message = f"! Unable to stat file {fullpath}: {e}" + print(message) + DataIssue.objects.create(parser=parser_label, message=message, url=f"{url_prefix}/{getattr(model_obj, 'dwgpath', '')}") + return False + + model_obj.filesize = size + model_obj.save() + if size <= 0: + message = f"! Zero length {parser_label.lower()} file {fullpath}" + print(message) + DataIssue.objects.create(parser=parser_label, message=message, url=f"{url_prefix}/{getattr(model_obj, 'dwgpath', '')}") + return False + + return True + + +def _read_text_file(fullpath): + """Read text file robustly, returning a str (falls back to binary decode).""" + try: + with open(fullpath, "r", encoding="utf-8", errors="replace") as fh: + return fh.read() + except Exception: + try: + with open(fullpath, "rb") as fh: + return fh.read().decode("utf-8", errors="replace") + except Exception as e: + print(f"! 
Unable to read file {fullpath}: {e}") + return "" def parse_tnl_file(dwgfile, path): """Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file @@ -177,15 +212,10 @@ def settherionfileinfo(filetuple): thtype, therionfile = filetuple ff = os.path.join(settings.DRAWINGS_DATA, therionfile.dwgpath) - therionfile.filesize = os.stat(ff)[stat.ST_SIZE] - if therionfile.filesize <= 0: - message = f"! Zero length therion file {ff}" - print(message) - DataIssue.objects.create(parser="Therion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}") + if not _set_filesize_and_check(ff, therionfile, "Therion"): return - fin = open(ff, "r") - ttext = fin.read() - fin.close() + + ttext = _read_text_file(ff) # The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap' # print(len(re.findall(r"line", ttext))) @@ -243,15 +273,10 @@ def settnlfileinfo(dwgfile): *ref wallet identifiers may be found in at least two different places in tunnel files. """ ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath) - dwgfile.filesize = os.stat(ff)[stat.ST_SIZE] - if dwgfile.filesize <= 0: - message = f"! Zero length tunnel file {ff}" - print(message) - DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{dwgfile.dwgpath}") + if not _set_filesize_and_check(ff, dwgfile, "Tunnel"): return - fin = open(ff, "r") # tunnel files are now all utf-8 after fixup 24/10/2025. - ttext = fin.read() - fin.close() + + ttext = _read_text_file(ff) dwgfile.npaths = len(rx_skpath.findall(ttext)) # dwgfile.save() @@ -286,18 +311,12 @@ def settnlfileinfo(dwgfile): def setdrwfileinfo(dwgfile): """Read in the drawing file contents and sets values on the dwgfile object, but these are SVGs, PDFs or .txt files, so there is no useful format to search for - This function is a placeholder in case we thnk of a way to do something - to recognise generic survex filenames. 
+ """ ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath - dwgfile.filesize = ff.stat().st_size - dwgfile.save() - if dwgfile.filesize <= 0: - message = f"! Zero length drawing file {ff}" - print(message) - DataIssue.objects.create(parser="drawings", message=message, url=f"/dwgdataraw/{dwgfile.dwgpath}") + if not _set_filesize_and_check(ff, dwgfile, "drawings"): return - + # nothing more to parse for generic files def load_drawings_files(): """Breadth first search of drawings directory looking for sub-directories and *.xml filesize