2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-19 16:57:05 +00:00

refactored to reduce duplicated code

This commit is contained in:
2025-10-24 22:40:26 +03:00
parent 179ba32c5a
commit 28914916b6

View File

@@ -66,6 +66,41 @@ rx_wallet = re.compile(r"""
$ # Anchor, ensuring the match is at the end of the string $ # Anchor, ensuring the match is at the end of the string
""", re.VERBOSE | re.IGNORECASE) """, re.VERBOSE | re.IGNORECASE)
def _set_filesize_and_check(fullpath, model_obj, parser_label, url_prefix="/dwgdataraw"):
"""Set model_obj.filesize from filesystem and create DataIssue if missing/zero.
Returns True if file exists and has size > 0, False otherwise.
"""
try:
size = Path(fullpath).stat().st_size
except Exception as e:
message = f"! Unable to stat file {fullpath}: {e}"
print(message)
DataIssue.objects.create(parser=parser_label, message=message, url=f"{url_prefix}/{getattr(model_obj, 'dwgpath', '')}")
return False
model_obj.filesize = size
model_obj.save()
if size <= 0:
message = f"! Zero length {parser_label.lower()} file {fullpath}"
print(message)
DataIssue.objects.create(parser=parser_label, message=message, url=f"{url_prefix}/{getattr(model_obj, 'dwgpath', '')}")
return False
return True
def _read_text_file(fullpath):
"""Read text file robustly, returning a str (falls back to binary decode)."""
try:
with open(fullpath, "r", encoding="utf-8", errors="replace") as fh:
return fh.read()
except Exception:
try:
with open(fullpath, "rb") as fh:
return fh.read().decode("utf-8", errors="replace")
except Exception as e:
print(f"! Unable to read file {fullpath}: {e}")
return ""
def parse_tnl_file(dwgfile, path): def parse_tnl_file(dwgfile, path):
"""Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file """Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file
@@ -177,15 +212,10 @@ def settherionfileinfo(filetuple):
thtype, therionfile = filetuple thtype, therionfile = filetuple
ff = os.path.join(settings.DRAWINGS_DATA, therionfile.dwgpath) ff = os.path.join(settings.DRAWINGS_DATA, therionfile.dwgpath)
therionfile.filesize = os.stat(ff)[stat.ST_SIZE] if not _set_filesize_and_check(ff, therionfile, "Therion"):
if therionfile.filesize <= 0:
message = f"! Zero length therion file {ff}"
print(message)
DataIssue.objects.create(parser="Therion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}")
return return
fin = open(ff, "r")
ttext = fin.read() ttext = _read_text_file(ff)
fin.close()
# The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap' # The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap'
# print(len(re.findall(r"line", ttext))) # print(len(re.findall(r"line", ttext)))
@@ -243,15 +273,10 @@ def settnlfileinfo(dwgfile):
*ref wallet identifiers may be found in at least two different places in tunnel files. *ref wallet identifiers may be found in at least two different places in tunnel files.
""" """
ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath) ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath)
dwgfile.filesize = os.stat(ff)[stat.ST_SIZE] if not _set_filesize_and_check(ff, dwgfile, "Tunnel"):
if dwgfile.filesize <= 0:
message = f"! Zero length tunnel file {ff}"
print(message)
DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{dwgfile.dwgpath}")
return return
fin = open(ff, "r") # tunnel files are now all utf-8 after fixup 24/10/2025.
ttext = fin.read() ttext = _read_text_file(ff)
fin.close()
dwgfile.npaths = len(rx_skpath.findall(ttext)) dwgfile.npaths = len(rx_skpath.findall(ttext))
# dwgfile.save() # dwgfile.save()
@@ -286,18 +311,12 @@ def settnlfileinfo(dwgfile):
def setdrwfileinfo(dwgfile): def setdrwfileinfo(dwgfile):
"""Read in the drawing file contents and sets values on the dwgfile object, """Read in the drawing file contents and sets values on the dwgfile object,
but these are SVGs, PDFs or .txt files, so there is no useful format to search for but these are SVGs, PDFs or .txt files, so there is no useful format to search for
This function is a placeholder in case we thnk of a way to do something
to recognise generic survex filenames.
""" """
ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath
dwgfile.filesize = ff.stat().st_size if not _set_filesize_and_check(ff, dwgfile, "drawings"):
dwgfile.save()
if dwgfile.filesize <= 0:
message = f"! Zero length drawing file {ff}"
print(message)
DataIssue.objects.create(parser="drawings", message=message, url=f"/dwgdataraw/{dwgfile.dwgpath}")
return return
# nothing more to parse for generic files
def load_drawings_files(): def load_drawings_files():
"""Breadth first search of drawings directory looking for sub-directories and *.xml filesize """Breadth first search of drawings directory looking for sub-directories and *.xml filesize