2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-17 16:27:07 +00:00

bulk updates for all drawings now, after parsing.

This commit is contained in:
2025-12-15 19:56:55 +00:00
parent f3b46856ee
commit d6b2811457

View File

@@ -122,7 +122,7 @@ def _set_filesize_and_check(fullpath, model_obj, parser_label, url_prefix="/dwgd
return False
model_obj.filesize = size
model_obj.save()
# Do not save here; caller should include 'filesize' in the bulk update set.
if size <= 0:
message = f"! Zero length {parser_label.lower()} file {fullpath}"
print(message)
@@ -218,7 +218,6 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"):
if hasattr(dwgfile, 'dwgcontains'):
dwgfile.dwgcontains.add(rdwgfile)
dwgfile.save()
return None, None
@@ -246,8 +245,9 @@ def settherionfileinfo(filetuple):
thtype, therionfile = filetuple
ff = Path(settings.DRAWINGS_DATA) / therionfile.dwgpath
if not _set_filesize_and_check(ff, therionfile, "Therion"):
return
modified = set()
if _set_filesize_and_check(ff, therionfile, "Therion"):
modified.add("filesize")
ttext = _read_text_file(ff)
@@ -255,6 +255,7 @@ def settherionfileinfo(filetuple):
# print(len(re.findall(r"line", ttext)))
if thtype == "th":
therionfile.npaths = len(rx_input.findall(ttext))
modified.add("npaths")
if wallet_texts := rx_ref.findall(ttext):
# print(f"#ref {therionfile.dwgname} : {wallet_text}")
if wallets := Wallet.objects.filter(walletname__in=wallet_texts): # ! Django idiom not used elsewhere. A filter using a list of strings.
@@ -262,6 +263,7 @@ def settherionfileinfo(filetuple):
therionfile.dwgwallets.add(w)
elif thtype == "th2":
therionfile.npaths = len(rx_line.findall(ttext))
modified.add("npaths")
# scan and look for survex blocks that might have been included, and image scans (as for tunnel drawings)
# which would populate dwgfile.survexfile
@@ -293,7 +295,8 @@ def settherionfileinfo(filetuple):
DataIssue.objects.create(parser="xTherion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}")
findimportinsert(therionfile, inp)
therionfile.save()
# Defer saving scalar fields; caller will perform bulk_update.
return modified
def settnlfileinfo(dwgfile):
@@ -307,13 +310,14 @@ def settnlfileinfo(dwgfile):
*ref wallet identifiers may be found in at least two different places in tunnel files.
"""
ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath
if not _set_filesize_and_check(ff, dwgfile, "Tunnel"):
return
modified = set()
if _set_filesize_and_check(ff, dwgfile, "Tunnel"):
modified.add("filesize")
ttext = _read_text_file(ff)
dwgfile.npaths = len(rx_skpath.findall(ttext))
# dwgfile.save()
modified.add("npaths")
# example drawing file in Tunnel format.
# <tunnelxml tunnelversion="version2009-06-21 Matienzo" tunnelproject="ireby" tunneluser="goatchurch" tunneldate="2009-06-29 23:22:17">
@@ -335,11 +339,11 @@ def settnlfileinfo(dwgfile):
message = f" ! wallet not found referenced from {dwgfile} -- '{refs}' ({e}) "
print(message)
DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{dwgfile}")
# should also scan and look for survex blocks that might have been included, and image scans
# which would populate dwgfile.survexfile
dwgfile.save()
# Defer scalar saves to bulk_update; return set of modified fields
return modified
def setdrwfileinfo(dwgfile):
@@ -348,13 +352,13 @@ def setdrwfileinfo(dwgfile):
"""
ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath
# Set filesize (if available) but do not save; return modified field names
if not _set_filesize_and_check(ff, dwgfile, "drawings"):
return
# nothing more to parse for generic files
return set()
return {"filesize"}
def load_drawings_files():
"""Breadth first search of drawings directory looking for sub-directories and *.xml filesize
"""
Why do we have all this detection of file types?! Why not use get_mime_types?
What is it all for?
@@ -418,18 +422,39 @@ def load_drawings_files():
ext_priority = {'th2': 0, 'th': 1}
all_xml.sort(key=lambda t: ext_priority.get(t[0], 2))
print(f" - Drawing found, starting parsing...")
# Process files and collect modified scalar fields for bulk update
modified_map = {} # {DrawingFile instance: set(fields)}
for extension, filename, pathobj in all_xml:
modified = set()
if extension in {"pdf", "txt", "svg", "jpg", "png", ""}:
setdrwfileinfo(filename)
modified = setdrwfileinfo(filename) or set()
elif extension == "xml":
settnlfileinfo(filename)
modified = settnlfileinfo(filename) or set()
# important to import .th2 files before .th so that we can assign them when found in .th files
elif extension == "th2":
settherionfileinfo(("th2", filename))
modified = settherionfileinfo(("th2", filename)) or set()
elif extension == "th":
settherionfileinfo(("th", filename))
modified = settherionfileinfo(("th", filename)) or set()
print(f" - Drawings parsed")
if modified:
modified_map.setdefault(filename, set()).update(modified)
# Bulk update scalar fields grouped by identical field-sets to use bulk_update efficiently
from collections import defaultdict
groups = defaultdict(list) # {tuple(fields): [instances]}
for inst, fields in modified_map.items():
key = tuple(sorted(fields))
groups[key].append(inst)
for fields_tuple, instances in groups.items():
fields_list = list(fields_tuple)
# Use a conservative batch size
DrawingFile.objects.bulk_update(instances, fields_list, batch_size=500)
print(f" - Drawings parsed and database updated.")
# for drawfile in DrawingFile.objects.all():
# SetTunnelfileInfo(drawfile)