2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-17 18:17:07 +00:00

Bulk-update all drawing records in one pass after parsing, instead of saving each record individually.

This commit is contained in:
2025-12-15 19:56:55 +00:00
parent f3b46856ee
commit d6b2811457

View File

@@ -122,7 +122,7 @@ def _set_filesize_and_check(fullpath, model_obj, parser_label, url_prefix="/dwgd
return False return False
model_obj.filesize = size model_obj.filesize = size
model_obj.save() # Do not save here; caller should include 'filesize' in the bulk update set.
if size <= 0: if size <= 0:
message = f"! Zero length {parser_label.lower()} file {fullpath}" message = f"! Zero length {parser_label.lower()} file {fullpath}"
print(message) print(message)
@@ -218,7 +218,6 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"):
if hasattr(dwgfile, 'dwgcontains'): if hasattr(dwgfile, 'dwgcontains'):
dwgfile.dwgcontains.add(rdwgfile) dwgfile.dwgcontains.add(rdwgfile)
dwgfile.save()
return None, None return None, None
@@ -246,8 +245,9 @@ def settherionfileinfo(filetuple):
thtype, therionfile = filetuple thtype, therionfile = filetuple
ff = Path(settings.DRAWINGS_DATA) / therionfile.dwgpath ff = Path(settings.DRAWINGS_DATA) / therionfile.dwgpath
if not _set_filesize_and_check(ff, therionfile, "Therion"): modified = set()
return if _set_filesize_and_check(ff, therionfile, "Therion"):
modified.add("filesize")
ttext = _read_text_file(ff) ttext = _read_text_file(ff)
@@ -255,6 +255,7 @@ def settherionfileinfo(filetuple):
# print(len(re.findall(r"line", ttext))) # print(len(re.findall(r"line", ttext)))
if thtype == "th": if thtype == "th":
therionfile.npaths = len(rx_input.findall(ttext)) therionfile.npaths = len(rx_input.findall(ttext))
modified.add("npaths")
if wallet_texts := rx_ref.findall(ttext): if wallet_texts := rx_ref.findall(ttext):
# print(f"#ref {therionfile.dwgname} : {wallet_text}") # print(f"#ref {therionfile.dwgname} : {wallet_text}")
if wallets := Wallet.objects.filter(walletname__in=wallet_texts): # ! Django idiom not used elsewhere. A filter using a list of strings. if wallets := Wallet.objects.filter(walletname__in=wallet_texts): # ! Django idiom not used elsewhere. A filter using a list of strings.
@@ -262,6 +263,7 @@ def settherionfileinfo(filetuple):
therionfile.dwgwallets.add(w) therionfile.dwgwallets.add(w)
elif thtype == "th2": elif thtype == "th2":
therionfile.npaths = len(rx_line.findall(ttext)) therionfile.npaths = len(rx_line.findall(ttext))
modified.add("npaths")
# scan and look for survex blocks that might have been included, and image scans (as for tunnel drawings) # scan and look for survex blocks that might have been included, and image scans (as for tunnel drawings)
# which would populate dwgfile.survexfile # which would populate dwgfile.survexfile
@@ -293,7 +295,8 @@ def settherionfileinfo(filetuple):
DataIssue.objects.create(parser="xTherion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}") DataIssue.objects.create(parser="xTherion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}")
findimportinsert(therionfile, inp) findimportinsert(therionfile, inp)
therionfile.save() # Defer saving scalar fields; caller will perform bulk_update.
return modified
def settnlfileinfo(dwgfile): def settnlfileinfo(dwgfile):
@@ -307,13 +310,14 @@ def settnlfileinfo(dwgfile):
*ref wallet identifiers may be found in at least two different places in tunnel files. *ref wallet identifiers may be found in at least two different places in tunnel files.
""" """
ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath
if not _set_filesize_and_check(ff, dwgfile, "Tunnel"): modified = set()
return if _set_filesize_and_check(ff, dwgfile, "Tunnel"):
modified.add("filesize")
ttext = _read_text_file(ff) ttext = _read_text_file(ff)
dwgfile.npaths = len(rx_skpath.findall(ttext)) dwgfile.npaths = len(rx_skpath.findall(ttext))
# dwgfile.save() modified.add("npaths")
# example drawing file in Tunnel format. # example drawing file in Tunnel format.
# <tunnelxml tunnelversion="version2009-06-21 Matienzo" tunnelproject="ireby" tunneluser="goatchurch" tunneldate="2009-06-29 23:22:17"> # <tunnelxml tunnelversion="version2009-06-21 Matienzo" tunnelproject="ireby" tunneluser="goatchurch" tunneldate="2009-06-29 23:22:17">
@@ -335,11 +339,11 @@ def settnlfileinfo(dwgfile):
message = f" ! wallet not found referenced from {dwgfile} -- '{refs}' ({e}) " message = f" ! wallet not found referenced from {dwgfile} -- '{refs}' ({e}) "
print(message) print(message)
DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{dwgfile}") DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{dwgfile}")
# should also scan and look for survex blocks that might have been included, and image scans # should also scan and look for survex blocks that might have been included, and image scans
# which would populate dwgfile.survexfile # which would populate dwgfile.survexfile
dwgfile.save() # Defer scalar saves to bulk_update; return set of modified fields
return modified
def setdrwfileinfo(dwgfile): def setdrwfileinfo(dwgfile):
@@ -348,13 +352,13 @@ def setdrwfileinfo(dwgfile):
""" """
ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath
# Set filesize (if available) but do not save; return modified field names
if not _set_filesize_and_check(ff, dwgfile, "drawings"): if not _set_filesize_and_check(ff, dwgfile, "drawings"):
return return set()
# nothing more to parse for generic files return {"filesize"}
def load_drawings_files(): def load_drawings_files():
"""Breadth first search of drawings directory looking for sub-directories and *.xml filesize """
Why do we have all this detection of file types/! Why not use get_mime_types ? Why do we have all this detection of file types/! Why not use get_mime_types ?
What is it all for ?? What is it all for ??
@@ -418,18 +422,39 @@ def load_drawings_files():
ext_priority = {'th2': 0, 'th': 1} ext_priority = {'th2': 0, 'th': 1}
all_xml.sort(key=lambda t: ext_priority.get(t[0], 2)) all_xml.sort(key=lambda t: ext_priority.get(t[0], 2))
print(f" - Drawing found, starting parsing...")
# Process files and collect modified scalar fields for bulk update
modified_map = {} # {DrawingFile instance: set(fields)}
for extension, filename, pathobj in all_xml: for extension, filename, pathobj in all_xml:
modified = set()
if extension in {"pdf", "txt", "svg", "jpg", "png", ""}: if extension in {"pdf", "txt", "svg", "jpg", "png", ""}:
setdrwfileinfo(filename) modified = setdrwfileinfo(filename) or set()
elif extension == "xml": elif extension == "xml":
settnlfileinfo(filename) modified = settnlfileinfo(filename) or set()
# important to import .th2 files before .th so that we can assign them when found in .th files # important to import .th2 files before .th so that we can assign them when found in .th files
elif extension == "th2": elif extension == "th2":
settherionfileinfo(("th2", filename)) modified = settherionfileinfo(("th2", filename)) or set()
elif extension == "th": elif extension == "th":
settherionfileinfo(("th", filename)) modified = settherionfileinfo(("th", filename)) or set()
print(f" - Drawings parsed") if modified:
modified_map.setdefault(filename, set()).update(modified)
# Bulk update scalar fields grouped by identical field-sets to use bulk_update efficiently
from collections import defaultdict
groups = defaultdict(list) # {tuple(fields): [instances]}
for inst, fields in modified_map.items():
key = tuple(sorted(fields))
groups[key].append(inst)
for fields_tuple, instances in groups.items():
fields_list = list(fields_tuple)
# Use a conservative batch size
DrawingFile.objects.bulk_update(instances, fields_list, batch_size=500)
print(f" - Drawings parsed and database updated.")
# for drawfile in DrawingFile.objects.all(): # for drawfile in DrawingFile.objects.all():
# SetTunnelfileInfo(drawfile) # SetTunnelfileInfo(drawfile)