Bulk-update all drawing files in one pass after parsing, instead of saving each one individually.

2026-01-19 13:22:58 +00:00 · 2025-12-15 19:56:55 +00:00
parent f3b46856ee
commit d6b2811457
1 changed files with 44 additions and 19 deletions
--- a/parsers/drawings.py
+++ b/parsers/drawings.py
@@ -122,7 +122,7 @@ def _set_filesize_and_check(fullpath, model_obj, parser_label, url_prefix="/dwgd
        return False

    model_obj.filesize = size
-    model_obj.save()
+    # Do not save here; caller should include 'filesize' in the bulk update set.
    if size <= 0:
        message = f"! Zero length {parser_label.lower()} file {fullpath}"
        print(message)
@@ -218,7 +218,6 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"):
        if hasattr(dwgfile, 'dwgcontains'):
            dwgfile.dwgcontains.add(rdwgfile)

-    dwgfile.save()
    return None, None


@@ -246,8 +245,9 @@ def settherionfileinfo(filetuple):
    thtype, therionfile = filetuple

    ff = Path(settings.DRAWINGS_DATA) / therionfile.dwgpath
-    if not _set_filesize_and_check(ff, therionfile, "Therion"):
-        return
+    modified = set()
+    if _set_filesize_and_check(ff, therionfile, "Therion"):
+        modified.add("filesize")

    ttext = _read_text_file(ff)

@@ -255,6 +255,7 @@ def settherionfileinfo(filetuple):
    # print(len(re.findall(r"line", ttext)))
    if thtype == "th":
        therionfile.npaths = len(rx_input.findall(ttext))
+        modified.add("npaths")
        if wallet_texts := rx_ref.findall(ttext):
            # print(f"#ref {therionfile.dwgname} : {wallet_text}")
            if wallets := Wallet.objects.filter(walletname__in=wallet_texts): # ! Django idiom not used elsewhere. A filter using a list of strings.
@@ -262,6 +263,7 @@ def settherionfileinfo(filetuple):
                    therionfile.dwgwallets.add(w)                
    elif thtype == "th2":
        therionfile.npaths = len(rx_line.findall(ttext))
+        modified.add("npaths")

        # scan and look for survex blocks that might have been included, and image scans (as for tunnel drawings)
        # which would populate dwgfile.survexfile
@@ -293,7 +295,8 @@ def settherionfileinfo(filetuple):
        DataIssue.objects.create(parser="xTherion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}")
        findimportinsert(therionfile, inp)

-    therionfile.save()
+    # Defer saving scalar fields; caller will perform bulk_update.
+    return modified


 def settnlfileinfo(dwgfile):
@@ -307,13 +310,14 @@ def settnlfileinfo(dwgfile):
    *ref wallet identifiers may be found in at least two different places in tunnel files.
    """
    ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath
-    if not _set_filesize_and_check(ff, dwgfile, "Tunnel"):
-        return
+    modified = set()
+    if _set_filesize_and_check(ff, dwgfile, "Tunnel"):
+        modified.add("filesize")

    ttext = _read_text_file(ff)

    dwgfile.npaths = len(rx_skpath.findall(ttext))
-    # dwgfile.save()
+    modified.add("npaths")

    # example drawing file in Tunnel format.
    # <tunnelxml tunnelversion="version2009-06-21 Matienzo" tunnelproject="ireby" tunneluser="goatchurch" tunneldate="2009-06-29 23:22:17">
@@ -335,11 +339,11 @@ def settnlfileinfo(dwgfile):
            message =  f"  ! wallet not found referenced from {dwgfile} -- '{refs}' ({e}) "
            print(message)
            DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{dwgfile}")
-
    # should also scan and look for survex blocks that might have been included, and image scans
    # which would populate dwgfile.survexfile

-    dwgfile.save()
+    # Defer scalar saves to bulk_update; return set of modified fields
+    return modified


 def setdrwfileinfo(dwgfile):
@@ -348,13 +352,13 @@ def setdrwfileinfo(dwgfile):

    """
    ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath
+    # Set filesize (if available) but do not save; return modified field names
    if not _set_filesize_and_check(ff, dwgfile, "drawings"):
-        return
-    # nothing more to parse for generic files
+        return set()
+    return {"filesize"}

 def load_drawings_files():
-    """Breadth first search of drawings directory looking for sub-directories and *.xml filesize
-    
+    """    
    Why do we have all this detection of file types/! Why not use get_mime_types ?
    What is it all for ??

@@ -418,18 +422,39 @@ def load_drawings_files():
    ext_priority = {'th2': 0, 'th': 1}
    all_xml.sort(key=lambda t: ext_priority.get(t[0], 2))

+    print(f" - Drawing found, starting parsing...")
+
+    # Process files and collect modified scalar fields for bulk update
+    modified_map = {}  # {DrawingFile instance: set(fields)}
    for extension, filename, pathobj in all_xml:
+        modified = set()
        if extension in {"pdf", "txt", "svg", "jpg", "png", ""}:
-            setdrwfileinfo(filename)
+            modified = setdrwfileinfo(filename) or set()
        elif extension == "xml":
-            settnlfileinfo(filename)
+            modified = settnlfileinfo(filename) or set()
        # important to import .th2 files before .th so that we can assign them when found in .th files
        elif extension == "th2":
-            settherionfileinfo(("th2", filename))
+            modified = settherionfileinfo(("th2", filename)) or set()
        elif extension == "th":
-            settherionfileinfo(("th", filename))
+            modified = settherionfileinfo(("th", filename)) or set()

-    print(f" - Drawings parsed")
+        if modified:
+            modified_map.setdefault(filename, set()).update(modified)
+
+    # Bulk update scalar fields grouped by identical field-sets to use bulk_update efficiently
+    from collections import defaultdict
+
+    groups = defaultdict(list)  # {tuple(fields): [instances]}
+    for inst, fields in modified_map.items():
+        key = tuple(sorted(fields))
+        groups[key].append(inst)
+
+    for fields_tuple, instances in groups.items():
+        fields_list = list(fields_tuple)
+        # Use a conservative batch size
+        DrawingFile.objects.bulk_update(instances, fields_list, batch_size=500)
+
+    print(f" - Drawings parsed and database updated.")

    # for drawfile in DrawingFile.objects.all():
    # SetTunnelfileInfo(drawfile)