From f3b46856ee5e480b5f0dc83d3e29e7c9f2fa8fb6 Mon Sep 17 00:00:00 2001
From: Philip Sargent
Date: Mon, 15 Dec 2025 19:42:23 +0000
Subject: [PATCH] chunked helper to return DrawingFile objects

---
 core/TESTS/test_drawings.py | 19 +++++++++++++++++++
 parsers/drawings.py         | 32 +++++++++++++++++++++++++++-----
 2 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/core/TESTS/test_drawings.py b/core/TESTS/test_drawings.py
index 4133707..60acca2 100644
--- a/core/TESTS/test_drawings.py
+++ b/core/TESTS/test_drawings.py
@@ -145,3 +145,22 @@ class DrawingsPathlibTests(TestCase):
         self.assertEqual(set(drawings.IMAGE_LIKE_EXTS), set(drawings.IMAGE_EXTS))
         self.assertIn('.th', drawings.SUPPORTED_EXTENSIONS)
         self.assertIn('.png', drawings.SUPPORTED_EXTENSIONS)
+
+    def test_fetch_drawingfiles_by_paths_chunks(self):
+        # Create more items than the typical SQLite parameter limit to ensure chunking
+        count = 1200
+        rel_paths = []
+        objs = []
+        for i in range(count):
+            rel = f'bigdir/file{i}.txt'
+            rel_paths.append(rel)
+            objs.append(DrawingFile(dwgpath=rel, dwgname=f'name{i}'))
+
+        # Bulk create them efficiently
+        DrawingFile.objects.bulk_create(objs)
+
+        mapping = drawings.fetch_drawingfiles_by_paths(rel_paths, chunk_size=500)
+        self.assertEqual(len(mapping), count)
+        # Spot-check a few entries
+        self.assertIn('bigdir/file0.txt', mapping)
+        self.assertIn(f'bigdir/file{count-1}.txt', mapping)
diff --git a/parsers/drawings.py b/parsers/drawings.py
index 3bfa310..453a9b1 100644
--- a/parsers/drawings.py
+++ b/parsers/drawings.py
@@ -54,6 +54,30 @@ def _is_image_suffix(suffix: str) -> bool:
         return False
     return suffix.lower() in IMAGE_EXTS
 
+
+def fetch_drawingfiles_by_paths(paths, chunk_size: int = 500):
+    """Fetch DrawingFile objects for the given iterable of paths in chunks.
+
+    This avoids building a very large SQL IN(...) clause which can exceed DB
+    parameter limits (SQLite defaults to 999 bound variables). A default
+    chunk_size of 500 is conservative and works well across backends.
+
+    Returns a dict mapping dwgpath -> DrawingFile (first match if duplicates).
+    """
+    mapping = {}
+    if not paths:
+        return mapping
+
+    # Ensure we iterate over a list to allow slicing in chunks
+    rel_paths = list(paths)
+    for i in range(0, len(rel_paths), chunk_size):
+        chunk = rel_paths[i : i + chunk_size]
+        for obj in DrawingFile.objects.filter(dwgpath__in=chunk):
+            # if duplicates exist, preserve the first one seen
+            mapping.setdefault(obj.dwgpath, obj)
+
+    return mapping
+
 rx_wallet = re.compile(r"""
 # r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path
 # This regex is designed to extract a specific directory prefix and a filename
@@ -330,8 +354,7 @@ def setdrwfileinfo(dwgfile):
 def load_drawings_files():
     """Breadth first search of drawings directory looking for sub-directories and *.xml filesize
-    This is brain-damaged very early code. Should be replaced with proper use of pathlib.
-
+    Why do we have all this detection of file types?! Why not use get_mime_types? What is it all for??
@@ -379,10 +402,9 @@ def load_drawings_files():
     for i in range(0, len(objs_to_create), chunk_size):
         DrawingFile.objects.bulk_create(objs_to_create[i : i + chunk_size])
 
-    # Re-fetch created objects and map by dwgpath
+    # Re-fetch created objects and map by dwgpath using a chunked fetch helper
     rel_paths = [rel for (_, rel, _, _) in files_meta]
-    created_objs = DrawingFile.objects.filter(dwgpath__in=rel_paths)
-    mapping = {obj.dwgpath: obj for obj in created_objs}
+    mapping = fetch_drawingfiles_by_paths(rel_paths, chunk_size=500)
 
     # Reconstruct all_xml using the created model instances
     for ext, rel, _, p in files_meta:
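
Usage note (not part of the patch): a minimal sketch of how the new helper is
expected to be called, assuming only the DrawingFile model and the dwgpath /
dwgname fields shown in the diff above; the path list below is hypothetical,
and the snippet assumes a configured Django environment (e.g. run from
./manage.py shell).

    from parsers.drawings import fetch_drawingfiles_by_paths

    # Hypothetical list of relative paths; large enough that a single
    # IN(...) query would exceed SQLite's default limit of 999 bound variables.
    rel_paths = [f"bigdir/file{i}.txt" for i in range(2000)]

    # The helper issues one query per chunk of at most 500 paths, so no single
    # IN(...) clause exceeds the backend's parameter limit, and it returns a
    # dict keyed by dwgpath.
    mapping = fetch_drawingfiles_by_paths(rel_paths, chunk_size=500)

    missing = [rel for rel in rel_paths if rel not in mapping]
    for rel in rel_paths[:5]:
        drawing = mapping.get(rel)  # None if no DrawingFile row exists for rel
        if drawing is not None:
            print(drawing.dwgpath, drawing.dwgname)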