From ba2fa3caf5262979fc16eb5f55c68dd1d2b296c9 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Mon, 15 Dec 2025 16:38:50 +0000 Subject: [PATCH] now using bulk_create --- core/TESTS/test_drawings.py | 13 +++++++++++++ parsers/drawings.py | 24 +++++++++++++++++++++--- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/core/TESTS/test_drawings.py b/core/TESTS/test_drawings.py index 92d8933..d1d09bb 100644 --- a/core/TESTS/test_drawings.py +++ b/core/TESTS/test_drawings.py @@ -70,3 +70,16 @@ class DrawingsPathlibTests(TestCase): drawings.load_drawings_files() self.assertFalse(DrawingFile.objects.filter(dwgpath='.git/secret.txt').exists()) + + def test_bulk_create_chunks(self): + # Create more than chunk size files to ensure bulk_create is called in multiple chunks + count = 800 + with tempfile.TemporaryDirectory() as td: + p = pathlib.Path(td) + for i in range(count): + (p / f'file{i}.txt').write_text('x') + settings.DRAWINGS_DATA = td + + drawings.load_drawings_files() + + self.assertEqual(DrawingFile.objects.count(), count) diff --git a/parsers/drawings.py b/parsers/drawings.py index e293cc2..3d64698 100644 --- a/parsers/drawings.py +++ b/parsers/drawings.py @@ -339,6 +339,7 @@ def load_drawings_files(): supported_extensions = {".txt", ".xml", ".th", ".th2", ".pdf", ".png", ".svg", ".jpg"} # Walk the tree with pathlib, skip hidden and backup files + files_meta = [] # list of tuples (ext, rel_path, dwgname, pathobj) for p in drawdatadir.rglob('*'): # Ignore anything under a .git directory if '.git' in p.parts: @@ -358,9 +359,26 @@ def load_drawings_files(): dwgname = p.stem ext = suffix[1:] - dwgfile = DrawingFile(dwgpath=rel, dwgname=dwgname) - dwgfile.save() - all_xml.append((ext, dwgfile, p)) + files_meta.append((ext, rel, dwgname, p)) + + # Bulk create DrawingFile instances to avoid many individual DB saves + if files_meta: + objs_to_create = [DrawingFile(dwgpath=rel, dwgname=dwgname) for (_, rel, dwgname, _) in files_meta] + # Use chunks to avoid 
huge single queries + chunk_size = 700 + for i in range(0, len(objs_to_create), chunk_size): + DrawingFile.objects.bulk_create(objs_to_create[i : i + chunk_size]) + + # Re-fetch created objects and map by dwgpath + rel_paths = [rel for (_, rel, _, _) in files_meta] + created_objs = DrawingFile.objects.filter(dwgpath__in=rel_paths) + mapping = {obj.dwgpath: obj for obj in created_objs} + + # Reconstruct all_xml using the created model instances + for ext, rel, _, p in files_meta: + dwgfile = mapping.get(rel) + if dwgfile: + all_xml.append((ext, dwgfile, p)) print(f" - {len(all_xml)} Drawings files found")