mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2025-12-17 11:07:19 +00:00
chunked helper to return DrawingFile objects
This commit is contained in:
@@ -145,3 +145,22 @@ class DrawingsPathlibTests(TestCase):
|
|||||||
self.assertEqual(set(drawings.IMAGE_LIKE_EXTS), set(drawings.IMAGE_EXTS))
|
self.assertEqual(set(drawings.IMAGE_LIKE_EXTS), set(drawings.IMAGE_EXTS))
|
||||||
self.assertIn('.th', drawings.SUPPORTED_EXTENSIONS)
|
self.assertIn('.th', drawings.SUPPORTED_EXTENSIONS)
|
||||||
self.assertIn('.png', drawings.SUPPORTED_EXTENSIONS)
|
self.assertIn('.png', drawings.SUPPORTED_EXTENSIONS)
|
||||||
|
|
||||||
|
def test_fetch_drawingfiles_by_paths_chunks(self):
    # Use more rows than the classic SQLite bound-parameter limit so that a
    # chunk_size of 500 forces the helper to issue several queries.
    total = 1200
    rel_paths = [f'bigdir/file{n}.txt' for n in range(total)]

    # One bulk insert keeps the fixture setup fast.
    DrawingFile.objects.bulk_create(
        [DrawingFile(dwgpath=path, dwgname=f'name{n}') for n, path in enumerate(rel_paths)]
    )

    found = drawings.fetch_drawingfiles_by_paths(rel_paths, chunk_size=500)

    # Every created row must come back, whichever chunk it fell into.
    self.assertEqual(len(found), total)
    # Spot-check the first and last paths (first chunk and final partial chunk).
    for expected in ('bigdir/file0.txt', f'bigdir/file{total-1}.txt'):
        self.assertIn(expected, found)
|
||||||
|
|||||||
@@ -54,6 +54,30 @@ def _is_image_suffix(suffix: str) -> bool:
|
|||||||
return False
|
return False
|
||||||
return suffix.lower() in IMAGE_EXTS
|
return suffix.lower() in IMAGE_EXTS
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_drawingfiles_by_paths(paths, chunk_size: int = 500):
    """Fetch DrawingFile objects for the given iterable of paths in chunks.

    This avoids building a very large SQL IN(...) clause which can exceed DB
    parameter limits (SQLite builds before 3.32.0 default to 999 bound
    variables). A default chunk_size of 500 is conservative.

    Args:
        paths: iterable of dwgpath values to look up. May be empty or None,
            in which case an empty dict is returned. Repeated paths are
            queried only once.
        chunk_size: maximum number of paths placed in a single query; must
            be at least 1.

    Returns:
        A dict mapping dwgpath -> DrawingFile (first match if duplicates).

    Raises:
        ValueError: if there are paths to fetch and chunk_size is below 1.
    """
    mapping = {}
    if not paths:
        return mapping

    if chunk_size < 1:
        # A negative step makes range() yield nothing, which would silently
        # return an empty mapping; zero raises an unhelpful range() error.
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    # Materialise the iterable so it can be sliced (it may be a generator).
    # dict.fromkeys drops repeated paths while keeping first-seen order, so
    # no path is bound into a query more than once.
    rel_paths = list(dict.fromkeys(paths))
    for start in range(0, len(rel_paths), chunk_size):
        chunk = rel_paths[start : start + chunk_size]
        for obj in DrawingFile.objects.filter(dwgpath__in=chunk):
            # if duplicate rows exist for a path, preserve the first one seen
            mapping.setdefault(obj.dwgpath, obj)

    return mapping
|
||||||
|
|
||||||
rx_wallet = re.compile(r"""
|
rx_wallet = re.compile(r"""
|
||||||
# r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path
|
# r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path
|
||||||
# This regex is designed to extract a specific directory prefix and a filename
|
# This regex is designed to extract a specific directory prefix and a filename
|
||||||
@@ -330,7 +354,6 @@ def setdrwfileinfo(dwgfile):
|
|||||||
|
|
||||||
def load_drawings_files():
|
def load_drawings_files():
|
||||||
"""Breadth first search of drawings directory looking for sub-directories and *.xml filesize
|
"""Breadth first search of drawings directory looking for sub-directories and *.xml filesize
|
||||||
This is brain-damaged very early code. Should be replaced with proper use of pathlib.
|
|
||||||
|
|
||||||
Why do we have all this detection of file types/! Why not use get_mime_types ?
|
Why do we have all this detection of file types/! Why not use get_mime_types ?
|
||||||
What is it all for ??
|
What is it all for ??
|
||||||
@@ -379,10 +402,9 @@ def load_drawings_files():
|
|||||||
for i in range(0, len(objs_to_create), chunk_size):
|
for i in range(0, len(objs_to_create), chunk_size):
|
||||||
DrawingFile.objects.bulk_create(objs_to_create[i : i + chunk_size])
|
DrawingFile.objects.bulk_create(objs_to_create[i : i + chunk_size])
|
||||||
|
|
||||||
# Re-fetch created objects and map by dwgpath
|
# Re-fetch created objects and map by dwgpath using a chunked fetch helper
|
||||||
rel_paths = [rel for (_, rel, _, _) in files_meta]
|
rel_paths = [rel for (_, rel, _, _) in files_meta]
|
||||||
created_objs = DrawingFile.objects.filter(dwgpath__in=rel_paths)
|
mapping = fetch_drawingfiles_by_paths(rel_paths, chunk_size=500)
|
||||||
mapping = {obj.dwgpath: obj for obj in created_objs}
|
|
||||||
|
|
||||||
# Reconstruct all_xml using the created model instances
|
# Reconstruct all_xml using the created model instances
|
||||||
for ext, rel, _, p in files_meta:
|
for ext, rel, _, p in files_meta:
|
||||||
|
|||||||
Reference in New Issue
Block a user