diff --git a/parsers/drawings.py b/parsers/drawings.py index 453a9b1d7..69700e39b 100644 --- a/parsers/drawings.py +++ b/parsers/drawings.py @@ -122,7 +122,7 @@ def _set_filesize_and_check(fullpath, model_obj, parser_label, url_prefix="/dwgd return False model_obj.filesize = size - model_obj.save() + # Do not save here; caller should include 'filesize' in the bulk update set. if size <= 0: message = f"! Zero length {parser_label.lower()} file {fullpath}" print(message) @@ -218,7 +218,6 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"): if hasattr(dwgfile, 'dwgcontains'): dwgfile.dwgcontains.add(rdwgfile) - dwgfile.save() return None, None @@ -246,8 +245,9 @@ def settherionfileinfo(filetuple): thtype, therionfile = filetuple ff = Path(settings.DRAWINGS_DATA) / therionfile.dwgpath - if not _set_filesize_and_check(ff, therionfile, "Therion"): - return + modified = set() + if _set_filesize_and_check(ff, therionfile, "Therion"): + modified.add("filesize") ttext = _read_text_file(ff) @@ -255,6 +255,7 @@ def settherionfileinfo(filetuple): # print(len(re.findall(r"line", ttext))) if thtype == "th": therionfile.npaths = len(rx_input.findall(ttext)) + modified.add("npaths") if wallet_texts := rx_ref.findall(ttext): # print(f"#ref {therionfile.dwgname} : {wallet_text}") if wallets := Wallet.objects.filter(walletname__in=wallet_texts): # ! Django idiom not used elsewhere. A filter using a list of strings. @@ -262,6 +263,7 @@ def settherionfileinfo(filetuple): therionfile.dwgwallets.add(w) elif thtype == "th2": therionfile.npaths = len(rx_line.findall(ttext)) + modified.add("npaths") # scan and look for survex blocks that might have been included, and image scans (as for tunnel drawings) # which would populate dwgfile.survexfile @@ -293,7 +295,8 @@ def settherionfileinfo(filetuple): DataIssue.objects.create(parser="xTherion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}") findimportinsert(therionfile, inp) - therionfile.save() + # Defer saving scalar fields; caller will perform bulk_update. + return modified def settnlfileinfo(dwgfile): @@ -307,13 +310,14 @@ def settnlfileinfo(dwgfile): *ref wallet identifiers may be found in at least two different places in tunnel files. """ ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath - if not _set_filesize_and_check(ff, dwgfile, "Tunnel"): - return + modified = set() + if _set_filesize_and_check(ff, dwgfile, "Tunnel"): + modified.add("filesize") ttext = _read_text_file(ff) dwgfile.npaths = len(rx_skpath.findall(ttext)) - # dwgfile.save() + modified.add("npaths") # example drawing file in Tunnel format. # @@ -335,11 +339,11 @@ def settnlfileinfo(dwgfile): message = f" ! wallet not found referenced from {dwgfile} -- '{refs}' ({e}) " print(message) DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{dwgfile}") - # should also scan and look for survex blocks that might have been included, and image scans # which would populate dwgfile.survexfile - dwgfile.save() + # Defer scalar saves to bulk_update; return set of modified fields + return modified def setdrwfileinfo(dwgfile): @@ -348,13 +352,13 @@ def setdrwfileinfo(dwgfile): """ ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath + # Set filesize (if available) but do not save; return modified field names if not _set_filesize_and_check(ff, dwgfile, "drawings"): - return - # nothing more to parse for generic files + return set() + return {"filesize"} def load_drawings_files(): - """Breadth first search of drawings directory looking for sub-directories and *.xml filesize - + """ Why do we have all this detection of file types/! Why not use get_mime_types ? What is it all for ?? @@ -418,18 +422,39 @@ def load_drawings_files(): ext_priority = {'th2': 0, 'th': 1} all_xml.sort(key=lambda t: ext_priority.get(t[0], 2)) + print(f" - Drawing found, starting parsing...") + + # Process files and collect modified scalar fields for bulk update + modified_map = {} # {DrawingFile instance: set(fields)} for extension, filename, pathobj in all_xml: + modified = set() if extension in {"pdf", "txt", "svg", "jpg", "png", ""}: - setdrwfileinfo(filename) + modified = setdrwfileinfo(filename) or set() elif extension == "xml": - settnlfileinfo(filename) + modified = settnlfileinfo(filename) or set() # important to import .th2 files before .th so that we can assign them when found in .th files elif extension == "th2": - settherionfileinfo(("th2", filename)) + modified = settherionfileinfo(("th2", filename)) or set() elif extension == "th": - settherionfileinfo(("th", filename)) + modified = settherionfileinfo(("th", filename)) or set() - print(f" - Drawings parsed") + if modified: + modified_map.setdefault(filename, set()).update(modified) + + # Bulk update scalar fields grouped by identical field-sets to use bulk_update efficiently + from collections import defaultdict + + groups = defaultdict(list) # {tuple(fields): [instances]} + for inst, fields in modified_map.items(): + key = tuple(sorted(fields)) + groups[key].append(inst) + + for fields_tuple, instances in groups.items(): + fields_list = list(fields_tuple) + # Use a conservative batch size + DrawingFile.objects.bulk_update(instances, fields_list, batch_size=500) + + print(f" - Drawings parsed and database updated.") # for drawfile in DrawingFile.objects.all(): # SetTunnelfileInfo(drawfile)