mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2025-12-17 18:17:07 +00:00
Bulk-update all drawing records in one pass after parsing, instead of saving each one individually.
This commit is contained in:
@@ -122,7 +122,7 @@ def _set_filesize_and_check(fullpath, model_obj, parser_label, url_prefix="/dwgd
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
model_obj.filesize = size
|
model_obj.filesize = size
|
||||||
model_obj.save()
|
# Do not save here; caller should include 'filesize' in the bulk update set.
|
||||||
if size <= 0:
|
if size <= 0:
|
||||||
message = f"! Zero length {parser_label.lower()} file {fullpath}"
|
message = f"! Zero length {parser_label.lower()} file {fullpath}"
|
||||||
print(message)
|
print(message)
|
||||||
@@ -218,7 +218,6 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"):
|
|||||||
if hasattr(dwgfile, 'dwgcontains'):
|
if hasattr(dwgfile, 'dwgcontains'):
|
||||||
dwgfile.dwgcontains.add(rdwgfile)
|
dwgfile.dwgcontains.add(rdwgfile)
|
||||||
|
|
||||||
dwgfile.save()
|
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
|
|
||||||
@@ -246,8 +245,9 @@ def settherionfileinfo(filetuple):
|
|||||||
thtype, therionfile = filetuple
|
thtype, therionfile = filetuple
|
||||||
|
|
||||||
ff = Path(settings.DRAWINGS_DATA) / therionfile.dwgpath
|
ff = Path(settings.DRAWINGS_DATA) / therionfile.dwgpath
|
||||||
if not _set_filesize_and_check(ff, therionfile, "Therion"):
|
modified = set()
|
||||||
return
|
if _set_filesize_and_check(ff, therionfile, "Therion"):
|
||||||
|
modified.add("filesize")
|
||||||
|
|
||||||
ttext = _read_text_file(ff)
|
ttext = _read_text_file(ff)
|
||||||
|
|
||||||
@@ -255,6 +255,7 @@ def settherionfileinfo(filetuple):
|
|||||||
# print(len(re.findall(r"line", ttext)))
|
# print(len(re.findall(r"line", ttext)))
|
||||||
if thtype == "th":
|
if thtype == "th":
|
||||||
therionfile.npaths = len(rx_input.findall(ttext))
|
therionfile.npaths = len(rx_input.findall(ttext))
|
||||||
|
modified.add("npaths")
|
||||||
if wallet_texts := rx_ref.findall(ttext):
|
if wallet_texts := rx_ref.findall(ttext):
|
||||||
# print(f"#ref {therionfile.dwgname} : {wallet_text}")
|
# print(f"#ref {therionfile.dwgname} : {wallet_text}")
|
||||||
if wallets := Wallet.objects.filter(walletname__in=wallet_texts): # ! Django idiom not used elsewhere. A filter using a list of strings.
|
if wallets := Wallet.objects.filter(walletname__in=wallet_texts): # ! Django idiom not used elsewhere. A filter using a list of strings.
|
||||||
@@ -262,6 +263,7 @@ def settherionfileinfo(filetuple):
|
|||||||
therionfile.dwgwallets.add(w)
|
therionfile.dwgwallets.add(w)
|
||||||
elif thtype == "th2":
|
elif thtype == "th2":
|
||||||
therionfile.npaths = len(rx_line.findall(ttext))
|
therionfile.npaths = len(rx_line.findall(ttext))
|
||||||
|
modified.add("npaths")
|
||||||
|
|
||||||
# scan and look for survex blocks that might have been included, and image scans (as for tunnel drawings)
|
# scan and look for survex blocks that might have been included, and image scans (as for tunnel drawings)
|
||||||
# which would populate dwgfile.survexfile
|
# which would populate dwgfile.survexfile
|
||||||
@@ -293,7 +295,8 @@ def settherionfileinfo(filetuple):
|
|||||||
DataIssue.objects.create(parser="xTherion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}")
|
DataIssue.objects.create(parser="xTherion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}")
|
||||||
findimportinsert(therionfile, inp)
|
findimportinsert(therionfile, inp)
|
||||||
|
|
||||||
therionfile.save()
|
# Defer saving scalar fields; caller will perform bulk_update.
|
||||||
|
return modified
|
||||||
|
|
||||||
|
|
||||||
def settnlfileinfo(dwgfile):
|
def settnlfileinfo(dwgfile):
|
||||||
@@ -307,13 +310,14 @@ def settnlfileinfo(dwgfile):
|
|||||||
*ref wallet identifiers may be found in at least two different places in tunnel files.
|
*ref wallet identifiers may be found in at least two different places in tunnel files.
|
||||||
"""
|
"""
|
||||||
ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath
|
ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath
|
||||||
if not _set_filesize_and_check(ff, dwgfile, "Tunnel"):
|
modified = set()
|
||||||
return
|
if _set_filesize_and_check(ff, dwgfile, "Tunnel"):
|
||||||
|
modified.add("filesize")
|
||||||
|
|
||||||
ttext = _read_text_file(ff)
|
ttext = _read_text_file(ff)
|
||||||
|
|
||||||
dwgfile.npaths = len(rx_skpath.findall(ttext))
|
dwgfile.npaths = len(rx_skpath.findall(ttext))
|
||||||
# dwgfile.save()
|
modified.add("npaths")
|
||||||
|
|
||||||
# example drawing file in Tunnel format.
|
# example drawing file in Tunnel format.
|
||||||
# <tunnelxml tunnelversion="version2009-06-21 Matienzo" tunnelproject="ireby" tunneluser="goatchurch" tunneldate="2009-06-29 23:22:17">
|
# <tunnelxml tunnelversion="version2009-06-21 Matienzo" tunnelproject="ireby" tunneluser="goatchurch" tunneldate="2009-06-29 23:22:17">
|
||||||
@@ -335,11 +339,11 @@ def settnlfileinfo(dwgfile):
|
|||||||
message = f" ! wallet not found referenced from {dwgfile} -- '{refs}' ({e}) "
|
message = f" ! wallet not found referenced from {dwgfile} -- '{refs}' ({e}) "
|
||||||
print(message)
|
print(message)
|
||||||
DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{dwgfile}")
|
DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{dwgfile}")
|
||||||
|
|
||||||
# should also scan and look for survex blocks that might have been included, and image scans
|
# should also scan and look for survex blocks that might have been included, and image scans
|
||||||
# which would populate dwgfile.survexfile
|
# which would populate dwgfile.survexfile
|
||||||
|
|
||||||
dwgfile.save()
|
# Defer scalar saves to bulk_update; return set of modified fields
|
||||||
|
return modified
|
||||||
|
|
||||||
|
|
||||||
def setdrwfileinfo(dwgfile):
|
def setdrwfileinfo(dwgfile):
|
||||||
@@ -348,13 +352,13 @@ def setdrwfileinfo(dwgfile):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath
|
ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath
|
||||||
|
# Set filesize (if available) but do not save; return modified field names
|
||||||
if not _set_filesize_and_check(ff, dwgfile, "drawings"):
|
if not _set_filesize_and_check(ff, dwgfile, "drawings"):
|
||||||
return
|
return set()
|
||||||
# nothing more to parse for generic files
|
return {"filesize"}
|
||||||
|
|
||||||
def load_drawings_files():
|
def load_drawings_files():
|
||||||
"""Breadth first search of drawings directory looking for sub-directories and *.xml filesize
|
"""
|
||||||
|
|
||||||
Why do we have all this detection of file types/! Why not use get_mime_types ?
|
Why do we have all this detection of file types/! Why not use get_mime_types ?
|
||||||
What is it all for ??
|
What is it all for ??
|
||||||
|
|
||||||
@@ -418,18 +422,39 @@ def load_drawings_files():
|
|||||||
ext_priority = {'th2': 0, 'th': 1}
|
ext_priority = {'th2': 0, 'th': 1}
|
||||||
all_xml.sort(key=lambda t: ext_priority.get(t[0], 2))
|
all_xml.sort(key=lambda t: ext_priority.get(t[0], 2))
|
||||||
|
|
||||||
|
print(f" - Drawing found, starting parsing...")
|
||||||
|
|
||||||
|
# Process files and collect modified scalar fields for bulk update
|
||||||
|
modified_map = {} # {DrawingFile instance: set(fields)}
|
||||||
for extension, filename, pathobj in all_xml:
|
for extension, filename, pathobj in all_xml:
|
||||||
|
modified = set()
|
||||||
if extension in {"pdf", "txt", "svg", "jpg", "png", ""}:
|
if extension in {"pdf", "txt", "svg", "jpg", "png", ""}:
|
||||||
setdrwfileinfo(filename)
|
modified = setdrwfileinfo(filename) or set()
|
||||||
elif extension == "xml":
|
elif extension == "xml":
|
||||||
settnlfileinfo(filename)
|
modified = settnlfileinfo(filename) or set()
|
||||||
# important to import .th2 files before .th so that we can assign them when found in .th files
|
# important to import .th2 files before .th so that we can assign them when found in .th files
|
||||||
elif extension == "th2":
|
elif extension == "th2":
|
||||||
settherionfileinfo(("th2", filename))
|
modified = settherionfileinfo(("th2", filename)) or set()
|
||||||
elif extension == "th":
|
elif extension == "th":
|
||||||
settherionfileinfo(("th", filename))
|
modified = settherionfileinfo(("th", filename)) or set()
|
||||||
|
|
||||||
print(f" - Drawings parsed")
|
if modified:
|
||||||
|
modified_map.setdefault(filename, set()).update(modified)
|
||||||
|
|
||||||
|
# Bulk update scalar fields grouped by identical field-sets to use bulk_update efficiently
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
groups = defaultdict(list) # {tuple(fields): [instances]}
|
||||||
|
for inst, fields in modified_map.items():
|
||||||
|
key = tuple(sorted(fields))
|
||||||
|
groups[key].append(inst)
|
||||||
|
|
||||||
|
for fields_tuple, instances in groups.items():
|
||||||
|
fields_list = list(fields_tuple)
|
||||||
|
# Use a conservative batch size
|
||||||
|
DrawingFile.objects.bulk_update(instances, fields_list, batch_size=500)
|
||||||
|
|
||||||
|
print(f" - Drawings parsed and database updated.")
|
||||||
|
|
||||||
# for drawfile in DrawingFile.objects.all():
|
# for drawfile in DrawingFile.objects.all():
|
||||||
# SetTunnelfileInfo(drawfile)
|
# SetTunnelfileInfo(drawfile)
|
||||||
|
|||||||
Reference in New Issue
Block a user