mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2025-12-17 17:47:13 +00:00
refactored wallet identification
This commit is contained in:
@@ -164,3 +164,33 @@ class DrawingsPathlibTests(TestCase):
|
|||||||
# Spot-check a few entries
|
# Spot-check a few entries
|
||||||
self.assertIn('bigdir/file0.txt', mapping)
|
self.assertIn('bigdir/file0.txt', mapping)
|
||||||
self.assertIn(f'bigdir/file{count-1}.txt', mapping)
|
self.assertIn(f'bigdir/file{count-1}.txt', mapping)
|
||||||
|
|
||||||
|
def test_assign_wallets_for_model_assigns_and_returns_wallets(self):
    """A known wallet name links the wallet to the drawing and is returned."""
    w = Wallet.objects.create(fpath='x', walletname='2025#20')
    df = DrawingFile.objects.create(dwgpath='assign.th', dwgname='assign')

    res = drawings._assign_wallets_for_model(df, '2025#20', parser_label='AssignTest')

    # Pin the exact return value, not just its truthiness.
    self.assertEqual(list(res), [w])
    self.assertIn(w, df.dwgwallets.all())
    # A successful assignment must not be reported as a problem.
    self.assertFalse(DataIssue.objects.filter(parser='AssignTest').exists())
|
||||||
|
|
||||||
|
def test_assign_wallets_for_model_creates_dataissue_on_missing(self):
    """An unknown wallet name yields no link, an empty result and a DataIssue."""
    df = DrawingFile.objects.create(dwgpath='missing.th', dwgname='missing')

    res = drawings._assign_wallets_for_model(df, 'NONEXISTENT', parser_label='AssignMissing')

    # Nothing matched, so nothing is returned and nothing is linked.
    self.assertEqual(list(res), [])
    self.assertFalse(df.dwgwallets.exists())

    di = DataIssue.objects.filter(parser='AssignMissing', message__contains='not found')
    self.assertTrue(di.exists())
|
||||||
|
|
||||||
|
def test_assign_wallets_for_model_records_dataissue_on_exception(self):
    """A failing wallet lookup is swallowed and recorded as a DataIssue."""
    from unittest.mock import patch

    df = DrawingFile.objects.create(dwgpath='err.th', dwgname='err')

    # Make Wallet.objects.filter raise so the helper's exception path runs.
    with patch(
        'troggle.core.models.wallets.Wallet.objects.filter',
        side_effect=RuntimeError('boom'),
    ):
        res = drawings._assign_wallets_for_model(df, 'WHATEVER', parser_label='AssignError')

    # The helper must not propagate the error; it returns an empty result.
    self.assertEqual(list(res), [])
    self.assertFalse(df.dwgwallets.exists())

    di = DataIssue.objects.filter(parser='AssignError', message__contains='Exception')
    self.assertTrue(di.exists())
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
import time
|
||||||
|
|
||||||
import settings
|
import settings
|
||||||
from troggle.core.models.survex import DrawingFile
|
from troggle.core.models.survex import DrawingFile
|
||||||
@@ -84,15 +85,32 @@ rx_wallet = re.compile(r"""
|
|||||||
# from the end of a path string.
|
# from the end of a path string.
|
||||||
|
|
||||||
# --- Group 1: Directory or Survey Prefix ---
|
# --- Group 1: Directory or Survey Prefix ---
|
||||||
|
# but current AND HISTORIC wallet namings, as images have been edited over the years
|
||||||
( # Start of Capture Group 1
|
( # Start of Capture Group 1
|
||||||
\d{4}\#X?\d+\w? # Matches patterns like "2025#123", "2016#X04" or "1999#45a", NB # must be escaped in VERBOSE mode
|
\d{4}\#X?\d+\w? # Matches patterns like "2025#123", "2016#X04" or "1999#45a", NB # must be escaped in VERBOSE mode
|
||||||
| # OR
|
| # OR
|
||||||
1995-96kh # Matches the literal string "1995-96kh"
|
1995-96kh # Matches the literal string "1995-96kh"
|
||||||
|
|
|
||||||
|
1989LUSS # Matches the literal string "1989LUSS"
|
||||||
|
| # OR
|
||||||
|
1989Surveybook # Matches the literal string "1989Surveybook"
|
||||||
|
| # OR
|
||||||
|
1990NotKHsurveybook # Matches the literal string "1990NotKHsurveybook"
|
||||||
|
| # OR
|
||||||
|
199?kh #
|
||||||
|
| # OR
|
||||||
|
199?notkh #
|
||||||
|
| # OR
|
||||||
|
199?Surveybookkh # Matches the literal string "92-94Surveybookkh"
|
||||||
|
| # OR
|
||||||
|
1992-94NotKHSurveybook # Matches the literal string "1992-94NotKHSurveybook"
|
||||||
| # OR
|
| # OR
|
||||||
92-94Surveybookkh # Matches the literal string "92-94Surveybookkh"
|
92-94Surveybookkh # Matches the literal string "92-94Surveybookkh"
|
||||||
| # OR
|
| # OR
|
||||||
1991surveybook # Matches the literal string "1991surveybook"
|
1991surveybook # Matches the literal string "1991surveybook"
|
||||||
| # OR
|
| # OR
|
||||||
|
1991surveybook # Matches the literal string "1991surveybook"
|
||||||
|
| # OR
|
||||||
smkhs # Matches the literal string "smkhs"
|
smkhs # Matches the literal string "smkhs"
|
||||||
) # End of Capture Group 1
|
) # End of Capture Group 1
|
||||||
|
|
||||||
@@ -108,6 +126,7 @@ rx_wallet = re.compile(r"""
|
|||||||
|
|
||||||
$ # Anchor, ensuring the match is at the end of the string
|
$ # Anchor, ensuring the match is at the end of the string
|
||||||
""", re.VERBOSE | re.IGNORECASE)
|
""", re.VERBOSE | re.IGNORECASE)
|
||||||
|
|
||||||
def _set_filesize_and_check(fullpath, model_obj, parser_label, url_prefix="/dwgdataraw"):
|
def _set_filesize_and_check(fullpath, model_obj, parser_label, url_prefix="/dwgdataraw"):
|
||||||
"""Set model_obj.filesize from filesystem and create DataIssue if missing/zero.
|
"""Set model_obj.filesize from filesystem and create DataIssue if missing/zero.
|
||||||
Returns True if file exists and has size > 0, False otherwise.
|
Returns True if file exists and has size > 0, False otherwise.
|
||||||
@@ -151,7 +170,8 @@ def parse_tnl_file(dwgfile, path):
|
|||||||
which we have already seen when we imported all the files we could find in the surveyscans directories.
|
which we have already seen when we imported all the files we could find in the surveyscans directories.
|
||||||
|
|
||||||
The purpose is to find cross-references between Tunnel drawing files and wallets
|
The purpose is to find cross-references between Tunnel drawing files and wallets
|
||||||
AND to find the names of the scanfiles in that wallet - from reading the Tunnel file not from interrogating the wallet.
|
AND to find the names of the scanfiles in that wallet - from reading the Tunnel file not from
|
||||||
|
interrogating the wallet.
|
||||||
Note that this means that the list of scanfiles will be as it was when the drawing was created, not as it is now. Perhaps
|
Note that this means that the list of scanfiles will be as it was when the drawing was created, not as it is now. Perhaps
|
||||||
we should not actually do it this way ? Or at least, label the table heading.
|
we should not actually do it this way ? Or at least, label the table heading.
|
||||||
|
|
||||||
@@ -166,7 +186,7 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"):
|
|||||||
"""Unified processor to link drawing files to wallets/scans or referenced drawings.
|
"""Unified processor to link drawing files to wallets/scans or referenced drawings.
|
||||||
|
|
||||||
- If `path` matches a wallet pattern (rx_wallet), link the wallet and try to find the scan file in the wallet.
|
- If `path` matches a wallet pattern (rx_wallet), link the wallet and try to find the scan file in the wallet.
|
||||||
- If `path` looks like an image, do nothing (images are not treated as references here).
|
- If `path` looks like an image, do nothing (images are not treated as references here - yet).
|
||||||
- Otherwise, treat `path` as a possible reference to another drawing (by name) and link via `dwgcontains`.
|
- Otherwise, treat `path` as a possible reference to another drawing (by name) and link via `dwgcontains`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -215,7 +235,7 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"):
|
|||||||
print(message)
|
print(message)
|
||||||
DataIssue.objects.create(parser=parser_label, message=message, url=f"/dwgdataraw/{path}")
|
DataIssue.objects.create(parser=parser_label, message=message, url=f"/dwgdataraw/{path}")
|
||||||
rdwgfile = rdwgfilel[0]
|
rdwgfile = rdwgfilel[0]
|
||||||
if hasattr(dwgfile, 'dwgcontains'):
|
if hasattr(dwgfile, 'dwgcontains'): # implement model change in models/survex.py to use this
|
||||||
dwgfile.dwgcontains.add(rdwgfile)
|
dwgfile.dwgcontains.add(rdwgfile)
|
||||||
|
|
||||||
return None, None
|
return None, None
|
||||||
@@ -240,6 +260,45 @@ def findimportinsert(therionfile, imp):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def _assign_wallets_for_model(model_obj, wallet_names, parser_label="Tunnel"):
    """Assign wallets to `model_obj` by wallet name(s).

    Args:
        model_obj: object exposing a `dwgwallets` relation with an `add()`
            method; its `dwgpath` attribute, when present, is used in the
            DataIssue message and url.
        wallet_names: a single name (str or bytes) or an iterable of names.
            A non-iterable, non-string value is treated as one name.
        parser_label: value stored in the `parser` field of any DataIssue
            created here.

    Returns:
        The list of Wallet objects found via
        Wallet.objects.filter(walletname__in=...) and added to
        `model_obj.dwgwallets`. An empty list is returned when
        `wallet_names` is empty, when nothing matches, or when the
        lookup/assignment raises; in the last two cases a DataIssue is
        recorded.
    """
    if not wallet_names:
        return []

    # Normalise to a list of raw names; a lone str/bytes must not be iterated
    # character by character.
    if isinstance(wallet_names, (str, bytes)):
        raw_names = [wallet_names]
    else:
        try:
            raw_names = list(wallet_names)
        except TypeError:
            # Not iterable: treat the value itself as a single name.
            raw_names = [wallet_names]

    # str(b"2025#20") would yield "b'2025#20'", which can never match a
    # walletname, so bytes are decoded instead of stringified.
    names = [
        n.decode("utf-8", errors="replace") if isinstance(n, bytes) else str(n)
        for n in raw_names
    ]

    source = getattr(model_obj, 'dwgpath', model_obj)
    url = f"/dwgdataraw/{getattr(model_obj, 'dwgpath', '')}"

    try:
        wallets = list(Wallet.objects.filter(walletname__in=names))
        if wallets:
            # One call links every matched wallet.
            model_obj.dwgwallets.add(*wallets)
    except Exception as e:
        # Deliberately broad: a failed lookup is recorded as a DataIssue
        # rather than raised to the caller.
        message = f" ! Exception while looking up wallet(s) '{names}' from {source} -- ({e})"
        print(message)
        DataIssue.objects.create(parser=parser_label, message=message, url=url)
        return []

    if not wallets:
        # Nothing found: record a DataIssue
        message = f" ! wallet(s) '{names}' not found from {source}"
        print(message)
        DataIssue.objects.create(parser=parser_label, message=message, url=url)

    return wallets
|
||||||
|
|
||||||
|
|
||||||
def settherionfileinfo(filetuple):
|
def settherionfileinfo(filetuple):
|
||||||
"""Read in the drawing file contents and sets values on the dwgfile object"""
|
"""Read in the drawing file contents and sets values on the dwgfile object"""
|
||||||
thtype, therionfile = filetuple
|
thtype, therionfile = filetuple
|
||||||
@@ -257,10 +316,8 @@ def settherionfileinfo(filetuple):
|
|||||||
therionfile.npaths = len(rx_input.findall(ttext))
|
therionfile.npaths = len(rx_input.findall(ttext))
|
||||||
modified.add("npaths")
|
modified.add("npaths")
|
||||||
if wallet_texts := rx_ref.findall(ttext):
|
if wallet_texts := rx_ref.findall(ttext):
|
||||||
# print(f"#ref {therionfile.dwgname} : {wallet_text}")
|
# Delegate wallet assignment to helper; use parser_label 'Therion'
|
||||||
if wallets := Wallet.objects.filter(walletname__in=wallet_texts): # ! Django idiom not used elsewhere. A filter using a list of strings.
|
_assign_wallets_for_model(therionfile, wallet_texts, parser_label="Therion")
|
||||||
for w in wallets:
|
|
||||||
therionfile.dwgwallets.add(w)
|
|
||||||
elif thtype == "th2":
|
elif thtype == "th2":
|
||||||
therionfile.npaths = len(rx_line.findall(ttext))
|
therionfile.npaths = len(rx_line.findall(ttext))
|
||||||
modified.add("npaths")
|
modified.add("npaths")
|
||||||
@@ -330,15 +387,9 @@ def settnlfileinfo(dwgfile):
|
|||||||
# <pathcodes>
|
# <pathcodes>
|
||||||
# <pctext style="survey" nodeposxrel="-1.0" nodeposyrel="-1.0"> *file_begin "/home/expo/loser/caves-1623/2025-dw-01/trip1.svx" "trip1.svx" | *begin 1 | *export 1 25 | | ; Cave: 2025-dw-01 | ; Area in cave/QM: Entrance series | *title "2025-dw-01" | *date 2025.07.13 | *team "Dylan Wase" notes | *team "Daniel Gorst" dog | *instrument SAP "SAP6 Dylan" | *ref 2025#20 |
|
# <pctext style="survey" nodeposxrel="-1.0" nodeposyrel="-1.0"> *file_begin "/home/expo/loser/caves-1623/2025-dw-01/trip1.svx" "trip1.svx" | *begin 1 | *export 1 25 | | ; Cave: 2025-dw-01 | ; Area in cave/QM: Entrance series | *title "2025-dw-01" | *date 2025.07.13 | *team "Dylan Wase" notes | *team "Daniel Gorst" dog | *instrument SAP "SAP6 Dylan" | *ref 2025#20 |
|
||||||
for refs in rx_pctext.findall(ttext):
|
for refs in rx_pctext.findall(ttext):
|
||||||
try:
|
if refs:
|
||||||
wallets = Wallet.objects.filter(walletname=refs)
|
# Delegate wallet lookup/assignment to helper for consistent handling
|
||||||
if wallets:
|
_assign_wallets_for_model(dwgfile, refs, parser_label="Tunnel")
|
||||||
for w in wallets:
|
|
||||||
dwgfile.dwgwallets.add(w)
|
|
||||||
except Exception as e:
|
|
||||||
message = f" ! wallet not found referenced from {dwgfile} -- '{refs}' ({e}) "
|
|
||||||
print(message)
|
|
||||||
DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{dwgfile}")
|
|
||||||
# should also scan and look for survex blocks that might have been included, and image scans
|
# should also scan and look for survex blocks that might have been included, and image scans
|
||||||
# which would populate dwgfile.survexfile
|
# which would populate dwgfile.survexfile
|
||||||
|
|
||||||
@@ -365,6 +416,8 @@ def load_drawings_files():
|
|||||||
We import JPG, PNG and SVG files; which have already been put on the server,
|
We import JPG, PNG and SVG files; which have already been put on the server,
|
||||||
but the upload form intentionally refuses to upload PNG and JPG (though it does allow SVG)
|
but the upload form intentionally refuses to upload PNG and JPG (though it does allow SVG)
|
||||||
"""
|
"""
|
||||||
|
# Track elapsed time
|
||||||
|
start_time = time.perf_counter()
|
||||||
all_xml = []
|
all_xml = []
|
||||||
drawdatadir = Path(settings.DRAWINGS_DATA)
|
drawdatadir = Path(settings.DRAWINGS_DATA)
|
||||||
DrawingFile.objects.all().delete()
|
DrawingFile.objects.all().delete()
|
||||||
@@ -416,14 +469,13 @@ def load_drawings_files():
|
|||||||
if dwgfile:
|
if dwgfile:
|
||||||
all_xml.append((ext, dwgfile, p))
|
all_xml.append((ext, dwgfile, p))
|
||||||
|
|
||||||
print(f" - {len(all_xml)} Drawings files found")
|
elapsed = time.perf_counter() - start_time
|
||||||
|
print(f" - {len(all_xml)} Drawings files found ({elapsed:.2f}s)")
|
||||||
|
|
||||||
# Process in a deterministic order; ensure .th2 are handled before .th
|
# Process in a deterministic order; ensure .th2 are handled before .th
|
||||||
ext_priority = {'th2': 0, 'th': 1}
|
ext_priority = {'th2': 0, 'th': 1}
|
||||||
all_xml.sort(key=lambda t: ext_priority.get(t[0], 2))
|
all_xml.sort(key=lambda t: ext_priority.get(t[0], 2))
|
||||||
|
|
||||||
print(f" - Drawing found, starting parsing...")
|
|
||||||
|
|
||||||
# Process files and collect modified scalar fields for bulk update
|
# Process files and collect modified scalar fields for bulk update
|
||||||
modified_map = {} # {DrawingFile instance: set(fields)}
|
modified_map = {} # {DrawingFile instance: set(fields)}
|
||||||
for extension, filename, pathobj in all_xml:
|
for extension, filename, pathobj in all_xml:
|
||||||
@@ -441,7 +493,9 @@ def load_drawings_files():
|
|||||||
if modified:
|
if modified:
|
||||||
modified_map.setdefault(filename, set()).update(modified)
|
modified_map.setdefault(filename, set()).update(modified)
|
||||||
|
|
||||||
# Bulk update scalar fields grouped by identical field-sets to use bulk_update efficiently
|
elapsed = time.perf_counter() - start_time
|
||||||
|
print(f" - Drawings parsed ({elapsed:.2f}s)")
|
||||||
|
# Bulk update scalar fields grouped by identical field-sets to use bulk_update efficiently
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
groups = defaultdict(list) # {tuple(fields): [instances]}
|
groups = defaultdict(list) # {tuple(fields): [instances]}
|
||||||
@@ -454,7 +508,8 @@ def load_drawings_files():
|
|||||||
# Use a conservative batch size
|
# Use a conservative batch size
|
||||||
DrawingFile.objects.bulk_update(instances, fields_list, batch_size=500)
|
DrawingFile.objects.bulk_update(instances, fields_list, batch_size=500)
|
||||||
|
|
||||||
print(f" - Drawings parsed and database updated.")
|
elapsed = time.perf_counter() - start_time
|
||||||
|
print(f" - Database updated ({elapsed:.2f}s)")
|
||||||
|
|
||||||
# for drawfile in DrawingFile.objects.all():
|
# for drawfile in DrawingFile.objects.all():
|
||||||
# SetTunnelfileInfo(drawfile)
|
# SetTunnelfileInfo(drawfile)
|
||||||
|
|||||||
Reference in New Issue
Block a user