From e23cab50ed0c58d652909d4f91eeb0d1f428c82a Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Mon, 15 Dec 2025 20:51:07 +0000 Subject: [PATCH] refactored wallet identification --- core/TESTS/test_drawings.py | 30 ++++++++++++ parsers/drawings.py | 97 +++++++++++++++++++++++++++++-------- 2 files changed, 106 insertions(+), 21 deletions(-) diff --git a/core/TESTS/test_drawings.py b/core/TESTS/test_drawings.py index 60acca224..fa0f7b2d8 100644 --- a/core/TESTS/test_drawings.py +++ b/core/TESTS/test_drawings.py @@ -164,3 +164,33 @@ class DrawingsPathlibTests(TestCase): # Spot-check a few entries self.assertIn('bigdir/file0.txt', mapping) self.assertIn(f'bigdir/file{count-1}.txt', mapping) + + def test_assign_wallets_for_model_assigns_and_returns_wallets(self): + w = Wallet.objects.create(fpath='x', walletname='2025#20') + df = DrawingFile.objects.create(dwgpath='assign.th', dwgname='assign') + + res = drawings._assign_wallets_for_model(df, '2025#20', parser_label='AssignTest') + + self.assertTrue(res) + self.assertIn(w, df.dwgwallets.all()) + + def test_assign_wallets_for_model_creates_dataissue_on_missing(self): + df = DrawingFile.objects.create(dwgpath='missing.th', dwgname='missing') + + drawings._assign_wallets_for_model(df, 'NONEXISTENT', parser_label='AssignMissing') + + di = DataIssue.objects.filter(parser='AssignMissing', message__contains='not found') + self.assertTrue(di.exists()) + + def test_assign_wallets_for_model_records_dataissue_on_exception(self): + # Patch Wallet.objects.filter to raise an exception + from unittest.mock import patch + + df = DrawingFile.objects.create(dwgpath='err.th', dwgname='err') + + with patch('troggle.core.models.wallets.Wallet.objects.filter') as mock_filter: + mock_filter.side_effect = RuntimeError('boom') + drawings._assign_wallets_for_model(df, 'WHATEVER', parser_label='AssignError') + + di = DataIssue.objects.filter(parser='AssignError', message__contains='Exception') + self.assertTrue(di.exists()) diff --git 
a/parsers/drawings.py b/parsers/drawings.py index 69700e39b..f2a30ef42 100644 --- a/parsers/drawings.py +++ b/parsers/drawings.py @@ -1,6 +1,7 @@ import os import re from pathlib import Path +import time import settings from troggle.core.models.survex import DrawingFile @@ -84,15 +85,30 @@ rx_wallet = re.compile(r""" # from the end of a path string. # --- Group 1: Directory or Survey Prefix --- + # but current AND HISTORIC wallet namings, as images have been edited over the years ( # Start of Capture Group 1 \d{4}\#X?\d+\w? # Matches patterns like "2025#123", "2016#X04" or "1999#45a", NB # must be escaped in VERBOSE mode | # OR 1995-96kh # Matches the literal string "1995-96kh" + | + 1989LUSS # Matches the literal string "1989LUSS" + | # OR + 1989Surveybook # Matches the literal string "1989Surveybook" + | # OR + 1990NotKHsurveybook # Matches the literal string "1990NotKHsurveybook" + | # OR + 199?kh # NOTE(review): in VERBOSE mode '?' only makes the preceding '9' optional; use '199\d' if a wildcard digit was intended + | # OR + 199?notkh # NOTE(review): in VERBOSE mode '?' only makes the preceding '9' optional; use '199\d' if a wildcard digit was intended + | # OR + 199?Surveybookkh # NOTE(review): '?' only makes the preceding '9' optional - confirm intended + | # OR + 1992-94NotKHSurveybook # Matches the literal string "1992-94NotKHSurveybook" | # OR 92-94Surveybookkh # Matches the literal string "92-94Surveybookkh" | # OR 1991surveybook # Matches the literal string "1991surveybook" | # OR smkhs # Matches the literal string "smkhs" ) # End of Capture Group 1 @@ -108,6 +126,7 @@ rx_wallet = re.compile(r""" $ # Anchor, ensuring the match is at the end of the string """, re.VERBOSE | re.IGNORECASE) + def _set_filesize_and_check(fullpath, model_obj, parser_label, url_prefix="/dwgdataraw"): """Set model_obj.filesize from filesystem and create DataIssue if missing/zero. Returns True if file exists and has size > 0, False otherwise. @@ -151,7 +170,8 @@ def parse_tnl_file(dwgfile, path): which we have already seen when we imported all the files we could find in the surveyscans direstories.
The purpose is to find cross-references between Tunnel drawing files and wallets - AND to find the names of the scanfiles in that wallet - from reading the Tunnel file not from interrogating the wallet. + AND to find the names of the scanfiles in that wallet - from reading the Tunnel file not from + interrogating the wallet. Note that this means that the list of scanfiles will be as it was when the drawing was created, not as it is now. Perhaps we should not actually do it this way ? Or at least, label the table heading. @@ -166,7 +186,7 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"): """Unified processor to link drawing files to wallets/scans or referenced drawings. - If `path` matches a wallet pattern (rx_wallet), link the wallet and try to find the scan file in the wallet. - - If `path` looks like an image, do nothing (images are not treated as references here). + - If `path` looks like an image, do nothing (images are not treated as references here - yet). - Otherwise, treat `path` as a possible reference to another drawing (by name) and link via `dwgcontains`. """ @@ -215,7 +235,7 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"): print(message) DataIssue.objects.create(parser=parser_label, message=message, url=f"/dwgdataraw/{path}") rdwgfile = rdwgfilel[0] - if hasattr(dwgfile, 'dwgcontains'): + if hasattr(dwgfile, 'dwgcontains'): # implement model change in models/survex.py to use this dwgfile.dwgcontains.add(rdwgfile) return None, None @@ -240,6 +260,45 @@ def findimportinsert(therionfile, imp): pass +def _assign_wallets_for_model(model_obj, wallet_names, parser_label="Tunnel"): + """Assign wallets to `model_obj` by wallet name(s). + + wallet_names may be a single string or an iterable of names. This function + will add any Wallets found via Wallet.objects.filter(walletname__in=...) to + model_obj.dwgwallets and return the list of matched Wallet objects. 
If none + are found, or an exception occurs, a DataIssue is recorded with parser + set to `parser_label`. + """ + if not wallet_names: + return [] + + # Normalize to list of names + if isinstance(wallet_names, (str, bytes)): + names = [str(wallet_names)] + else: + try: + names = [str(n) for n in wallet_names] + except Exception: + names = [str(wallet_names)] + + try: + wallets = list(Wallet.objects.filter(walletname__in=names)) + if wallets: + for w in wallets: + model_obj.dwgwallets.add(w) + return wallets + # Nothing found: record a DataIssue + message = f" ! wallet(s) '{names}' not found from {getattr(model_obj, 'dwgpath', model_obj)}" + print(message) + DataIssue.objects.create(parser=parser_label, message=message, url=f"/dwgdataraw/{getattr(model_obj, 'dwgpath', '')}") + return [] + except Exception as e: + message = f" ! Exception while looking up wallet(s) '{names}' from {getattr(model_obj, 'dwgpath', model_obj)} -- ({e})" + print(message) + DataIssue.objects.create(parser=parser_label, message=message, url=f"/dwgdataraw/{getattr(model_obj, 'dwgpath', '')}") + return [] + + def settherionfileinfo(filetuple): """Read in the drawing file contents and sets values on the dwgfile object""" thtype, therionfile = filetuple @@ -257,10 +316,8 @@ def settherionfileinfo(filetuple): therionfile.npaths = len(rx_input.findall(ttext)) modified.add("npaths") if wallet_texts := rx_ref.findall(ttext): - # print(f"#ref {therionfile.dwgname} : {wallet_text}") - if wallets := Wallet.objects.filter(walletname__in=wallet_texts): # ! Django idiom not used elsewhere. A filter using a list of strings. 
- for w in wallets: - therionfile.dwgwallets.add(w) + # Delegate wallet assignment to helper; use parser_label 'Therion' + _assign_wallets_for_model(therionfile, wallet_texts, parser_label="Therion") elif thtype == "th2": therionfile.npaths = len(rx_line.findall(ttext)) modified.add("npaths") @@ -330,15 +387,9 @@ def settnlfileinfo(dwgfile): # # *file_begin "/home/expo/loser/caves-1623/2025-dw-01/trip1.svx" "trip1.svx" | *begin 1 | *export 1 25 | | ; Cave: 2025-dw-01 | ; Area in cave/QM: Entrance series | *title "2025-dw-01" | *date 2025.07.13 | *team "Dylan Wase" notes | *team "Daniel Gorst" dog | *instrument SAP "SAP6 Dylan" | *ref 2025#20 | for refs in rx_pctext.findall(ttext): - try: - wallets = Wallet.objects.filter(walletname=refs) - if wallets: - for w in wallets: - dwgfile.dwgwallets.add(w) - except Exception as e: - message = f" ! wallet not found referenced from {dwgfile} -- '{refs}' ({e}) " - print(message) - DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{dwgfile}") + if refs: + # Delegate wallet lookup/assignment to helper for consistent handling + _assign_wallets_for_model(dwgfile, refs, parser_label="Tunnel") # should also scan and look for survex blocks that might have been included, and image scans # which would populate dwgfile.survexfile @@ -365,6 +416,8 @@ def load_drawings_files(): We import JPG, PNG and SVG files; which have already been put on the server, but the upload form intentionally refuses to upload PNG and JPG (though it does allow SVG) """ + # Track elapsed time + start_time = time.perf_counter() all_xml = [] drawdatadir = Path(settings.DRAWINGS_DATA) DrawingFile.objects.all().delete() @@ -416,14 +469,13 @@ def load_drawings_files(): if dwgfile: all_xml.append((ext, dwgfile, p)) - print(f" - {len(all_xml)} Drawings files found") + elapsed = time.perf_counter() - start_time + print(f" - {len(all_xml)} Drawings files found ({elapsed:.2f}s)") # Process in a deterministic order; ensure .th2 are handled 
before .th ext_priority = {'th2': 0, 'th': 1} all_xml.sort(key=lambda t: ext_priority.get(t[0], 2)) - print(f" - Drawing found, starting parsing...") - # Process files and collect modified scalar fields for bulk update modified_map = {} # {DrawingFile instance: set(fields)} for extension, filename, pathobj in all_xml: @@ -441,7 +493,9 @@ def load_drawings_files(): if modified: modified_map.setdefault(filename, set()).update(modified) - # Bulk update scalar fields grouped by identical field-sets to use bulk_update efficiently + elapsed = time.perf_counter() - start_time + print(f" - Drawings parsed ({elapsed:.2f}s)") + # Bulk update scalar fields grouped by identical field-sets to use bulk_update efficiently from collections import defaultdict groups = defaultdict(list) # {tuple(fields): [instances]} @@ -454,7 +508,8 @@ def load_drawings_files(): # Use a conservative batch size DrawingFile.objects.bulk_update(instances, fields_list, batch_size=500) - print(f" - Drawings parsed and database updated.") + elapsed = time.perf_counter() - start_time + print(f" - Database updated ({elapsed:.2f}s)") # for drawfile in DrawingFile.objects.all(): # SetTunnelfileInfo(drawfile)