
refactored wallet identification

2025-12-15 20:51:07 +00:00
parent d6b2811457
commit e23cab50ed
2 changed files with 106 additions and 21 deletions

View File

@@ -164,3 +164,33 @@ class DrawingsPathlibTests(TestCase):
# Spot-check a few entries
self.assertIn('bigdir/file0.txt', mapping)
self.assertIn(f'bigdir/file{count-1}.txt', mapping)
def test_assign_wallets_for_model_assigns_and_returns_wallets(self):
w = Wallet.objects.create(fpath='x', walletname='2025#20')
df = DrawingFile.objects.create(dwgpath='assign.th', dwgname='assign')
res = drawings._assign_wallets_for_model(df, '2025#20', parser_label='AssignTest')
self.assertTrue(res)
self.assertIn(w, df.dwgwallets.all())
def test_assign_wallets_for_model_creates_dataissue_on_missing(self):
df = DrawingFile.objects.create(dwgpath='missing.th', dwgname='missing')
drawings._assign_wallets_for_model(df, 'NONEXISTENT', parser_label='AssignMissing')
di = DataIssue.objects.filter(parser='AssignMissing', message__contains='not found')
self.assertTrue(di.exists())
def test_assign_wallets_for_model_records_dataissue_on_exception(self):
# Patch Wallet.objects.filter to raise an exception
from unittest.mock import patch
df = DrawingFile.objects.create(dwgpath='err.th', dwgname='err')
with patch('troggle.core.models.wallets.Wallet.objects.filter') as mock_filter:
mock_filter.side_effect = RuntimeError('boom')
drawings._assign_wallets_for_model(df, 'WHATEVER', parser_label='AssignError')
di = DataIssue.objects.filter(parser='AssignError', message__contains='Exception')
self.assertTrue(di.exists())

View File

@@ -1,6 +1,7 @@
import os
import re
from pathlib import Path
import time

import settings
from troggle.core.models.survex import DrawingFile
@@ -84,15 +85,32 @@ rx_wallet = re.compile(r"""
# from the end of a path string.
# --- Group 1: Directory or Survey Prefix ---
# covering both current AND HISTORIC wallet namings, as images have been edited over the years
( # Start of Capture Group 1
\d{4}\#X?\d+\w? # Matches patterns like "2025#123", "2016#X04" or "1999#45a", NB # must be escaped in VERBOSE mode
| # OR
1995-96kh # Matches the literal string "1995-96kh"
| # OR
1989LUSS # Matches the literal string "1989LUSS"
| # OR
1989Surveybook # Matches the literal string "1989Surveybook"
| # OR
1990NotKHsurveybook # Matches the literal string "1990NotKHsurveybook"
| # OR
199?kh # Matches "19kh" or "199kh" (the "?" makes the preceding "9" optional)
| # OR
199?notkh # Matches "19notkh" or "199notkh"
| # OR
199?Surveybookkh # Matches "19Surveybookkh" or "199Surveybookkh"
| # OR
1992-94NotKHSurveybook # Matches the literal string "1992-94NotKHSurveybook"
| # OR
92-94Surveybookkh # Matches the literal string "92-94Surveybookkh"
| # OR
1991surveybook # Matches the literal string "1991surveybook"
| # OR
smkhs # Matches the literal string "smkhs"
) # End of Capture Group 1
@@ -108,6 +126,7 @@ rx_wallet = re.compile(r"""
$ # Anchor, ensuring the match is at the end of the string
""", re.VERBOSE | re.IGNORECASE)
def _set_filesize_and_check(fullpath, model_obj, parser_label, url_prefix="/dwgdataraw"):
"""Set model_obj.filesize from filesystem and create DataIssue if missing/zero.
Returns True if file exists and has size > 0, False otherwise.
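The body of _set_filesize_and_check sits outside this hunk. A minimal sketch of the behaviour the docstring describes, assuming the module-level DataIssue import that drawings.py already uses; the helper name and message wording here are illustrative, not the committed code:

def _set_filesize_and_check_sketch(fullpath, model_obj, parser_label, url_prefix="/dwgdataraw"):
    p = Path(fullpath)
    size = p.stat().st_size if p.is_file() else 0
    model_obj.filesize = size
    if size == 0:
        # missing or zero-length file: record a DataIssue and report failure
        message = f" ! Zero length or missing file {fullpath}"
        print(message)
        DataIssue.objects.create(parser=parser_label, message=message,
                                 url=f"{url_prefix}/{getattr(model_obj, 'dwgpath', '')}")
        return False
    return True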
@@ -151,7 +170,8 @@ def parse_tnl_file(dwgfile, path):
which we have already seen when we imported all the files we could find in the surveyscans directories.
The purpose is to find cross-references between Tunnel drawing files and wallets The purpose is to find cross-references between Tunnel drawing files and wallets
AND to find the names of the scanfiles in that wallet - from reading the Tunnel file not from
interrogating the wallet.
Note that this means that the list of scanfiles will be as it was when the drawing was created, not as it is now. Perhaps
we should not actually do it this way ? Or at least, label the table heading.
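The wallet names being cross-referenced here are matched by rx_wallet above (Group 1 captures the wallet name), e.g. "2025#20" in the *ref line quoted further down. A quick, purely illustrative sanity check with a cut-down pattern; rx_demo is a simplified subset, not the real rx_wallet:

import re

# Simplified subset of the Group 1 alternatives, for illustration only
rx_demo = re.compile(r"(\d{4}\#X?\d+\w?|1995-96kh|1991surveybook|smkhs)$", re.VERBOSE | re.IGNORECASE)

for name in ["2025#20", "2016#X04", "1999#45a", "1995-96KH", "smkhs", "notawallet"]:
    m = rx_demo.search(name)
    print(f"{name} -> {m.group(1) if m else None}")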
@@ -166,7 +186,7 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"):
"""Unified processor to link drawing files to wallets/scans or referenced drawings.
- If `path` matches a wallet pattern (rx_wallet), link the wallet and try to find the scan file in the wallet.
- If `path` looks like an image, do nothing (images are not treated as references here - yet).
- Otherwise, treat `path` as a possible reference to another drawing (by name) and link via `dwgcontains`.
"""
@@ -215,7 +235,7 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"):
print(message)
DataIssue.objects.create(parser=parser_label, message=message, url=f"/dwgdataraw/{path}")
rdwgfile = rdwgfilel[0]
if hasattr(dwgfile, 'dwgcontains'): # implement model change in models/survex.py to use this
dwgfile.dwgcontains.add(rdwgfile)
return None, None
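Taken together, these hunks describe a three-way dispatch; reduced to a sketch (IMAGE_EXTS, the name-based DrawingFile query and the use of group(1) are assumptions for illustration, and the real function also resolves the scan file inside the matched wallet):

IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".gif", ".pdf"}   # assumed set, for illustration

def _process_reference_sketch(dwgfile, path, parser_label="Tunnel"):
    if m := rx_wallet.search(path):
        # wallet reference: link the wallet (scan-file lookup omitted in this sketch)
        _assign_wallets_for_model(dwgfile, m.group(1), parser_label=parser_label)
    elif Path(path).suffix.lower() in IMAGE_EXTS:
        pass   # images are not treated as references here - yet
    else:
        # possible reference to another drawing, matched by name
        if rdwg := DrawingFile.objects.filter(dwgname=Path(path).stem).first():
            if hasattr(dwgfile, "dwgcontains"):
                dwgfile.dwgcontains.add(rdwg)
    return None, None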
@@ -240,6 +260,45 @@ def findimportinsert(therionfile, imp):
pass
def _assign_wallets_for_model(model_obj, wallet_names, parser_label="Tunnel"):
"""Assign wallets to `model_obj` by wallet name(s).
wallet_names may be a single string or an iterable of names. This function
will add any Wallets found via Wallet.objects.filter(walletname__in=...) to
model_obj.dwgwallets and return the list of matched Wallet objects. If none
are found, or an exception occurs, a DataIssue is recorded with parser
set to `parser_label`.
"""
if not wallet_names:
return []
# Normalize to list of names
if isinstance(wallet_names, (str, bytes)):
names = [str(wallet_names)]
else:
try:
names = [str(n) for n in wallet_names]
except Exception:
names = [str(wallet_names)]
try:
wallets = list(Wallet.objects.filter(walletname__in=names))
if wallets:
for w in wallets:
model_obj.dwgwallets.add(w)
return wallets
# Nothing found: record a DataIssue
message = f" ! wallet(s) '{names}' not found from {getattr(model_obj, 'dwgpath', model_obj)}"
print(message)
DataIssue.objects.create(parser=parser_label, message=message, url=f"/dwgdataraw/{getattr(model_obj, 'dwgpath', '')}")
return []
except Exception as e:
message = f" ! Exception while looking up wallet(s) '{names}' from {getattr(model_obj, 'dwgpath', model_obj)} -- ({e})"
print(message)
DataIssue.objects.create(parser=parser_label, message=message, url=f"/dwgdataraw/{getattr(model_obj, 'dwgpath', '')}")
return []
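Both call sites below pass whatever the regexes produced, so the accept-a-string-or-an-iterable behaviour matters in practice. A small usage sketch; the wallet names and DrawingFile here are illustrative:

df = DrawingFile.objects.create(dwgpath="demo.th", dwgname="demo")
_assign_wallets_for_model(df, "2025#20", parser_label="Therion")                # single name
_assign_wallets_for_model(df, ["2025#20", "2025#21"], parser_label="Therion")   # iterable of names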
def settherionfileinfo(filetuple):
"""Read in the drawing file contents and set values on the dwgfile object"""
thtype, therionfile = filetuple
@@ -257,10 +316,8 @@ def settherionfileinfo(filetuple):
therionfile.npaths = len(rx_input.findall(ttext))
modified.add("npaths")
if wallet_texts := rx_ref.findall(ttext):
# Delegate wallet assignment to helper; use parser_label 'Therion'
_assign_wallets_for_model(therionfile, wallet_texts, parser_label="Therion")
elif thtype == "th2":
therionfile.npaths = len(rx_line.findall(ttext))
modified.add("npaths")
@@ -330,15 +387,9 @@ def settnlfileinfo(dwgfile):
# <pathcodes>
# <pctext style="survey" nodeposxrel="-1.0" nodeposyrel="-1.0"> *file_begin "/home/expo/loser/caves-1623/2025-dw-01/trip1.svx" "trip1.svx" | *begin 1 | *export 1 25 | | ; Cave: 2025-dw-01 | ; Area in cave/QM: Entrance series | *title "2025-dw-01" | *date 2025.07.13 | *team "Dylan Wase" notes | *team "Daniel Gorst" dog | *instrument SAP "SAP6 Dylan" | *ref 2025#20 |
for refs in rx_pctext.findall(ttext):
if refs:
# Delegate wallet lookup/assignment to helper for consistent handling
_assign_wallets_for_model(dwgfile, refs, parser_label="Tunnel")
# should also scan and look for survex blocks that might have been included, and image scans
# which would populate dwgfile.survexfile
@@ -365,6 +416,8 @@ def load_drawings_files():
We import JPG, PNG and SVG files, which have already been put on the server,
but the upload form intentionally refuses to upload PNG and JPG (though it does allow SVG)
"""
# Track elapsed time
start_time = time.perf_counter()
all_xml = []
drawdatadir = Path(settings.DRAWINGS_DATA)
DrawingFile.objects.all().delete()
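The scan itself walks DRAWINGS_DATA and keeps files by extension before the per-type parsing below. A rough sketch of that step, with the extension set and helper name assumed for illustration (the real loop also creates the DrawingFile rows it appends to all_xml):

DRAWING_EXTS = {"xml", "th", "th2", "svg", "jpg", "png"}   # assumed set of handled types

def find_drawing_paths(drawdatadir):
    """Yield (extension, Path) pairs for candidate drawing files under drawdatadir."""
    for p in sorted(drawdatadir.rglob("*")):
        ext = p.suffix.lower().lstrip(".")
        if p.is_file() and ext in DRAWING_EXTS:
            yield ext, p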
@@ -416,14 +469,13 @@ def load_drawings_files():
if dwgfile:
all_xml.append((ext, dwgfile, p))
elapsed = time.perf_counter() - start_time
print(f" - {len(all_xml)} Drawings files found ({elapsed:.2f}s)")
# Process in a deterministic order; ensure .th2 are handled before .th
ext_priority = {'th2': 0, 'th': 1}
all_xml.sort(key=lambda t: ext_priority.get(t[0], 2))
print(f" - Drawing found, starting parsing...")
# Process files and collect modified scalar fields for bulk update
modified_map = {} # {DrawingFile instance: set(fields)}
for extension, filename, pathobj in all_xml:
@@ -441,7 +493,9 @@ def load_drawings_files():
if modified:
modified_map.setdefault(filename, set()).update(modified)
elapsed = time.perf_counter() - start_time
print(f" - Drawings parsed ({elapsed:.2f}s)")
# Bulk update scalar fields grouped by identical field-sets to use bulk_update efficiently
from collections import defaultdict
groups = defaultdict(list) # {tuple(fields): [instances]}
@@ -454,7 +508,8 @@ def load_drawings_files():
# Use a conservative batch size
DrawingFile.objects.bulk_update(instances, fields_list, batch_size=500)
elapsed = time.perf_counter() - start_time
print(f" - Database updated ({elapsed:.2f}s)")
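The grouping idiom above, isolated: instances whose sets of modified fields are identical share one bulk_update() call, so each distinct field combination costs a single batched query. A self-contained sketch, with modified_map shaped as in the loop above:

from collections import defaultdict

def bulk_update_grouped(modified_map, batch_size=500):
    groups = defaultdict(list)                     # {tuple(sorted(fields)): [instances]}
    for instance, fields in modified_map.items():
        groups[tuple(sorted(fields))].append(instance)
    for fields, instances in groups.items():
        # one query batch per distinct set of modified fields
        DrawingFile.objects.bulk_update(instances, list(fields), batch_size=batch_size)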
# for drawfile in DrawingFile.objects.all():
# SetTunnelfileInfo(drawfile)