2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-17 17:47:13 +00:00

refactored wallet identification

This commit is contained in:
2025-12-15 20:51:07 +00:00
parent d6b2811457
commit e23cab50ed
2 changed files with 106 additions and 21 deletions

View File

@@ -164,3 +164,33 @@ class DrawingsPathlibTests(TestCase):
# Spot-check a few entries
self.assertIn('bigdir/file0.txt', mapping)
self.assertIn(f'bigdir/file{count-1}.txt', mapping)
def test_assign_wallets_for_model_assigns_and_returns_wallets(self):
    """An existing wallet name yields a truthy result and links the wallet to the drawing."""
    wallet = Wallet.objects.create(fpath='x', walletname='2025#20')
    drawing = DrawingFile.objects.create(dwgpath='assign.th', dwgname='assign')

    matched = drawings._assign_wallets_for_model(
        drawing,
        '2025#20',
        parser_label='AssignTest',
    )

    # The helper must report a non-empty result ...
    self.assertTrue(matched)
    # ... and the wallet must now be attached to the drawing file.
    linked_wallets = drawing.dwgwallets.all()
    self.assertIn(wallet, linked_wallets)
def test_assign_wallets_for_model_creates_dataissue_on_missing(self):
    """An unknown wallet name leaves a 'not found' DataIssue under the given parser label."""
    drawing = DrawingFile.objects.create(dwgpath='missing.th', dwgname='missing')

    drawings._assign_wallets_for_model(drawing, 'NONEXISTENT', parser_label='AssignMissing')

    issues = DataIssue.objects.filter(
        parser='AssignMissing',
        message__contains='not found',
    )
    self.assertTrue(issues.exists())
def test_assign_wallets_for_model_records_dataissue_on_exception(self):
    """A wallet lookup that raises is recorded as a DataIssue instead of propagating."""
    from unittest.mock import patch

    drawing = DrawingFile.objects.create(dwgpath='err.th', dwgname='err')

    # Force the wallet lookup itself to blow up while the helper runs
    lookup_target = 'troggle.core.models.wallets.Wallet.objects.filter'
    with patch(lookup_target, side_effect=RuntimeError('boom')):
        drawings._assign_wallets_for_model(drawing, 'WHATEVER', parser_label='AssignError')

    issues = DataIssue.objects.filter(parser='AssignError', message__contains='Exception')
    self.assertTrue(issues.exists())

View File

@@ -1,6 +1,7 @@
import os
import re
from pathlib import Path
import time
import settings
from troggle.core.models.survex import DrawingFile
@@ -84,15 +85,32 @@ rx_wallet = re.compile(r"""
# from the end of a path string.
# --- Group 1: Directory or Survey Prefix ---
# but current AND HISTORIC wallet namings, as images have been edited over the years
( # Start of Capture Group 1
\d{4}\#X?\d+\w? # Matches patterns like "2025#123", "2016#X04" or "1999#45a", NB # must be escaped in VERBOSE mode
| # OR
1995-96kh # Matches the literal string "1995-96kh"
|
1989LUSS # Matches the literal string "1989LUSS"
| # OR
1989Surveybook # Matches the literal string "1989Surveybook"
| # OR
1990NotKHsurveybook # Matches the literal string "1990NotKHsurveybook"
| # OR
199?kh #
| # OR
199?notkh #
| # OR
199?Surveybookkh # Matches the literal string "92-94Surveybookkh"
| # OR
1992-94NotKHSurveybook # Matches the literal string "92-94Surveybookkh"
| # OR
92-94Surveybookkh # Matches the literal string "92-94Surveybookkh"
| # OR
1991surveybook # Matches the literal string "1991surveybook"
| # OR
1991surveybook # Matches the literal string "1991surveybook"
| # OR
smkhs # Matches the literal string "smkhs"
) # End of Capture Group 1
@@ -108,6 +126,7 @@ rx_wallet = re.compile(r"""
$ # Anchor, ensuring the match is at the end of the string
""", re.VERBOSE | re.IGNORECASE)
def _set_filesize_and_check(fullpath, model_obj, parser_label, url_prefix="/dwgdataraw"):
"""Set model_obj.filesize from filesystem and create DataIssue if missing/zero.
Returns True if file exists and has size > 0, False otherwise.
@@ -151,7 +170,8 @@ def parse_tnl_file(dwgfile, path):
which we have already seen when we imported all the files we could find in the surveyscans directories.
The purpose is to find cross-references between Tunnel drawing files and wallets
AND to find the names of the scanfiles in that wallet - from reading the Tunnel file not from interrogating the wallet.
AND to find the names of the scanfiles in that wallet - from reading the Tunnel file not from
interrogating the wallet.
Note that this means that the list of scanfiles will be as it was when the drawing was created, not as it is now. Perhaps
we should not actually do it this way ? Or at least, label the table heading.
@@ -166,7 +186,7 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"):
"""Unified processor to link drawing files to wallets/scans or referenced drawings.
- If `path` matches a wallet pattern (rx_wallet), link the wallet and try to find the scan file in the wallet.
- If `path` looks like an image, do nothing (images are not treated as references here).
- If `path` looks like an image, do nothing (images are not treated as references here - yet).
- Otherwise, treat `path` as a possible reference to another drawing (by name) and link via `dwgcontains`.
"""
@@ -215,7 +235,7 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"):
print(message)
DataIssue.objects.create(parser=parser_label, message=message, url=f"/dwgdataraw/{path}")
rdwgfile = rdwgfilel[0]
if hasattr(dwgfile, 'dwgcontains'):
if hasattr(dwgfile, 'dwgcontains'): # implement model change in models/survex.py to use this
dwgfile.dwgcontains.add(rdwgfile)
return None, None
@@ -240,6 +260,45 @@ def findimportinsert(therionfile, imp):
pass
def _assign_wallets_for_model(model_obj, wallet_names, parser_label="Tunnel"):
"""Assign wallets to `model_obj` by wallet name(s).
wallet_names may be a single string or an iterable of names. This function
will add any Wallets found via Wallet.objects.filter(walletname__in=...) to
model_obj.dwgwallets and return the list of matched Wallet objects. If none
are found, or an exception occurs, a DataIssue is recorded with parser
set to `parser_label`.
"""
if not wallet_names:
return []
# Normalize to list of names
if isinstance(wallet_names, (str, bytes)):
names = [str(wallet_names)]
else:
try:
names = [str(n) for n in wallet_names]
except Exception:
names = [str(wallet_names)]
try:
wallets = list(Wallet.objects.filter(walletname__in=names))
if wallets:
for w in wallets:
model_obj.dwgwallets.add(w)
return wallets
# Nothing found: record a DataIssue
message = f" ! wallet(s) '{names}' not found from {getattr(model_obj, 'dwgpath', model_obj)}"
print(message)
DataIssue.objects.create(parser=parser_label, message=message, url=f"/dwgdataraw/{getattr(model_obj, 'dwgpath', '')}")
return []
except Exception as e:
message = f" ! Exception while looking up wallet(s) '{names}' from {getattr(model_obj, 'dwgpath', model_obj)} -- ({e})"
print(message)
DataIssue.objects.create(parser=parser_label, message=message, url=f"/dwgdataraw/{getattr(model_obj, 'dwgpath', '')}")
return []
def settherionfileinfo(filetuple):
"""Read in the drawing file contents and sets values on the dwgfile object"""
thtype, therionfile = filetuple
@@ -257,10 +316,8 @@ def settherionfileinfo(filetuple):
therionfile.npaths = len(rx_input.findall(ttext))
modified.add("npaths")
if wallet_texts := rx_ref.findall(ttext):
# print(f"#ref {therionfile.dwgname} : {wallet_text}")
if wallets := Wallet.objects.filter(walletname__in=wallet_texts): # ! Django idiom not used elsewhere. A filter using a list of strings.
for w in wallets:
therionfile.dwgwallets.add(w)
# Delegate wallet assignment to helper; use parser_label 'Therion'
_assign_wallets_for_model(therionfile, wallet_texts, parser_label="Therion")
elif thtype == "th2":
therionfile.npaths = len(rx_line.findall(ttext))
modified.add("npaths")
@@ -330,15 +387,9 @@ def settnlfileinfo(dwgfile):
# <pathcodes>
# <pctext style="survey" nodeposxrel="-1.0" nodeposyrel="-1.0"> *file_begin "/home/expo/loser/caves-1623/2025-dw-01/trip1.svx" "trip1.svx" | *begin 1 | *export 1 25 | | ; Cave: 2025-dw-01 | ; Area in cave/QM: Entrance series | *title "2025-dw-01" | *date 2025.07.13 | *team "Dylan Wase" notes | *team "Daniel Gorst" dog | *instrument SAP "SAP6 Dylan" | *ref 2025#20 |
for refs in rx_pctext.findall(ttext):
try:
wallets = Wallet.objects.filter(walletname=refs)
if wallets:
for w in wallets:
dwgfile.dwgwallets.add(w)
except Exception as e:
message = f" ! wallet not found referenced from {dwgfile} -- '{refs}' ({e}) "
print(message)
DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{dwgfile}")
if refs:
# Delegate wallet lookup/assignment to helper for consistent handling
_assign_wallets_for_model(dwgfile, refs, parser_label="Tunnel")
# should also scan and look for survex blocks that might have been included, and image scans
# which would populate dwgfile.survexfile
@@ -365,6 +416,8 @@ def load_drawings_files():
We import JPG, PNG and SVG files; which have already been put on the server,
but the upload form intentionally refuses to upload PNG and JPG (though it does allow SVG)
"""
# Track elapsed time
start_time = time.perf_counter()
all_xml = []
drawdatadir = Path(settings.DRAWINGS_DATA)
DrawingFile.objects.all().delete()
@@ -416,14 +469,13 @@ def load_drawings_files():
if dwgfile:
all_xml.append((ext, dwgfile, p))
print(f" - {len(all_xml)} Drawings files found")
elapsed = time.perf_counter() - start_time
print(f" - {len(all_xml)} Drawings files found ({elapsed:.2f}s)")
# Process in a deterministic order; ensure .th2 are handled before .th
ext_priority = {'th2': 0, 'th': 1}
all_xml.sort(key=lambda t: ext_priority.get(t[0], 2))
print(f" - Drawing found, starting parsing...")
# Process files and collect modified scalar fields for bulk update
modified_map = {} # {DrawingFile instance: set(fields)}
for extension, filename, pathobj in all_xml:
@@ -441,7 +493,9 @@ def load_drawings_files():
if modified:
modified_map.setdefault(filename, set()).update(modified)
# Bulk update scalar fields grouped by identical field-sets to use bulk_update efficiently
elapsed = time.perf_counter() - start_time
print(f" - Drawings parsed ({elapsed:.2f}s)")
# Bulk update scalar fields grouped by identical field-sets to use bulk_update efficiently
from collections import defaultdict
groups = defaultdict(list) # {tuple(fields): [instances]}
@@ -454,7 +508,8 @@ def load_drawings_files():
# Use a conservative batch size
DrawingFile.objects.bulk_update(instances, fields_list, batch_size=500)
print(f" - Drawings parsed and database updated.")
elapsed = time.perf_counter() - start_time
print(f" - Database updated ({elapsed:.2f}s)")
# for drawfile in DrawingFile.objects.all():
# SetTunnelfileInfo(drawfile)