
refactored wallet identification

2025-12-15 20:51:07 +00:00
parent d6b2811457
commit e23cab50ed
2 changed files with 106 additions and 21 deletions

View File

@@ -164,3 +164,33 @@ class DrawingsPathlibTests(TestCase):
# Spot-check a few entries
self.assertIn('bigdir/file0.txt', mapping)
self.assertIn(f'bigdir/file{count-1}.txt', mapping)
def test_assign_wallets_for_model_assigns_and_returns_wallets(self):
w = Wallet.objects.create(fpath='x', walletname='2025#20')
df = DrawingFile.objects.create(dwgpath='assign.th', dwgname='assign')
res = drawings._assign_wallets_for_model(df, '2025#20', parser_label='AssignTest')
self.assertTrue(res)
self.assertIn(w, df.dwgwallets.all())
def test_assign_wallets_for_model_creates_dataissue_on_missing(self):
df = DrawingFile.objects.create(dwgpath='missing.th', dwgname='missing')
drawings._assign_wallets_for_model(df, 'NONEXISTENT', parser_label='AssignMissing')
di = DataIssue.objects.filter(parser='AssignMissing', message__contains='not found')
self.assertTrue(di.exists())
def test_assign_wallets_for_model_records_dataissue_on_exception(self):
# Patch Wallet.objects.filter to raise an exception
from unittest.mock import patch
df = DrawingFile.objects.create(dwgpath='err.th', dwgname='err')
with patch('troggle.core.models.wallets.Wallet.objects.filter') as mock_filter:
mock_filter.side_effect = RuntimeError('boom')
drawings._assign_wallets_for_model(df, 'WHATEVER', parser_label='AssignError')
di = DataIssue.objects.filter(parser='AssignError', message__contains='Exception')
self.assertTrue(di.exists())

View File

@@ -1,6 +1,7 @@
import os
import re
from pathlib import Path
import time

import settings
from troggle.core.models.survex import DrawingFile
@@ -84,15 +85,32 @@ rx_wallet = re.compile(r"""
# from the end of a path string.
# --- Group 1: Directory or Survey Prefix ---
# covering both current AND HISTORIC wallet namings, as images have been edited over the years
( # Start of Capture Group 1
\d{4}\#X?\d+\w? # Matches patterns like "2025#123", "2016#X04" or "1999#45a", NB # must be escaped in VERBOSE mode
| # OR
1995-96kh # Matches the literal string "1995-96kh"
| # OR
1989LUSS # Matches the literal string "1989LUSS"
| # OR
1989Surveybook # Matches the literal string "1989Surveybook"
| # OR
1990NotKHsurveybook # Matches the literal string "1990NotKHsurveybook"
| # OR
199?kh # Matches "19kh" or "199kh" (the "?" makes the preceding "9" optional)
| # OR
199?notkh # Matches "19notkh" or "199notkh"
| # OR
199?Surveybookkh # Matches "19Surveybookkh" or "199Surveybookkh"
| # OR
1992-94NotKHSurveybook # Matches the literal string "1992-94NotKHSurveybook"
| # OR
92-94Surveybookkh # Matches the literal string "92-94Surveybookkh"
| # OR
1991surveybook # Matches the literal string "1991surveybook"
| # OR
smkhs # Matches the literal string "smkhs"
) # End of Capture Group 1
@@ -108,6 +126,7 @@ rx_wallet = re.compile(r"""
$ # Anchor, ensuring the match is at the end of the string
""", re.VERBOSE | re.IGNORECASE)
def _set_filesize_and_check(fullpath, model_obj, parser_label, url_prefix="/dwgdataraw"):
"""Set model_obj.filesize from filesystem and create DataIssue if missing/zero.
Returns True if file exists and has size > 0, False otherwise.
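The body of _set_filesize_and_check sits outside this hunk. A minimal sketch of the behaviour the docstring describes, assuming the module-level DataIssue import that drawings.py already uses; the helper name and message wording here are illustrative, not the committed code:

def _set_filesize_and_check_sketch(fullpath, model_obj, parser_label, url_prefix="/dwgdataraw"):
    p = Path(fullpath)
    size = p.stat().st_size if p.is_file() else 0
    model_obj.filesize = size
    if size == 0:
        # missing or zero-length file: record a DataIssue and report failure
        message = f" ! Zero length or missing file {fullpath}"
        print(message)
        DataIssue.objects.create(parser=parser_label, message=message,
                                 url=f"{url_prefix}/{getattr(model_obj, 'dwgpath', '')}")
        return False
    return True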
@@ -151,7 +170,8 @@ def parse_tnl_file(dwgfile, path):
which we have already seen when we imported all the files we could find in the surveyscans directories.
The purpose is to find cross-references between Tunnel drawing files and wallets The purpose is to find cross-references between Tunnel drawing files and wallets
AND to find the names of the scanfiles in that wallet - from reading the Tunnel file not from
interrogating the wallet.
Note that this means that the list of scanfiles will be as it was when the drawing was created, not as it is now. Perhaps
we should not actually do it this way ? Or at least, label the table heading.
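The wallet names being cross-referenced here are matched by rx_wallet above (Group 1 captures the wallet name), e.g. "2025#20" in the *ref line quoted further down. A quick, purely illustrative sanity check with a cut-down pattern; rx_demo is a simplified subset, not the real rx_wallet:

import re

# Simplified subset of the Group 1 alternatives, for illustration only
rx_demo = re.compile(r"(\d{4}\#X?\d+\w?|1995-96kh|1991surveybook|smkhs)$", re.VERBOSE | re.IGNORECASE)

for name in ["2025#20", "2016#X04", "1999#45a", "1995-96KH", "smkhs", "notawallet"]:
    m = rx_demo.search(name)
    print(f"{name} -> {m.group(1) if m else None}")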
@@ -166,7 +186,7 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"):
"""Unified processor to link drawing files to wallets/scans or referenced drawings.
- If `path` matches a wallet pattern (rx_wallet), link the wallet and try to find the scan file in the wallet.
- If `path` looks like an image, do nothing (images are not treated as references here - yet).
- Otherwise, treat `path` as a possible reference to another drawing (by name) and link via `dwgcontains`.
"""
@@ -215,7 +235,7 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"):
print(message)
DataIssue.objects.create(parser=parser_label, message=message, url=f"/dwgdataraw/{path}")
rdwgfile = rdwgfilel[0]
if hasattr(dwgfile, 'dwgcontains'): # implement model change in models/survex.py to use this
dwgfile.dwgcontains.add(rdwgfile)
return None, None
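Taken together, these hunks describe a three-way dispatch; reduced to a sketch (IMAGE_EXTS, the name-based DrawingFile query and the use of group(1) are assumptions for illustration, and the real function also resolves the scan file inside the matched wallet):

IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".gif", ".pdf"}   # assumed set, for illustration

def _process_reference_sketch(dwgfile, path, parser_label="Tunnel"):
    if m := rx_wallet.search(path):
        # wallet reference: link the wallet (scan-file lookup omitted in this sketch)
        _assign_wallets_for_model(dwgfile, m.group(1), parser_label=parser_label)
    elif Path(path).suffix.lower() in IMAGE_EXTS:
        pass   # images are not treated as references here - yet
    else:
        # possible reference to another drawing, matched by name
        if rdwg := DrawingFile.objects.filter(dwgname=Path(path).stem).first():
            if hasattr(dwgfile, "dwgcontains"):
                dwgfile.dwgcontains.add(rdwg)
    return None, None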
@@ -240,6 +260,45 @@ def findimportinsert(therionfile, imp):
pass
def _assign_wallets_for_model(model_obj, wallet_names, parser_label="Tunnel"):
"""Assign wallets to `model_obj` by wallet name(s).
wallet_names may be a single string or an iterable of names. This function
will add any Wallets found via Wallet.objects.filter(walletname__in=...) to
model_obj.dwgwallets and return the list of matched Wallet objects. If none
are found, or an exception occurs, a DataIssue is recorded with parser
set to `parser_label`.
"""
if not wallet_names:
return []
# Normalize to list of names
if isinstance(wallet_names, (str, bytes)):
names = [str(wallet_names)]
else:
try:
names = [str(n) for n in wallet_names]
except Exception:
names = [str(wallet_names)]
try:
wallets = list(Wallet.objects.filter(walletname__in=names))
if wallets:
for w in wallets:
model_obj.dwgwallets.add(w)
return wallets
# Nothing found: record a DataIssue
message = f" ! wallet(s) '{names}' not found from {getattr(model_obj, 'dwgpath', model_obj)}"
print(message)
DataIssue.objects.create(parser=parser_label, message=message, url=f"/dwgdataraw/{getattr(model_obj, 'dwgpath', '')}")
return []
except Exception as e:
message = f" ! Exception while looking up wallet(s) '{names}' from {getattr(model_obj, 'dwgpath', model_obj)} -- ({e})"
print(message)
DataIssue.objects.create(parser=parser_label, message=message, url=f"/dwgdataraw/{getattr(model_obj, 'dwgpath', '')}")
return []
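Both call sites below pass whatever the regexes produced, so the accept-a-string-or-an-iterable behaviour matters in practice. A small usage sketch; the wallet names and DrawingFile here are illustrative:

df = DrawingFile.objects.create(dwgpath="demo.th", dwgname="demo")
_assign_wallets_for_model(df, "2025#20", parser_label="Therion")                # single name
_assign_wallets_for_model(df, ["2025#20", "2025#21"], parser_label="Therion")   # iterable of names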
def settherionfileinfo(filetuple):
"""Read in the drawing file contents and set values on the dwgfile object"""
thtype, therionfile = filetuple
@@ -257,10 +316,8 @@ def settherionfileinfo(filetuple):
therionfile.npaths = len(rx_input.findall(ttext))
modified.add("npaths")
if wallet_texts := rx_ref.findall(ttext):
# Delegate wallet assignment to helper; use parser_label 'Therion'
_assign_wallets_for_model(therionfile, wallet_texts, parser_label="Therion")
elif thtype == "th2":
therionfile.npaths = len(rx_line.findall(ttext))
modified.add("npaths")
@@ -330,15 +387,9 @@ def settnlfileinfo(dwgfile):
# <pathcodes>
# <pctext style="survey" nodeposxrel="-1.0" nodeposyrel="-1.0"> *file_begin "/home/expo/loser/caves-1623/2025-dw-01/trip1.svx" "trip1.svx" | *begin 1 | *export 1 25 | | ; Cave: 2025-dw-01 | ; Area in cave/QM: Entrance series | *title "2025-dw-01" | *date 2025.07.13 | *team "Dylan Wase" notes | *team "Daniel Gorst" dog | *instrument SAP "SAP6 Dylan" | *ref 2025#20 |
for refs in rx_pctext.findall(ttext):
if refs:
# Delegate wallet lookup/assignment to helper for consistent handling
_assign_wallets_for_model(dwgfile, refs, parser_label="Tunnel")
# should also scan and look for survex blocks that might have been included, and image scans
# which would populate dwgfile.survexfile
@@ -365,6 +416,8 @@ def load_drawings_files():
We import JPG, PNG and SVG files, which have already been put on the server,
but the upload form intentionally refuses to upload PNG and JPG (though it does allow SVG)
"""
# Track elapsed time
start_time = time.perf_counter()
all_xml = []
drawdatadir = Path(settings.DRAWINGS_DATA)
DrawingFile.objects.all().delete()
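The scan itself walks DRAWINGS_DATA and keeps files by extension before the per-type parsing below. A rough sketch of that step, with the extension set and helper name assumed for illustration (the real loop also creates the DrawingFile rows it appends to all_xml):

DRAWING_EXTS = {"xml", "th", "th2", "svg", "jpg", "png"}   # assumed set of handled types

def find_drawing_paths(drawdatadir):
    """Yield (extension, Path) pairs for candidate drawing files under drawdatadir."""
    for p in sorted(drawdatadir.rglob("*")):
        ext = p.suffix.lower().lstrip(".")
        if p.is_file() and ext in DRAWING_EXTS:
            yield ext, p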
@@ -416,14 +469,13 @@ def load_drawings_files():
if dwgfile:
all_xml.append((ext, dwgfile, p))
elapsed = time.perf_counter() - start_time
print(f" - {len(all_xml)} Drawings files found ({elapsed:.2f}s)")
# Process in a deterministic order; ensure .th2 are handled before .th
ext_priority = {'th2': 0, 'th': 1}
all_xml.sort(key=lambda t: ext_priority.get(t[0], 2))
print(f" - Drawing found, starting parsing...")
# Process files and collect modified scalar fields for bulk update
modified_map = {} # {DrawingFile instance: set(fields)}
for extension, filename, pathobj in all_xml:
@@ -441,7 +493,9 @@ def load_drawings_files():
if modified:
modified_map.setdefault(filename, set()).update(modified)
elapsed = time.perf_counter() - start_time
print(f" - Drawings parsed ({elapsed:.2f}s)")
# Bulk update scalar fields grouped by identical field-sets to use bulk_update efficiently
from collections import defaultdict
groups = defaultdict(list) # {tuple(fields): [instances]}
@@ -454,7 +508,8 @@ def load_drawings_files():
# Use a conservative batch size
DrawingFile.objects.bulk_update(instances, fields_list, batch_size=500)
elapsed = time.perf_counter() - start_time
print(f" - Database updated ({elapsed:.2f}s)")
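The grouping idiom above, isolated: instances whose sets of modified fields are identical share one bulk_update() call, so each distinct field combination costs a single batched query. A self-contained sketch, with modified_map shaped as in the loop above:

from collections import defaultdict

def bulk_update_grouped(modified_map, batch_size=500):
    groups = defaultdict(list)                     # {tuple(sorted(fields)): [instances]}
    for instance, fields in modified_map.items():
        groups[tuple(sorted(fields))].append(instance)
    for fields, instances in groups.items():
        # one query batch per distinct set of modified fields
        DrawingFile.objects.bulk_update(instances, list(fields), batch_size=batch_size)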
# for drawfile in DrawingFile.objects.all():
# SetTunnelfileInfo(drawfile)