2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-19 10:57:20 +00:00

finds scans in renamed electronic wallets

This commit is contained in:
2025-12-16 00:54:58 +00:00
parent 39be4a2886
commit 4d52cdbf18

View File

@@ -4,7 +4,7 @@ from pathlib import Path
import time
import settings
from troggle.core.models.survex import DrawingFile
from troggle.core.models.survex import DrawingFile, SingleScan
from troggle.core.models.troggle import DataIssue
from troggle.core.models.wallets import Wallet
@@ -79,6 +79,16 @@ def fetch_drawingfiles_by_paths(paths, chunk_size: int = 500):
return mapping
# The wallet folders were rearranged into separate years, but the drawing files
# already existed with the old names embedded in them. Keys are the old wallet
# folder names; values list the names/locations that replaced them
# (NOTE(review): nothing visible here consumes the values yet, only the keys).
old_wallets = {
    "1995-96kh": ["1995kh", "1996kh"],
    # This key used to appear twice in the literal (once with [] and once with
    # the value below); Python silently keeps the last value, so the single
    # entry here preserves the effective mapping. Rearrangement is still pending.
    "92-94NotKHSurveybook": ["1992-94NotKHSurveybook"],
    "92-94Surveybookkh": ["1992Surveybookkh", "1993Surveybookkh", "1994Surveybookkh"],
    "smkhs": [settings.EXPOFILES / "surveys" / "smkhs"],
}

# Old-style electronic wallet names such as "2016#X04". The "#" is escaped
# because an unescaped "#" starts a comment in re.VERBOSE mode.
rx_x_wallet = re.compile(r"(\d{4}\#X\d+)", re.VERBOSE | re.IGNORECASE)
rx_wallet = re.compile(r"""
# This regex is designed to extract a specific directory prefix (walletname) and a filename
# from the end of a path string.
@@ -88,7 +98,7 @@ rx_wallet = re.compile(r"""
( # Start of Capture Group 1
\d{4}\#X?\d+\w? # Matches patterns like "2025#123", "2016#X04" or "1999#45a", NB # must be escaped in VERBOSE mode
| # OR
1995-96kh # Matches the literal string "1995-96kh"
1995-96kh # Matches the literal string "1995-96kh" - OLD
|
1989LUSS # Matches the literal string "1989LUSS"
| # OR
@@ -96,21 +106,21 @@ rx_wallet = re.compile(r"""
| # OR
1990NotKHsurveybook # Matches the literal string "1990NotKHsurveybook"
| # OR
199?kh #
199.kh #
| # OR
199?notkh #
199.notkh #
| # OR
199?Surveybookkh # Matches the literal string "92-94Surveybookkh"
199.Surveybookkh # Matches the literal string "92-94Surveybookkh"
| # OR
1992-94NotKHSurveybook # Matches the literal string "92-94Surveybookkh"
92-94NotKHSurveybook # Matches the literal string "92-94Surveybookkh" - OLD
| # OR
92-94Surveybookkh # Matches the literal string "92-94Surveybookkh"
1992-94NotKHSurveybook # Matches the literal string "92-94Surveybookkh" - not OLD yet
| # OR
1991surveybook # Matches the literal string "1991surveybook"
92-94Surveybookkh # Matches the literal string "92-94Surveybookkh" - OLD
| # OR
1991surveybook # Matches the literal string "1991surveybook"
19..surveybook #
| # OR
smkhs # This is now expofiles/surveys/smkhs/ not in surveyscans/ at all.
smkhs # This is now expofiles/surveys/smkhs/ not in surveyscans/ at all. - OLD
) # End of Capture Group 1
/ # A literal forward slash separating the parts
@@ -180,10 +190,47 @@ def parse_tnl_file(dwgfile, path):
# Delegate to the unified reference processor for consistent behaviour
_process_reference(dwgfile, path, parser_label="Tunnel")
def _handle_obsolete_wallets(old_wallet, dwgfile, path, parser_label):
    """Report a wallet name found in a drawing file that is not a current wallet.

    Prints a warning and records it as a DataIssue under ``parser_label``,
    linked to ``/dwgdataraw/<path>``. ``dwgfile`` is accepted but not used.

    NOTE(review): a second definition with the same name appears further down
    this file - confirm which one is meant to be live.
    """
    warning = f"- Warning {old_wallet} not a currently valid wallet name. In {path}"
    issue_url = f"/dwgdataraw/{path}"
    print(warning)
    DataIssue.objects.update_or_create(
        parser=parser_label,
        message=warning,
        url=issue_url,
    )
def _find_renamed_x_wallet(old_wallet, scanfilename, parser_label="Tunnel"):
    """Find the current wallet for a scan referenced via an old "X" wallet name.

    In past years we had a different naming convention for wallets containing
    electronic files, but these have since been renamed simply as 20nn#nn
    without the X. The scan is looked up by filename among all wallets whose
    walletyear matches the first four characters of ``old_wallet``.

    Args:
        old_wallet: the obsolete wallet name, e.g. "2016#X04".
        scanfilename: the scan file name taken from the drawing file reference.
        parser_label: DataIssue ``parser`` value used when reporting an
            ambiguous match. Previously this name was read without being
            defined here, which raised NameError on the ambiguity branch.

    Returns:
        The matching Wallet, or False if no scan matches or if more than one
        scan matches (in which case a DataIssue is recorded).
    """
    # Reverse wallet-name order: the electronic wallets were always added to the
    # end of the wallet numbers. Only the first two matches are needed to tell
    # "unique" from "ambiguous", so let the database do the name filtering.
    candidates = list(
        SingleScan.objects.filter(
            wallet__walletyear__year=old_wallet[:4],
            name=scanfilename,
        ).order_by("-wallet__walletname")[:2]
    )
    if not candidates:
        return False
    if len(candidates) > 1:
        message = f"- AMBIGUOUS scan filename {old_wallet}=>{candidates[1].wallet.walletname}:{scanfilename}"
        print(message)
        DataIssue.objects.update_or_create(parser=parser_label, message=message, url="")
        return False
    return candidates[0].wallet


def _handle_obsolete_wallets(old_wallet, dwgfile, scanfilename, parser_label, path=None):
    """Map an obsolete wallet name found in a drawing file to a current Wallet.

    Wallets, which are folders on the filesystem, have been renamed over the
    years but the filepaths embedded in drawing files are what they were when
    the drawing file was created. So we have to match up these old wallet names
    with what they are now.

    Args:
        old_wallet: wallet name extracted from the reference in the drawing file.
        dwgfile: the drawing file being parsed (currently unused here).
        scanfilename: scan file name extracted from the same reference.
        parser_label: DataIssue ``parser`` value for any warning recorded.
        path: optional full reference path, used only in warning text and the
            DataIssue url. When omitted, "<old_wallet>/<scanfilename>" is used.
            (The warnings previously read an undefined ``path`` and raised
            NameError.)

    Returns:
        The current Wallet when a renamed electronic wallet is found,
        otherwise False.
    """
    if path is None:
        # Rebuild the wallet/filename portion of the reference for reporting.
        # NOTE(review): the url may be better pointed at the drawing file itself - confirm.
        path = f"{old_wallet}/{scanfilename}"

    if x := rx_x_wallet.search(old_wallet):
        # only doing 2016#X04 etc. as yet
        if w_renamed := _find_renamed_x_wallet(old_wallet, scanfilename, parser_label):
            return w_renamed
        message = f"- Warning {parser_label} XFILES {old_wallet} {(old_wallet==x.group(1))} in {path}"
        print(message)
        DataIssue.objects.update_or_create(parser=parser_label, message=message, url=f"/dwgdataraw/{path}")
        return False

    if old_wallet not in old_wallets:
        message = f"- Warning {old_wallet} not a currently valid wallet name. In {path}"
        print(message)
        DataIssue.objects.update_or_create(parser=parser_label, message=message, url=f"/dwgdataraw/{path}")
        return False

    # A known old folder name, but no translation to a current Wallet is done
    # yet; return False explicitly so every failure path has the same result.
    return False
def _process_reference(dwgfile, path, parser_label="Tunnel"):
@@ -199,7 +246,9 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"):
return None, None
if mscansdir := rx_wallet.search(path):
scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1)) # wallet name
walletname = mscansdir.group(1)
scanfilename = mscansdir.group(2)
scanswalletl = Wallet.objects.filter(walletname=walletname) # wallet name
if len(scanswalletl):
wallet = scanswalletl[0]
if len(scanswalletl) > 1:
@@ -207,10 +256,10 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"):
print(message)
DataIssue.objects.update_or_create(parser=parser_label, message=message)
else: # found a wallet name, but it is not one we recognise as having been imported
_handle_obsolete_wallets(mscansdir.group(1),dwgfile, path, parser_label)
wallet = _handle_obsolete_wallets(mscansdir.group(1),dwgfile, scanfilename, parser_label)
if wallet:
scansfilel = wallet.singlescan_set.filter(name=mscansdir.group(2)) # file name
scansfilel = wallet.singlescan_set.filter(name=scanfilename) # file name
if len(scansfilel):
if len(scansfilel) > 1:
plist = [sf.ffile for sf in scansfilel]
@@ -228,7 +277,7 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"):
scanfilename = Path(path).name
message = f"! In '{wallet.walletname}' scanned file is not actually found '{scanfilename}' in '{path}'"
wurl = f"/survey_scans/{wallet.walletname}/".replace("#", ":")
DataIssue.objects.update_or_create(parser=parser_label, message=message, url=wurl)
DataIssue.objects.update_or_create(parser="z_"+parser_label, message=message, url=wurl)
return wallet, scansfile
# Not a wallet reference; check image extension and possibly drawing-to-drawing reference
@@ -421,6 +470,8 @@ def load_drawings_files():
DataIssue.objects.filter(parser="Therion").delete()
DataIssue.objects.filter(parser="xTherion").delete()
DataIssue.objects.filter(parser="Tunnel").delete()
DataIssue.objects.filter(parser="z_Therion").delete()
DataIssue.objects.filter(parser="z_Tunnel").delete()
if os.path.isfile("therionrefs.log"):
os.remove("therionrefs.log")