From 4d52cdbf18738a8f344b4b7c649efadf477452af Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Tue, 16 Dec 2025 00:54:58 +0000 Subject: [PATCH] finds scans in renamed electronic wallets --- parsers/drawings.py | 87 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 18 deletions(-) diff --git a/parsers/drawings.py b/parsers/drawings.py index 33dfae4..2a7108b 100644 --- a/parsers/drawings.py +++ b/parsers/drawings.py @@ -4,7 +4,7 @@ from pathlib import Path import time import settings -from troggle.core.models.survex import DrawingFile +from troggle.core.models.survex import DrawingFile, SingleScan from troggle.core.models.troggle import DataIssue from troggle.core.models.wallets import Wallet @@ -79,6 +79,16 @@ def fetch_drawingfiles_by_paths(paths, chunk_size: int = 500): return mapping +old_wallets = { # maps each obsolete wallet folder name to the folder(s) that replaced it: the wallet folders were rearranged into separate years, but the drawing files already existed with the old names. + "1995-96kh": ["1995kh", "1996kh"], + # "92-94NotKHSurveybook" is listed once only, further down: a repeated key in a dict literal silently discards the earlier value + "92-94Surveybookkh": ["1992Surveybookkh", "1993Surveybookkh", "1994Surveybookkh"], + "92-94NotKHSurveybook": ["1992-94NotKHSurveybook"], # pending rearrangement + "smkhs": [settings.EXPOFILES / "surveys" / "smkhs"], + } + +rx_x_wallet = re.compile(r"(\d{4}\#X\d+)", re.VERBOSE | re.IGNORECASE) # old-style electronic wallet names such as 2016#X04 + rx_wallet = re.compile(r""" # This regex is designed to extract a specific directory prefix (walletname) and a filename # from the end of a path string. @@ -88,7 +98,7 @@ rx_wallet = re.compile(r""" ( # Start of Capture Group 1 \d{4}\#X?\d+\w?
# Matches patterns like "2025#123", "2016#X04" or "1999#45a", NB # must be escaped in VERBOSE mode | # OR - 1995-96kh # Matches the literal string "1995-96kh" + 1995-96kh # Matches the literal string "1995-96kh" - OLD | 1989LUSS # Matches the literal string "1989LUSS" | # OR @@ -96,21 +106,21 @@ rx_wallet = re.compile(r""" | # OR 1990NotKHsurveybook # Matches the literal string "1990NotKHsurveybook" | # OR - 199?kh # + 199.kh # | # OR - 199?notkh # + 199.notkh # | # OR - 199?Surveybookkh # Matches the literal string "92-94Surveybookkh" + 199.Surveybookkh # Matches the literal string "92-94Surveybookkh" | # OR - 1992-94NotKHSurveybook # Matches the literal string "92-94Surveybookkh" + 92-94NotKHSurveybook # Matches the literal string "92-94Surveybookkh" - OLD | # OR - 92-94Surveybookkh # Matches the literal string "92-94Surveybookkh" + 1992-94NotKHSurveybook # Matches the literal string "92-94Surveybookkh" - not OLD yet | # OR - 1991surveybook # Matches the literal string "1991surveybook" + 92-94Surveybookkh # Matches the literal string "92-94Surveybookkh" - OLD | # OR - 1991surveybook # Matches the literal string "1991surveybook" + 19..surveybook # | # OR - smkhs # This is now expofiles/surveys/smkhs/ not in surveyscans/ at all. + smkhs # This is now expofiles/surveys/smkhs/ not in surveyscans/ at all. - OLD ) # End of Capture Group 1 / # A literal forward slash separating the parts @@ -180,10 +190,47 @@ def parse_tnl_file(dwgfile, path): # Delegate to the unified reference processor for consistent behaviour _process_reference(dwgfile, path, parser_label="Tunnel") -def _handle_obsolete_wallets(old_wallet, dwgfile, path, parser_label): - message = f"- Warning {old_wallet} not a currently valid wallet name. 
In {path}" - print(message) - DataIssue.objects.update_or_create(parser=parser_label, message=message, url=f"/dwgdataraw/{path}") +def _find_renamed_x_wallet(old_wallet, scanfilename, parser_label="Tunnel"): + """In past years we had a different naming convention for wallets containing electronic files + but now these have been renamed simply as 20nn#nn without the X. Returns the Wallet, from the same year as old_wallet, that holds a scan named scanfilename, or False when none or more than one is found (an ambiguous match is also recorded as a DataIssue under parser_label). + """ + valid_wallet = False + # this finds all the scanfiles which have a wallet where the walletyear.year == old_wallet[:4] + # It is in reverse order as the electronic wallets were always added to the end of the wallet numbers + scans = SingleScan.objects.filter(wallet__walletyear__year=old_wallet[:4]).order_by('-wallet__walletname') + for ss in scans: + if ss.name == scanfilename: + # print(f"FOUND {old_wallet}=>{ss.wallet.walletname}:{scanfilename}") + if valid_wallet: + message = f"- AMBIGUOUS scan filename {old_wallet}=>{ss.wallet.walletname}:{scanfilename}" + print(message) + DataIssue.objects.update_or_create(parser=parser_label, message=message, url="") + return False + else: + valid_wallet = ss.wallet + return valid_wallet + + +def _handle_obsolete_wallets(old_wallet, dwgfile, scanfilename, parser_label, path=""): + """Wallets, which are folders on the filesystem, have been renamed over the years but + the filepaths embedded in drawing files are what they were when the drawing file was created. + So we have to match up these old wallet names with what they are now. Returns the renamed Wallet if one is found, otherwise a falsy value (False, or None when old_wallet is listed in old_wallets); path is the reference as written in the drawing file and is only used in the DataIssue messages and urls. + """ + if x := rx_x_wallet.search(old_wallet): + # only doing 2016#X04 etc. as yet + if w_renamed := _find_renamed_x_wallet(old_wallet, scanfilename, parser_label): + return w_renamed + + message = f"- Warning {parser_label} XFILES {old_wallet} {(old_wallet==x.group(1))} in {path}" + print(message) + DataIssue.objects.update_or_create(parser=parser_label, message=message, url=f"/dwgdataraw/{path}") + return False + + if old_wallet not in old_wallets: + message = f"- Warning {old_wallet} not a currently valid wallet name.
In {path}" + print(message) + DataIssue.objects.update_or_create(parser=parser_label, message=message, url=f"/dwgdataraw/{path}") + return False def _process_reference(dwgfile, path, parser_label="Tunnel"): @@ -199,7 +246,9 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"): return None, None if mscansdir := rx_wallet.search(path): - scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1)) # wallet name + walletname = mscansdir.group(1) + scanfilename = mscansdir.group(2) + scanswalletl = Wallet.objects.filter(walletname=walletname) # wallet name if len(scanswalletl): wallet = scanswalletl[0] if len(scanswalletl) > 1: @@ -207,10 +256,10 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"): print(message) DataIssue.objects.update_or_create(parser=parser_label, message=message) else: # found a wallet name, but it is not one we recognise as having been imported - _handle_obsolete_wallets(mscansdir.group(1),dwgfile, path, parser_label) + wallet = _handle_obsolete_wallets(mscansdir.group(1),dwgfile, scanfilename, parser_label, path=path) if wallet: - scansfilel = wallet.singlescan_set.filter(name=mscansdir.group(2)) # file name + scansfilel = wallet.singlescan_set.filter(name=scanfilename) # file name if len(scansfilel): if len(scansfilel) > 1: plist = [sf.ffile for sf in scansfilel] @@ -228,7 +277,7 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"): scanfilename = Path(path).name message = f"!
In '{wallet.walletname}' scanned file is not actually found '{scanfilename}' in '{path}'" wurl = f"/survey_scans/{wallet.walletname}/".replace("#", ":") - DataIssue.objects.update_or_create(parser=parser_label, message=message, url=wurl) + DataIssue.objects.update_or_create(parser="z_"+parser_label, message=message, url=wurl) return wallet, scansfile # Not a wallet reference; check image extension and possibly drawing-to-drawing reference @@ -421,6 +470,8 @@ def load_drawings_files(): DataIssue.objects.filter(parser="Therion").delete() DataIssue.objects.filter(parser="xTherion").delete() DataIssue.objects.filter(parser="Tunnel").delete() + DataIssue.objects.filter(parser="z_Therion").delete() + DataIssue.objects.filter(parser="z_Tunnel").delete() if os.path.isfile("therionrefs.log"): os.remove("therionrefs.log")