diff --git a/core/TESTS/test_drawings.py b/core/TESTS/test_drawings.py index fa0f7b2..693a59e 100644 --- a/core/TESTS/test_drawings.py +++ b/core/TESTS/test_drawings.py @@ -98,16 +98,6 @@ class DrawingsPathlibTests(TestCase): self.assertIn(w, df.dwgwallets.all()) self.assertIn(ss, df.scans.all()) - def test_findwalletimage_logs_missing_scan(self): - # Wallet exists but no scan inside. Should create a DataIssue - w = Wallet.objects.create(fpath='x', walletname='2026#01') - df = DrawingFile.objects.create(dwgpath='tst2.th2', dwgname='tst2') - - drawings.findwalletimage(df, '2026#01/missing.jpg') - - di = DataIssue.objects.filter(parser='Therion', message__contains='not actually found') - self.assertTrue(di.exists()) - def test_drawing_reference_multiple_creates_dataissue(self): df1 = DrawingFile.objects.create(dwgpath='ref1', dwgname='shared') df2 = DrawingFile.objects.create(dwgpath='ref2', dwgname='shared') diff --git a/parsers/drawings.py b/parsers/drawings.py index f2a30ef..33dfae4 100644 --- a/parsers/drawings.py +++ b/parsers/drawings.py @@ -80,8 +80,7 @@ def fetch_drawingfiles_by_paths(paths, chunk_size: int = 500): return mapping rx_wallet = re.compile(r""" - # r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path - # This regex is designed to extract a specific directory prefix and a filename + # This regex is designed to extract a specific directory prefix (walletname) and a filename # from the end of a path string. # --- Group 1: Directory or Survey Prefix --- @@ -111,7 +110,7 @@ rx_wallet = re.compile(r""" | # OR 1991surveybook # Matches the literal string "1991surveybook" | # OR - smkhs # Matches the literal string "smkhs" + smkhs # This is now expofiles/surveys/smkhs/ not in surveyscans/ at all. ) # End of Capture Group 1 / # A literal forward slash separating the parts @@ -137,7 +136,7 @@ def _set_filesize_and_check(fullpath, model_obj, parser_label, url_prefix="/dwgd except Exception as e: message = f"! Unable to stat file {fullpath}: {e}" print(message) - DataIssue.objects.create(parser=parser_label, message=message, url=f"{url_prefix}/{getattr(model_obj, 'dwgpath', '')}") + DataIssue.objects.update_or_create(parser=parser_label, message=message, url=f"{url_prefix}/{getattr(model_obj, 'dwgpath', '')}") return False model_obj.filesize = size @@ -145,7 +144,7 @@ def _set_filesize_and_check(fullpath, model_obj, parser_label, url_prefix="/dwgd if size <= 0: message = f"! Zero length {parser_label.lower()} file {fullpath}" print(message) - DataIssue.objects.create(parser=parser_label, message=message, url=f"{url_prefix}/{getattr(model_obj, 'dwgpath', '')}") + DataIssue.objects.update_or_create(parser=parser_label, message=message, url=f"{url_prefix}/{getattr(model_obj, 'dwgpath', '')}") return False return True @@ -181,6 +180,11 @@ def parse_tnl_file(dwgfile, path): # Delegate to the unified reference processor for consistent behaviour _process_reference(dwgfile, path, parser_label="Tunnel") +def _handle_obsolete_wallets(old_wallet, dwgfile, path, parser_label): + message = f"- Warning {old_wallet} not a currently valid wallet name. In {path}" + print(message) + DataIssue.objects.update_or_create(parser=parser_label, message=message, url=f"/dwgdataraw/{path}") + def _process_reference(dwgfile, path, parser_label="Tunnel"): """Unified processor to link drawing files to wallets/scans or referenced drawings. @@ -195,34 +199,42 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"): return None, None if mscansdir := rx_wallet.search(path): - scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1)) + scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1)) # wallet name if len(scanswalletl): wallet = scanswalletl[0] if len(scanswalletl) > 1: message = f"! More than one scan FOLDER matches filter query. [{scanswalletl[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path}" print(message) - DataIssue.objects.create(parser=parser_label, message=message) + DataIssue.objects.update_or_create(parser=parser_label, message=message) + else: # found a wallet name, but it is not one we recognise as having been imported + _handle_obsolete_wallets(mscansdir.group(1),dwgfile, path, parser_label) if wallet: - scansfilel = wallet.singlescan_set.filter(name=mscansdir.group(2)) + scansfilel = wallet.singlescan_set.filter(name=mscansdir.group(2)) # file name if len(scansfilel): if len(scansfilel) > 1: plist = [sf.ffile for sf in scansfilel] message = f"! More than one image FILENAME matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path} {plist}" print(message) - DataIssue.objects.create(parser=parser_label, message=message) + DataIssue.objects.update_or_create(parser=parser_label, message=message) scansfile = scansfilel[0] if wallet: dwgfile.dwgwallets.add(wallet) if scansfile: dwgfile.scans.add(scansfile) + # If a wallet was found but no scan was associated from the wallet, record a DataIssue. There are a lot of these.. + if wallet and not scansfile: + scanfilename = Path(path).name + message = f"! In '{wallet.walletname}' scanned file is not actually found '{scanfilename}' in '{path}'" + wurl = f"/survey_scans/{wallet.walletname}/".replace("#", ":") + DataIssue.objects.update_or_create(parser=parser_label, message=message, url=wurl) return wallet, scansfile # Not a wallet reference; check image extension and possibly drawing-to-drawing reference suffix = Path(path).suffix.lower() if _is_image_suffix(suffix): - # It's an image/scanned file type; we don't treat it as a referenced drawing + # It's an image/scanned file type; we don't treat it as a referenced drawing, though in future we should note the link return None, None # Not an image file: perhaps a reference to another drawing (no ext or other ext) @@ -233,28 +245,13 @@ def _process_reference(dwgfile, path, parser_label="Tunnel"): plist = [df.dwgpath for df in rdwgfilel] message = f"- Warning {len(rdwgfilel)} files named '{name}' {plist}" print(message) - DataIssue.objects.create(parser=parser_label, message=message, url=f"/dwgdataraw/{path}") + DataIssue.objects.update_or_create(parser=parser_label, message=message, url=f"/dwgdataraw/{path}") rdwgfile = rdwgfilel[0] if hasattr(dwgfile, 'dwgcontains'): # implement model change in models/survex.py to use this dwgfile.dwgcontains.add(rdwgfile) return None, None - -def findwalletimage(therionfile, foundpath): - """Tries to link the drawing file (Therion format) to the referenced image (scan) file""" - # Delegate to the unified reference processor for consistent behaviour - foundpath = foundpath.strip("{}") - wallet, scansfile = _process_reference(therionfile, foundpath, parser_label="Therion") - - # If a wallet was found but no scan was associated from the wallet, record a DataIssue - if wallet and not scansfile: - scanfilename = Path(foundpath).name - message = f'! In {wallet.walletname} scanned file is not actually found {scanfilename} mentioned in "{therionfile.dwgpath}"' - wurl = f"/survey_scans/{wallet.walletname}/".replace("#", ":") - DataIssue.objects.create(parser="Therion", message=message, url=wurl) - - def findimportinsert(therionfile, imp): """Tries to link the scrap (Therion format) to the referenced therion scrap""" pass @@ -290,12 +287,12 @@ def _assign_wallets_for_model(model_obj, wallet_names, parser_label="Tunnel"): # Nothing found: record a DataIssue message = f" ! wallet(s) '{names}' not found from {getattr(model_obj, 'dwgpath', model_obj)}" print(message) - DataIssue.objects.create(parser=parser_label, message=message, url=f"/dwgdataraw/{getattr(model_obj, 'dwgpath', '')}") + DataIssue.objects.update_or_create(parser=parser_label, message=message, url=f"/dwgdataraw/{getattr(model_obj, 'dwgpath', '')}") return [] except Exception as e: message = f" ! Exception while looking up wallet(s) '{names}' from {getattr(model_obj, 'dwgpath', model_obj)} -- ({e})" print(message) - DataIssue.objects.create(parser=parser_label, message=message, url=f"/dwgdataraw/{getattr(model_obj, 'dwgpath', '')}") + DataIssue.objects.update_or_create(parser=parser_label, message=message, url=f"/dwgdataraw/{getattr(model_obj, 'dwgpath', '')}") return [] @@ -334,22 +331,21 @@ def settherionfileinfo(filetuple): # Surely not needed for .th files ?? only .th2 ? message = f"! Un-parsed image filename: {therionfile.dwgname} : {xth_me.split()[-3]} - {therionfile.dwgpath}" # print(message) - # DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}') + # DataIssue.objects.update_or_create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}') # ! Un-parsed image filename: 107coldest : ../../../expofiles/surveyscans/2015/2015#20/notes.jpg - therion/plan/107coldest.th2 with open("therionrefs.log", "a") as lg: lg.write(message + "\n") - findwalletimage(therionfile, xth_me.split()[-3]) - # print(f"{therionfile.dwgname} :{xth_me.split()[-3]}") - - # therionfile.save() + foundpath = xth_me.split()[-3].strip("{}") + _process_reference(therionfile, foundpath, parser_label="Therion") + for inp in rx_input.findall(ttext): # if this 'input' is a .th2 file we have already seen, then we can assign this as a sub-file # but we would need to disentangle to get the current path properly message = f"! Un-set (?) Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}" # print(message) - DataIssue.objects.create(parser="xTherion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}") + DataIssue.objects.update_or_create(parser="xTherion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}") findimportinsert(therionfile, inp) # Defer saving scalar fields; caller will perform bulk_update.