From 3e198fc4106bc39b73914a88166b87273680fad5 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Sun, 11 Aug 2024 18:41:27 +0300 Subject: [PATCH] cleaned up wallets import process --- core/models/wallets.py | 37 +++++++++++++++++++------------------ parsers/scans.py | 30 ++++++++++++++++++------------ parsers/survex.py | 4 ++-- 3 files changed, 39 insertions(+), 32 deletions(-) diff --git a/core/models/wallets.py b/core/models/wallets.py index d94d048..79e022d 100644 --- a/core/models/wallets.py +++ b/core/models/wallets.py @@ -143,43 +143,41 @@ class Wallet(models.Model): Reads JSON date and sets w.walletdate + This repeats a lot of stuff done in the initial parsing job: + traversing the file system. + Needs to be refactored as special handling of subdirectories is duplicated + import DataIssue locally to prevent import cycle problem""" if hasattr(self, "JSONdata"): return self.JSONdata scans_path = Path(settings.SCANS_ROOT) + wurl = self.get_url() # :drawings: walletjson/2022/2022#01/contents.json # fpath = /mnt/d/EXPO/expofiles/surveyscans/1999/1999#02 - # This does not work if there are subdirectories in the surveyscan folder fp = Path(self.fpath) if fp.name in archaic_wallets: return None - - if fp.parent.parent.parent.parent == scans_path: - print(f" ! ", end="") - if fp.parent.parent.parent.parent.parent == scans_path: - print(f" !! ", end="") - + if fp.parent.parent.parent.parent == scans_path: subfolder = fp.parent.name wname = fp.parent.parent.name wyear = fp.parent.parent.parent.name - print(f"\n - Subfolder {subfolder} one deep, detected in {wname}") + # print(f" - Subfolder {subfolder} two deep, detected in {wname=} {wyear=} {wurl=}") else: wname = fp.name wyear = fp.parent.name - try: - y = int(wyear) - except: - message = f"\n! 2 or more deep subfolder detected?:\n {fp.parent.name=}\n {fp.name=} \n {self.fpath=}" - print(message) - # this is a hack, work down from /surveyscans instead. To be fixed - wname = fp.parent.parent.parent.name - wyear = fp.parent.parent.parent.parent.name - wurl = self.get_url() - + try: + y = int(wyear) + except: + message = f"! 3 or more deep subfolder detected?:\n {fp.parent.name=}\n {fp.name=} \n {self.fpath=} {wurl=}" + print(message) + # this is a hack, work down from /surveyscans instead. To be fixed + wname = fp.parent.parent.parent.name + wyear = fp.parent.parent.parent.parent.name + if len(wyear) != 4 or len(wname) !=6: # no contents.json for old-style wallets # but this ruined all the tick-list displays.. why?! @@ -188,6 +186,9 @@ class Wallet(models.Model): jsonfile = Path(settings.DRAWINGS_DATA, "walletjson") / wyear / wname / "contents.json" if not Path(jsonfile).is_file(): + message = f"! {jsonfile} is not a file:\n {wyear=} (should be eg. '2035')\n {wname=} (should be eg. '2035#13')\n {self.fpath=}" + print(message) + if wname not in archaic_wallets: message = f"! {jsonfile} is not a file:\n {wyear=} (should be eg. '2023')\n {wname=} (should be eg. '2023#13')\n {self.fpath=}" print(message) diff --git a/parsers/scans.py b/parsers/scans.py index 947278e..206f645 100644 --- a/parsers/scans.py +++ b/parsers/scans.py @@ -81,6 +81,9 @@ def load_all_scans(): # they are if they are /2010/2010#33 # or /1996-1999NotKHbook/ # but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/ + + # READ THE FUNCTION get_json(self) in models/wallets which ALSO does this SAME job + # needs refactoring print(" ", end="") scans_path = Path(settings.SCANS_ROOT) seen = set() @@ -96,19 +99,25 @@ def load_all_scans(): pass else: c += 1 - if c % 60 == 0: + if c % 120 == 0: print(".", end="") - if c % 3000 == 0: + if c % 6000 == 0: print("\n ", end="") - if p.parent.parent.parent.parent == scans_path: - # the wallet has subfolders, which we are uninterested in at this point. - # print(f"too deep {p}", end='\n') + # Finding the wallet from deeply hidden subdirectories + if p.parent.parent.parent.parent.parent.parent == scans_path: + fpath = p.parent.parent.parent.parent + walletname = p.parent.parent.parent.parent.name + elif p.parent.parent.parent.parent.parent == scans_path: + fpath = p.parent.parent.parent + walletname = p.parent.parent.parent.name + elif p.parent.parent.parent.parent == scans_path: fpath = p.parent.parent - walletname = p.parent.parent.name # wallet is one level higher + walletname = p.parent.parent.name else: fpath = p.parent walletname = p.parent.name + if walletname in wallets: # assumes all walletnames are unique wallet = wallets[walletname] @@ -129,17 +138,14 @@ def load_all_scans(): if len(tag) > 4: if tag[4] != "#": relative_path = p.relative_to(scans_path, walk_up=False) - seen.add(str(relative_path.parent)) + seen.add((str(relative_path.parent)+"/", walletname)) wjson = 0 - print("\n ", end="") seenlist = list(seen) seenlist.sort() - + print("\n") for tag in seenlist: wjson += 1 - if wjson % 10 == 0: - print("\n ", end="") - print(f" {tag} ", end="") + print(f" {tag} ", end="\n") print(f"\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets") # but we also need to check if JSON exists, even if there are no uploaded scan files. diff --git a/parsers/survex.py b/parsers/survex.py index 45712f6..e59a3bb 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -2709,12 +2709,12 @@ def set_survexblocks(): pass elif b.scanswallet: if b.date > date(2024, 1, 1) and b.date < date(2025, 1, 1): - print(f"2019 not set{wallet} on {b.survexfile} for block {b} as set explicitly to {b.scanswallet}") + print(f"2024-check not set{wallet} on {b.survexfile} for block {b} as set explicitly to {b.scanswallet}") else: b.scanswallet = wallet b.save() if b.date > date(2024, 1, 1) and b.date < date(2025, 1, 1): - print(f"2019 setting {wallet} on {b.survexfile} for block {b}") + print(f"2024-check setting {wallet} on {b.survexfile} for block {b}") def survexifywallets(): """Gets the caves from the list of survexblocks