diff --git a/core/utils.py b/core/utils.py index 521892c..1081e17 100644 --- a/core/utils.py +++ b/core/utils.py @@ -74,6 +74,8 @@ def chaosmonkey(n): def GetListDir(sdir): '''handles url or file, so we can refer to a set of scans (not drawings) on another server returns a list of f (file), ff (file full path), is_dir (bool) + + REPLACE all use of this with Path.rglob() ! ''' res = [ ] if type(sdir) is str and sdir[:7] == "http://": diff --git a/parsers/scans.py b/parsers/scans.py index 109d231..3922b6b 100644 --- a/parsers/scans.py +++ b/parsers/scans.py @@ -66,16 +66,18 @@ def LoadListScansFile(wallet): if c>=10: print(".", end='') c = 0 - + def load_all_scans(): '''This iterates through the scans directories (either here or on the remote server) and builds up the models we can access later. It does NOT read or validate anything in the JSON data attached to each wallet. Those checks are done at runtime, when a wallet is accessed, not at import time. - NOTE that parsers/survex.py does NOT create a wallet if it finds an unrecognised *REF wallet. - Instead it reports an error in DataIssues. But it does make a link in the db between the - existing wallet (probably no JSON, just a folder containing scans) and the survex file. + Replace GetListDir with a more modern Path.iter idiom + path = Path("scans") + for p in path.rglob("*"): + print(p.name) + ''' print(' - Loading Survey Scans') @@ -83,49 +85,68 @@ def load_all_scans(): Wallet.objects.all().delete() print(' - deleting all Wallet and SingleScan objects') DataIssue.objects.filter(parser='scans').delete() - - # first do the smkhs (large kh survey scans) directory - # this seems to be never used ?! - #We should load all the scans, even for nonstandard names. 
- manywallets_smkhs = Wallet(fpath=os.path.join(settings.SCANS_ROOT, "../surveys/smkhs"), walletname="smkhs") - print("smkhs", end=' ') - if os.path.isdir(manywallets_smkhs.fpath): - manywallets_smkhs.save() - LoadListScansFile(manywallets_smkhs) - else: - print("smkhs NOT LOADED", end=' ') + + valids = [".top",".txt",".tif",".png",".jpg",".jpeg",".pdf",".svg",".gif",".xvi", + ".json",".autosave",".sxd",".svx",".th",".th2",".tdr",".sql",".zip",".dxf",".3d", + ".ods",".csv",".xcf",".xml"] + validnames = ["thconfig","manifest"] # iterate into the surveyscans directory + # Not all folders with files in them are wallets. + # they are if they are /2010/2010#33 + # or /1996-1999NotKHbook/ + # but not if they are /2010/2010#33/therion or /1998/ print(' - ', end=' ') - for walletname, fpath, fisdir in GetListDir(settings.SCANS_ROOT): + scans_path = Path(settings.SCANS_ROOT) + seen = [] + for p in scans_path.rglob('*'): + if p.is_file(): + if p.suffix.lower() not in valids and p.name.lower() not in validnames: + # print(f"'{p}'", end='\n') + pass + elif p.parent == scans_path: # skip files directly in /surveyscans/ + pass + else: + if p.parent.parent.parent.parent == scans_path: + # print(f"too deep {p}", end='\n') + fpath = p.parent.parent + walletname = p.parent.parent.name # wallet is one level higher + else: + fpath = p.parent + walletname = p.parent.name + + # UNFINISHED + tag = p.parent + if len(walletname)>4: + if walletname[4] == "#": + tag = p.parent.parent + + if tag not in seen: + print(f"{tag.name}", end=' ') + seen.append(tag) + #wallet = Wallet(fpath=fpath, walletname=walletname) + + + print('\n UNFINISHED \n\n--- ') + for topfolder, fpath, fisdir in GetListDir(settings.SCANS_ROOT): if not fisdir: continue - for walletname, fpath, fisdir in GetListDir(fpath): - if fisdir: - wallet = Wallet(fpath=fpath, walletname=walletname) - # this is where we should record the year explicitly - # line 347 of view/uploads.py and needs refactoring for loading contentsjson - 
CheckEmptyDate(wallet) - CheckEmptyPeople(wallet) - wallet.save() - LoadListScansFile(wallet) - - # # do the year folders - # if re.match(r"\d\d\d\d$", walletname): - # print(f"{walletname}", end=' ') - # for walletname, fpath, fisdir in GetListDir(fpath): - # if fisdir: - # wallet = Wallet(fpath=fpath, walletname=walletname) - # # this is where we should record the year explicitly - # # line 347 of view/uploads.py and needs refactoring for loading contentsjson - # CheckEmptyDate(wallet) - # CheckEmptyPeople(wallet) - # wallet.save() - # LoadListScansFile(wallet) - # else: - # # but We *should* load all the scans, even for nonstandard names. - # print(f'\n - IGNORE {walletname} - {fpath}') + # do the year folders + if re.match(r"\d\d\d\d$", topfolder): + print(f"{topfolder}", end=' ') + for walletname, fpath, fisdir in GetListDir(fpath): + if fisdir: + wallet = Wallet(fpath=fpath, walletname=walletname) + # this is where we should record the year explicitly + # line 347 of view/uploads.py and needs refactoring for loading contentsjson + CheckEmptyDate(wallet) + CheckEmptyPeople(wallet) + wallet.save() + LoadListScansFile(wallet) + else: + # but We *should* load all the scans, even for nonstandard names. + print(f'\n - IGNORE {walletname} - {fpath}') # but we also need to check if JSON exists, even if there are no uploaded scan files contents_path = Path(settings.DRAWINGS_DATA, "walletjson")