mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2024-11-22 07:11:52 +00:00
199 lines
7.9 KiB
Python
199 lines
7.9 KiB
Python
import datetime
|
|
from pathlib import Path
|
|
|
|
import settings
|
|
from troggle.core.models.survex import SingleScan
|
|
from troggle.core.models.troggle import DataIssue
|
|
from troggle.core.models.wallets import Wallet, archaic_wallets
|
|
|
|
"""Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced. Loads all the wallets.
|
|
|
|
todo - update the survexblock scanswallet field to refer to the wallet, if it isn't already.
|
|
But we can't do that, we only have the survexfile, not the block. But we could set it for ALL the blocks? Hmm.
|
|
"""
|
|
|
|
contentsjson = "contents.json"
|
|
|
|
git = settings.GIT
|
|
|
|
|
|
def set_walletyear(wallet):
    """Call ``wallet.year()`` purely for its side effect.

    The returned value is discarded. According to the original note in this
    module, calling ``year()`` stores the result on the wallet as
    ``walletyear``; that logic lives in the Wallet model, not here.
    """
    wallet.year()
|
|
|
|
def set_JSONwalletdate(wallet):
    """Call ``wallet.date()`` purely for its side effect.

    At this stage of the import the survex files have not been loaded yet, so
    no dates can be taken from them. About 40 wallet JSON files (as of 2022)
    are read instead. Per the original note, anything that reads the JSON
    sets ``.walletdate`` on the wallet; the return value is not needed.
    """
    wallet.date()
|
|
|
|
def set_caves(wallet):
    """Call ``wallet.allcaves()`` purely for its side effect.

    The return value is discarded. Per the original note, the call stores the
    cave list on the wallet as ``caves``, and that list ONLY comes from the
    wallet JSON.
    """
    wallet.allcaves()
|
|
|
|
def _wallet_dir_for(scanfile, scans_path):
    """Return the wallet directory that owns `scanfile`.

    Not every folder containing files is a wallet:
      - /2010/2010#33/           is a wallet
      - /1996-1999NotKHbook/     is a wallet
      - /2010/2010#33/therion/   is NOT: the wallet is /2010#33/

    Files nested one to three folders below a /year/wallet/ directory are
    attributed to that /year/wallet/ directory. Anything else (including
    files nested even deeper than that) is attributed to its immediate
    parent folder, exactly as the previous hand-written `.parent` chain did.
    """
    parts = scanfile.relative_to(scans_path).parts  # folders..., filename
    if 4 <= len(parts) <= 6:
        return scans_path.joinpath(*parts[:2])
    return scanfile.parent


def load_all_scans():
    """Rebuild all Wallet and SingleScan objects from the scans directories.

    This iterates through the scans directories (either here or on the remote
    server) and builds up the models we can access later. Steps:

    1. Delete all SingleScan and Wallet objects, and the DataIssues recorded
       by the "scans", "wallets" and "mogrify" parsers.
    2. Walk settings.SCANS_ROOT; create one Wallet per wallet directory and
       one SingleScan per file found (files directly in the root are skipped).
    3. Walk settings.DRAWINGS_DATA/walletjson and create Wallets for any
       folder holding a contents.json that had no scan files.
    4. Give wallets which still have no walletyear the default 1999-01-01.
    5. Ask every wallet to check its survex file list.

    Loads people as a simple string of fullnames. We should replace this with
    a list of Person slugs, and change the wallet editor to save People as
    slugs.

    READ THE FUNCTION get_json(self) in models/wallets which ALSO does this
    SAME job: needs refactoring.
    """
    print(" - Loading Survey Scans")

    SingleScan.objects.all().delete()
    Wallet.objects.all().delete()
    # print(" - deleting all Wallet and SingleScan objects, and resize error messages")
    DataIssue.objects.filter(parser="scans").delete()
    DataIssue.objects.filter(parser="wallets").delete()
    DataIssue.objects.filter(parser="mogrify").delete()

    # File types are deliberately NOT filtered: we do not care what the files
    # are called, and many have no suffix at all. Anything goes.

    print(" ", end="")
    scans_path = Path(settings.SCANS_ROOT)
    seen = set()  # (sub-folder path, walletname) pairs, only for the report below
    scan_count = 0
    wallets = {}  # walletname -> Wallet; assumes all walletnames are unique

    # rglob + is_file() implicitly ignores empty folders
    for p in scans_path.rglob("*"):
        if not p.is_file():
            continue
        if p.parent == scans_path:  # skip files directly in /surveyscans/
            continue

        scan_count += 1
        if scan_count % 120 == 0:
            print(".", end="")
        if scan_count % 6000 == 0:
            print("\n ", end="")

        # Finding the wallet from deeply hidden subdirectories
        fpath = _wallet_dir_for(p, scans_path)
        walletname = fpath.name

        wallet = wallets.get(walletname)
        if wallet is None:
            print("", flush=True, end="")
            # Create the wallet object. But we don't have a date for it yet.
            wallet = Wallet.objects.create(walletname=walletname, fpath=fpath)
            wallets[walletname] = wallet
            set_walletyear(wallet)
            set_caves(wallet)
            wallet.save()

        # NOTE(review): ffile is given the wallet directory, not the path of
        # the scan file itself; kept as in the original - confirm intended.
        singlescan = SingleScan(ffile=fpath, name=p.name, wallet=wallet)
        singlescan.save()

        # only printing progress:
        tag = str(p.parent.name)  # folder containing the scan file
        if len(tag) > 4 and tag[4] != "#":
            # relative_path = p.relative_to(scans_path, walk_up=False) # from python 3.12
            relative_path = p.relative_to(scans_path)
            seen.add((str(relative_path.parent) + "/", walletname))

    print("\n - modern wallets with sub-folders")
    for dirc, wall in sorted(seen):
        if wall not in archaic_wallets:
            print(f" {wall} {dirc}", end="\n")
    print(f"\n - found and loaded {scan_count:,} acceptable scan files in {len(wallets):,} wallets")

    # but we also need to check if JSON exists, even if there are no uploaded scan files.
    # Here we know there is a rigid folder structure, so no need to look for sub folders
    print("\n - Checking for wallets where JSON exists, but there may be no uploaded scan files:")
    print(" ", end="")
    wjson = 0
    contents_path = Path(settings.DRAWINGS_DATA, "walletjson")
    for yeardir in contents_path.iterdir():
        if not yeardir.is_dir():
            continue
        for walletpath in yeardir.iterdir():
            if not Path(walletpath, contentsjson).is_file():
                continue
            walletname = walletpath.name
            if walletname in wallets:
                continue

            wjson += 1
            if wjson % 10 == 0:
                print("\n ", end="")

            print(f"{walletname} ", end="")
            fpath = Path(settings.SCANS_ROOT, str(yeardir.stem), walletname)
            # The wallets found from JSON should all have dates already
            wallet, created = Wallet.objects.update_or_create(walletname=walletname, fpath=fpath)
            wallets[walletname] = wallet
            set_walletyear(wallet)
            set_caves(wallet)
            wallet.save()
            if not created:
                print(
                    f"\n - {walletname} was not created, but was not in directory walk of /surveyscans/. Who created it?"
                )
    print(f"\n - found another {wjson:,} JSON files, making a total of {len(wallets):,} wallets")

    # Only the 1999 wallets have filepaths which would mean that the walletyear will be unset.
    # The default must be saved explicitly: assigning the attribute alone
    # changes only the in-memory instance and is lost when the loop ends.
    for undated in Wallet.objects.filter(walletyear=None):
        undated.walletyear = datetime.date(1999, 1, 1)
        undated.save()

    # Although the survex files haven't been processed yet, we can at least check if the wallets refer to a real file or not
    for wallet in Wallet.objects.all():
        # this reads JSON
        wallet.check_survexlist()
|
|
|
|
|
|
|
|
|
|
|