2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-22 15:21:52 +00:00
troggle/parsers/scans.py

163 lines
5.9 KiB
Python
Raw Normal View History

2023-01-19 18:33:04 +00:00
import datetime
from pathlib import Path
2021-05-03 20:36:29 +01:00
import settings
2023-01-19 21:34:09 +00:00
from troggle.core.models.survex import SingleScan, Wallet
2021-05-03 20:36:29 +01:00
from troggle.core.models.troggle import DataIssue
2023-01-19 21:18:42 +00:00
"""Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced.
"""
2021-05-03 20:36:29 +01:00
# File name of the per-wallet JSON file; load_all_scans() looks for it at
# <settings.DRAWINGS_DATA>/walletjson/<year>/<wallet>/contents.json
contentsjson = "contents.json"
# Value of settings.GIT. NOTE(review): presumably the git command/executable - confirm.
# It is not referenced in the part of this module visible here.
git = settings.GIT
# TODO: Actually read all the JSON files and set the survex file field appropriately!
2023-01-19 21:18:42 +00:00
def setwalletyear(wallet):
    """Trigger the wallet's year calculation and discard the result.

    Only the side effect of ``wallet.year()`` is wanted here: according to the
    original author's note, calling it stores the year on the wallet object
    as ``walletyear``. The caller is responsible for saving the wallet.
    """
    wallet.year()
2021-05-03 20:36:29 +01:00
def load_all_scans():
    """Rebuild all Wallet and SingleScan objects from the files on disc.

    This iterates through the scans directories (either here or on the remote server)
    and builds up the models we can access later. It works in three passes:

    1. Walk settings.SCANS_ROOT recursively; every acceptable file becomes a
       SingleScan attached to the Wallet for the folder it lives in.
    2. Walk <settings.DRAWINGS_DATA>/walletjson/<year>/<wallet>/ and create a Wallet
       for every folder which has a contents.json but produced no scan files in pass 1.
    3. Give every Wallet which still has no walletyear a default of 1999-01-01.

    It does NOT read or validate anything in the JSON data attached to each wallet. Those checks
    are done at runtime, when a wallet is accessed, not at import time.

    All existing SingleScan and Wallet objects, and all DataIssue objects with
    parser="scans", are deleted first. Progress is printed to stdout. Returns None.
    """
    print(" - Loading Survey Scans")

    SingleScan.objects.all().delete()
    Wallet.objects.all().delete()
    print(" - deleting all Wallet and SingleScan objects")
    DataIssue.objects.filter(parser="scans").delete()

    # These are valid old file types to be visible, they are not necessarily allowed to be uploaded to a new wallet.
    valids = {
        ".top",
        ".txt",
        ".tif",
        ".png",
        ".jpg",
        ".jpeg",
        ".pdf",
        ".svg",
        ".gif",
        ".xvi",
        ".json",
        ".autosave",
        ".sxd",
        ".svx",
        ".th",
        ".th2",
        ".tdr",
        ".sql",
        ".zip",
        ".dxf",
        ".3d",
        ".ods",
        ".csv",
        ".xcf",
        ".xml",
    }
    # Complete (lower-cased) file names which are accepted whatever their suffix.
    validnames = {"thconfig", "manifest"}

    # iterate into the surveyscans directory
    # Not all folders with files in them are wallets.
    # they are if they are /2010/2010#33
    # or /1996-1999NotKHbook/
    # but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/
    print(" ", end="")
    scans_path = Path(settings.SCANS_ROOT)
    seen = set()  # folders already named in the progress output
    c = 0  # number of acceptable scan files loaded
    wallets = {}  # walletname -> Wallet, for every wallet created in this run

    for p in scans_path.rglob("*"):
        if not p.is_file():
            continue
        if p.suffix.lower() not in valids and p.name.lower() not in validnames:
            # print(f"'{p}'", end='\n')
            continue
        if p.parent == scans_path:  # skip files directly in /surveyscans/
            continue

        c += 1
        if c % 15 == 0:
            print(".", end="")
        if c % 750 == 0:
            print("\n ", end="")

        if p.parent.parent.parent.parent == scans_path:
            # print(f"too deep {p}", end='\n')
            # The file is in a sub-folder of a wallet: the wallet is one level higher.
            fpath = p.parent.parent
        else:
            fpath = p.parent
        walletname = fpath.name

        wallet = wallets.get(walletname)
        if wallet is None:
            print("", flush=True, end="")
            # Create the wallet object. But we don't have a date for it yet.
            wallet = Wallet(fpath=fpath, walletname=walletname)
            setwalletyear(wallet)
            wallet.save()
            wallets[walletname] = wallet

        # NOTE: ffile is the wallet folder, not the path of the individual file.
        singlescan = SingleScan(ffile=fpath, name=p.name, wallet=wallet)
        singlescan.save()

        # only printing progress:
        # for wallets named like "2010#33" report the enclosing folder, once.
        tag = p.parent
        if len(walletname) > 4 and walletname[4] == "#":
            tag = p.parent.parent

        if tag not in seen:
            print(f" {tag.name} ", end="")
            if len(str(tag.name)) > 17:
                print("\n ", end="")
            seen.add(tag)

    print(f"\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets")

    # but we also need to check if JSON exists, even if there are no uploaded scan files.
    # Here we know there is a rigid folder structure, so no need to look for sub folders
    print("\n - Checking for wallets where JSON exists, but there may be no uploaded scan files:")
    print(" ", end="")
    wjson = 0  # number of wallets found only through their JSON file
    contents_path = Path(settings.DRAWINGS_DATA, "walletjson")
    for yeardir in contents_path.iterdir():
        if not yeardir.is_dir():
            continue
        for walletpath in yeardir.iterdir():
            if not Path(walletpath, contentsjson).is_file():
                continue
            walletname = walletpath.name
            if walletname in wallets:
                continue  # already created from its scan files

            wjson += 1
            if wjson % 10 == 0:
                print("\n ", end="")

            print(f"{walletname} ", end="")
            fpath = Path(settings.SCANS_ROOT, str(yeardir.stem), walletname)
            # The wallets found from JSON should all have dates already
            wallet, created = Wallet.objects.update_or_create(walletname=walletname, fpath=fpath)
            wallets[walletname] = wallet
            # could now also load the json but we don't. Do later, on-demand
            # wallet.walletdate = wallet.date()
            # could check if link to svx file is valid too.. but do on-demand later
            # But we *do* set the walletyear:
            setwalletyear(wallet)
            if not created:
                print(
                    f"\n - {walletname} was not created, but was not in directory walk of /surveyscans/. Who created it?"
                )
            wallet.save()

    print(f"\n - found another {wjson:,} JSON files, making a total of {len(wallets):,} wallets")

    # Any wallet which still has no walletyear gets a default of 1999-01-01.
    # NOTE(review): presumably these are the pre-2000 wallets whose folder names
    # do not start with a year - confirm.
    for w in Wallet.objects.filter(walletyear=None):
        w.walletyear = datetime.date(1999, 1, 1)
        # Without save() the assignment only changes the in-memory object.
        w.save()