2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-26 01:01:53 +00:00
troggle/parsers/scans.py

180 lines
6.7 KiB
Python
Raw Normal View History

2023-01-19 18:33:04 +00:00
import datetime
from pathlib import Path
2021-05-03 20:36:29 +01:00
import settings
from troggle.core.models.survex import SingleScan
2021-05-03 20:36:29 +01:00
from troggle.core.models.troggle import DataIssue
from troggle.core.models.wallets import Wallet
2021-05-03 20:36:29 +01:00
"""Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced. Loads all the wallets .
2023-01-19 21:18:42 +00:00
"""
2021-05-03 20:36:29 +01:00
# Filename of the JSON file looked for inside each wallet folder under
# <DRAWINGS_DATA>/walletjson/<year>/ when checking for wallets without scans.
contentsjson = "contents.json"
# Copied from settings.GIT; not referenced in the visible part of this module.
git = settings.GIT
2023-01-19 21:18:42 +00:00
def set_walletyear(wallet):
    """Call ``wallet.year()`` purely for its side effect.

    The returned value is deliberately ignored; per the original note,
    simply calling the method is what stores the year on the wallet
    as ``walletyear``.
    """
    wallet.year()
2023-01-19 21:18:42 +00:00
def set_JSONwalletdate(wallet):
    """Call ``wallet.date()`` purely for its side effect of setting ``.walletdate``.

    At this point in the import process the survex files have not been
    imported, so dates cannot be taken from them. There are about 40 JSON
    files (in 2022) which are read here instead. Actually, doing anything
    that reads the JSON sets ``.walletdate``.
    """
    # The return value is not needed, only the side effect.
    wallet.date()
2023-10-21 14:22:20 +01:00
def set_caves(wallet):
    """Call ``wallet.allcaves()`` purely for its side effect.

    The returned value is deliberately ignored; per the original note,
    calling the method saves the result as ``caves`` on the wallet.
    This ONLY gets the list held in the wallet JSON.
    """
    wallet.allcaves()
2021-05-03 20:36:29 +01:00
def load_all_scans():
    """Rebuild all Wallet and SingleScan objects from the scans directories.

    This iterates through the scans directories (either here or on the remote
    server) and builds up the models we can access later.
    Loads people as a simple string of fullnames. We should replace this with
    a list of Person slugs, and change the wallet editor to save People as slugs.

    Steps, in order:
      1. Delete every existing SingleScan and Wallet, and the DataIssue rows
         for the "scans", "wallets" and "mogrify" parsers.
      2. Walk settings.SCANS_ROOT recursively; for every acceptable file,
         create the owning Wallet on first sight and a SingleScan for the file.
      3. Walk <settings.DRAWINGS_DATA>/walletjson/<year>/<wallet>/ and create a
         Wallet for every folder holding a contents.json that was not found in
         step 2.
      4. Give every wallet that still has no walletyear the date 1999-01-01
         and save it.
      5. Call check_survexlist() on every wallet.

    Progress is reported with print(); nothing is returned.
    """
    print(" - Loading Survey Scans")

    SingleScan.objects.all().delete()
    Wallet.objects.all().delete()
    print(" - deleting all Wallet and SingleScan objects, and resize error messages")
    DataIssue.objects.filter(parser="scans").delete()
    DataIssue.objects.filter(parser="wallets").delete()
    DataIssue.objects.filter(parser="mogrify").delete()

    # These are valid old file types to be visible, they are not necessarily
    # allowed to be uploaded to a new wallet.
    # Sets, because they are only ever used for membership tests.
    valids = {
        ".top",
        ".txt",
        ".tif",
        ".png",
        ".jpg",
        ".jpeg",
        ".pdf",
        ".svg",
        ".gif",
        ".xvi",
        ".json",
        ".autosave",
        ".sxd",
        ".svx",
        ".th",
        ".th2",
        ".tdr",
        ".sql",
        ".zip",
        ".dxf",
        ".3d",
        ".ods",
        ".csv",
        ".xcf",
        ".xml",
    }
    validnames = {"thconfig", "manifest"}

    # iterate into the surveyscans directory
    # Not all folders with files in them are wallets.
    # they are if they are /2010/2010#33
    # or /1996-1999NotKHbook/
    # but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/
    print(" ", end="")
    scans_path = Path(settings.SCANS_ROOT)
    seen = set()  # directories already echoed in the progress output
    c = 0  # number of acceptable scan files found
    wallets = {}  # walletname -> Wallet created during this run

    for p in scans_path.rglob("*"):
        if not p.is_file():
            continue
        if p.suffix.lower() not in valids and p.name.lower() not in validnames:
            # print(f"'{p}'", end='\n')
            continue
        if p.parent == scans_path:  # skip files directly in /surveyscans/
            continue

        c += 1
        if c % 15 == 0:
            print(".", end="")
        if c % 750 == 0:
            print("\n ", end="")

        if p.parent.parent.parent.parent == scans_path:
            # print(f"too deep {p}", end='\n')
            # File is in a sub-folder of a wallet: the wallet is one level higher.
            fpath = p.parent.parent
        else:
            fpath = p.parent
        walletname = fpath.name

        wallet = wallets.get(walletname)
        if wallet is None:
            print("", flush=True, end="")
            # Create the wallet object. But we don't have a date for it yet.
            wallet = Wallet.objects.create(walletname=walletname, fpath=fpath)
            wallets[walletname] = wallet
            set_walletyear(wallet)
            set_caves(wallet)
            wallet.save()

        singlescan = SingleScan(ffile=fpath, name=p.name, wallet=wallet)
        singlescan.save()

        # only printing progress:
        # for names like "2010#33" report the parent (year) folder instead
        tag = p.parent
        if len(walletname) > 4 and walletname[4] == "#":
            tag = p.parent.parent

        if tag not in seen:
            print(f" {tag.name} ", end="")
            if len(str(tag.name)) > 17:
                print("\n ", end="")
            seen.add(tag)

    print(f"\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets")

    # but we also need to check if JSON exists, even if there are no uploaded scan files.
    # Here we know there is a rigid folder structure, so no need to look for sub folders
    print("\n - Checking for wallets where JSON exists, but there may be no uploaded scan files:")
    print(" ", end="")
    wjson = 0  # number of wallets found only through their JSON file
    contents_path = Path(settings.DRAWINGS_DATA, "walletjson")
    for yeardir in contents_path.iterdir():
        if not yeardir.is_dir():
            continue
        for walletpath in yeardir.iterdir():
            if not Path(walletpath, contentsjson).is_file():
                continue
            walletname = walletpath.name
            if walletname in wallets:
                continue

            wjson += 1
            if wjson % 10 == 0:
                print("\n ", end="")

            print(f"{walletname} ", end="")
            fpath = Path(settings.SCANS_ROOT, str(yeardir.stem), walletname)
            # The wallets found from JSON should all have dates already
            wallet, created = Wallet.objects.update_or_create(walletname=walletname, fpath=fpath)
            wallets[walletname] = wallet
            set_walletyear(wallet)
            set_caves(wallet)
            wallet.save()
            if not created:
                print(
                    f"\n - {walletname} was not created, but was not in directory walk of /surveyscans/. Who created it?"
                )

    print(f"\n - found another {wjson:,} JSON files, making a total of {len(wallets):,} wallets")

    # Only the 1999 wallets have filenames which mean that the walletyear will be unset:
    for w in Wallet.objects.filter(walletyear=None):
        w.walletyear = datetime.date(1999, 1, 1)
        # Must be saved: the wallets are re-queried from the database just
        # below, so an unsaved in-memory assignment would simply be lost.
        w.save()

    # Although the survex files haven't been processed yet, we can at least
    # check if the wallets refer to a real file or not
    for wallet in Wallet.objects.all():
        # this reads JSON
        wallet.check_survexlist()