2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-21 23:01:52 +00:00

Replaced maintenance headache with cleaner folder walking

This commit is contained in:
Philip Sargent 2022-09-23 23:43:34 +03:00
parent c3672b476c
commit 6e3fdd35c1
6 changed files with 107 additions and 109 deletions

View File

@ -70,27 +70,6 @@ def chaosmonkey(n):
# print("CHAOS strikes !", file=sys.stderr)
return True
#
def GetListDir(sdir):
'''handles url or file, so we can refer to a set of scans (not drawings) on another server
returns a list of f (file), ff (file full path), is_dir (bool)
REPLACE all use of this with Path.rglob() !
'''
res = [ ]
if type(sdir) is str and sdir[:7] == "http://":
# s = urllib.request.urlopen(sdir)
message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]"
print(message)
DataIssue.objects.create(parser='Drawings', message=message)
sdir[:7] = ""
for f in os.listdir(sdir):
if f[0] != ".":
ff = os.path.join(sdir, f)
res.append((f, ff, os.path.isdir(ff)))
return res
def only_commit(fname, message):
'''Only used to commit a survex file edited and saved in view/survex.py
'''

View File

@ -208,8 +208,8 @@ def cavewallets(request, caveid):
wallets.add(z)
else:
wurl = f"/scanupload/{z.walletname.replace('#',':')}"
print(f' - Unrecognised cave name \'{zcaveid}\' in {z.walletname}')
message = f" ! In {z.walletname} there is an unrecognised cave name '{zcaveid}'"
print(f' - Unrecognised cave name \'{zcaveid}\' in {z.walletname} (out of {len(Gcavelookup):,} cave names')
message = f" ! In {z.walletname} there is an unrecognised cave name '{zcaveid}' (out of {len(Gcavelookup):,} cave names"
DataIssue.objects.update_or_create(parser='scans', message=message, url=wurl)
manywallets = list(set(wallets))

View File

@ -208,7 +208,7 @@ def get_complaints(complaints, waldata, svxfiles, files, wallet, wurl):
if not waldata["description written"]:
complaints.append("The guidebook description needs writing into the survex file. Tick the 'Cave description written' checkbox when this is done.")
# QMs
if not waldata["qms written"] and int(w.year()) >= 2015:
if not waldata["qms written"] and w.year() and int(w.year()) >= 2015:
complaints.append("The QMs needs writing into the survex file. Tick the 'QMs written' checkbox when this is done.")
# Website

View File

@ -12,7 +12,7 @@ from functools import reduce
import settings
from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
from troggle.core.models.troggle import DataIssue
from troggle.core.utils import save_carefully, GetListDir
from troggle.core.utils import save_carefully
'''Searches through all the :drawings: repository looking
for tunnel and therion files

View File

@ -14,7 +14,7 @@ from pathlib import Path
import settings
from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
from troggle.core.models.troggle import DataIssue
from troggle.core.utils import save_carefully, GetListDir
from troggle.core.utils import save_carefully
from troggle.core.views.scans import datewallet
'''Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced.
@ -26,66 +26,63 @@ git = settings.GIT
# to do: Actually read all the JSON files and set the survex file field appropriately!
def CheckEmptyDate(wallet):
'''If date is not set, get it from a linked survex file.
Could also look at filedates for the scans in expofiles/surveyscans/ , but these can be re-set by copying.
'''
earliest = datetime.datetime.now().date()
# def GetListDir(sdir):
# '''handles url or file, so we can refer to a set of scans (not drawings) on another server
# returns a list of f (file), ff (file full path), is_dir (bool)
# This is not working, and I can't see why. The scans parser is now taking a very long time.
#datewallet(wallet, earliest)
return
def CheckEmptyPeople(wallet):
'''If people list is empty, copy them from the survex files: all of them
To be a Troggle model change; a many:many relationship between wallets and people,
as well as being a list in the JSON file (which is the permanent repository). We want the many:many
relationship so that we can filter wallets based on a person.
For the moment, we will just get a list..
'''
return
# REPLACE all use of this with Path.rglob() !
# '''
# res = [ ]
# if type(sdir) is str and sdir[:7] == "http://":
# # s = urllib.request.urlopen(sdir)
# message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]"
# print(message)
# DataIssue.objects.create(parser='Drawings', message=message)
# sdir[:7] = ""
def LoadListScansFile(wallet):
gld = [ ]
# flatten out any directories in these wallet folders - should not be any
for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath):
if fisdiryf:
gld.extend(GetListDir(ffyf))
else:
gld.append((fyf, ffyf, fisdiryf))
# for f in os.listdir(sdir):
# if f[0] != ".":
# ff = os.path.join(sdir, f)
# res.append((f, ff, os.path.isdir(ff)))
# return res
# def LoadListScansFile(wallet):
# # formerly a generic troggle utility, written by whom? It is being gradually expunged and replaced by Python standard-library functions
# gld = [ ]
# # flatten out any directories in these wallet folders - should not be any
# for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath):
# if fisdiryf:
# gld.extend(GetListDir(ffyf))
# else:
# gld.append((fyf, ffyf, fisdiryf))
c=0
for (fyf, ffyf, fisdiryf) in gld:
if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif|xvi)(?i)$", fyf):
singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet)
singlescan.save()
c+=1
if c>=10:
print(".", end='')
c = 0
# c=0
# for (fyf, ffyf, fisdiryf) in gld:
# if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif|xvi)(?i)$", fyf):
# singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet)
# singlescan.save()
# c+=1
# if c>=10:
# print(".", end='')
# c = 0
def load_all_scans():
'''This iterates through the scans directories (either here or on the remote server)
and builds up the models we can access later.
It does NOT read or validate anything in the JSON data attached to each wallet. Those checks
are done at runtime, when a wallet is accessed, not at import time.
Replace GetListDir with a more modern Path.iter idiom
path = Path("scans")
for p in path.rglob("*"):
print(p.name)
'''
print(' - Loading Survey Scans')
SingleScan.objects.all().delete()
Wallet.objects.all().delete()
print(' - deleting all Wallet and SingleScan objects')
print(' - deleting all Wallet and SingleScan objects')
DataIssue.objects.filter(parser='scans').delete()
# These are valid old file types to be visible, they are not necessarily allowed to be uploaded to a new wallet.
valids = [".top",".txt",".tif",".png",".jpg",".jpeg",".pdf",".svg",".gif",".xvi",
".json",".autosave",".sxd",".svx",".th",".th2",".tdr",".sql",".zip",".dxf",".3d",
".ods",".csv",".xcf",".xml"]
@ -95,10 +92,12 @@ def load_all_scans():
# Not all folders with files in them are wallets.
# they are if they are /2010/2010#33
# or /1996-1999NotKHbook/
# but not if they are /2010/1010#33/therion or /1998/
print(' - ', end=' ')
# but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/
print(' - ', end='')
scans_path = Path(settings.SCANS_ROOT)
seen = []
c=0
wallets = {}
for p in scans_path.rglob('*'):
if p.is_file():
if p.suffix.lower() not in valids and p.name.lower() not in validnames:
@ -107,6 +106,13 @@ def load_all_scans():
elif p.parent == scans_path: # skip files directly in /surveyscans/
pass
else:
c+=1
if c % 15 == 0 :
print(".", end='')
if c % 500 == 0 :
print("\n -", end='')
if p.parent.parent.parent.parent == scans_path:
# print(f"too deep {p}", end='\n')
fpath = p.parent.parent
@ -114,54 +120,66 @@ def load_all_scans():
else:
fpath = p.parent
walletname = p.parent.name
# UNFINISHED
if walletname in wallets:
wallet = wallets[walletname]
else:
print("", flush=True, end='')
wallet = Wallet(fpath=fpath, walletname=walletname)
wallet.save()
wallets[walletname] = wallet
singlescan = SingleScan(ffile=fpath, name=p.name, wallet=wallet)
singlescan.save()
# only printing progress:
tag = p.parent
if len(walletname)>4:
if walletname[4] == "#":
tag = p.parent.parent
if tag not in seen:
print(f"{tag.name}", end=' ')
print(f" {tag.name} ", end='')
seen.append(tag)
#wallet = Wallet(fpath=fpath, walletname=walletname)
print('\n UNFINISHED \n\n--- ')
for topfolder, fpath, fisdir in GetListDir(settings.SCANS_ROOT):
if not fisdir:
continue
print(f'\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets')
# if False:
# n=0
# for topfolder, fpath, fisdir in GetListDir(settings.SCANS_ROOT):
# if not fisdir:
# continue
# do the year folders
if re.match(r"\d\d\d\d$", topfolder):
print(f"{topfolder}", end=' ')
for walletname, fpath, fisdir in GetListDir(fpath):
if fisdir:
wallet = Wallet(fpath=fpath, walletname=walletname)
# this is where we should record the year explicitly
# line 347 of view/uploads.py and needs refactoring for loading contentsjson
CheckEmptyDate(wallet)
CheckEmptyPeople(wallet)
wallet.save()
LoadListScansFile(wallet)
else:
# but we *should* load all the scans, even for nonstandard names.
print(f'\n - IGNORE {walletname} - {fpath}')
# but we also need to check if JSON exists, even if there are no uploaded scan files
# # do the year folders
# # if re.match(r"\d\d\d\d$", topfolder):
# print(f"{topfolder}", end=' ')
# for walletname, fpath, fisdir in GetListDir(fpath):
# if fisdir:
# wallet = Wallet(fpath=fpath, walletname=walletname)
# # this is where we should record the year explicitly
# # line 347 of view/uploads.py and needs refactoring for loading contentsjson
# wallet.save()
# LoadListScansFile(wallet)
# # else:
# # # but We *should* load all the scans, even for nonstandard names.
# # print(f'\n - IGNORE {topfolder} - {fpath}')
# print("", flush=True)
# but we also need to check if JSON exists, even if there are no uploaded scan files.
# Here we know there is a rigid folder structure, so no need to look for sub folders
contents_path = Path(settings.DRAWINGS_DATA, "walletjson")
for yeardir in contents_path.iterdir():
if yeardir.is_dir():
for walletpath in yeardir.iterdir():
if Path(walletpath, contentsjson).is_file():
walletname = walletpath.name
wallet, created = Wallet.objects.update_or_create(walletname=walletname)
# should now also load the json and use it ! check &ref is correct or missing too
if created:
print(f"\n{walletname} created: only JSON, no actual uploaded scan files.", end=' ')
CheckEmptyDate(wallet)
CheckEmptyPeople(wallet)
wallet.save()
print("", flush=True)
if walletname not in wallets:
print(f" - {walletname} creation attempting: only JSON, no actual uploaded scan files.", end=' ')
wallet, created = Wallet.objects.update_or_create(walletname=walletname)
# should now also load the json and use it ! check &ref is correct or missing too
if created:
print(f" - {walletname} created: only JSON, no actual uploaded scan files.", end=' ')
wallet.save()

View File

@ -323,7 +323,7 @@ class LoadingSurvex():
perps = get_people_on_trip(survexblock) # What, you don't know Judge Dredd slang ?
message = f"! DATE Warning only accurate to the month, setting to 1st '{oline}' ({survexblock}) {survexblock.survexfile.path} {perps}"
print(self.insp+message)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
DataIssue.objects.create(parser='survex-date', message=message, url=get_offending_filename(survexblock.survexfile.path))
survexblock.date = datetime.strptime(line.replace('.','-'), '%Y-%m') # sets to first of month
setdate(year)
elif len(line) == 4:
@ -331,7 +331,7 @@ class LoadingSurvex():
perps = get_people_on_trip(survexblock)
message = f"! DATE WARNING only accurate to the YEAR, setting to 1st January '{oline}' ({survexblock}) {survexblock.survexfile.path} {perps}"
print(self.insp+message)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
DataIssue.objects.create(parser='survex-date', message=message, url=get_offending_filename(survexblock.survexfile.path))
survexblock.date = datetime.strptime(line, '%Y') # sets to January 1st
setdate(year)
else:
@ -1546,6 +1546,7 @@ def LoadSurvexBlocks():
SurvexStation.objects.all().delete()
print(" - survex Data Issues flushed")
DataIssue.objects.filter(parser='survex').delete()
DataIssue.objects.filter(parser='survex-date').delete()
DataIssue.objects.filter(parser='survexleg').delete()
DataIssue.objects.filter(parser='survexunits').delete()
DataIssue.objects.filter(parser='entrances').delete()