
Replaced maintenance headache with cleaner folder walking

Philip Sargent 2022-09-23 23:43:34 +03:00
parent c3672b476c
commit 6e3fdd35c1
6 changed files with 107 additions and 109 deletions

View File

@@ -70,27 +70,6 @@ def chaosmonkey(n):
         # print("CHAOS strikes !", file=sys.stderr)
     return True
-#
-def GetListDir(sdir):
-    '''handles url or file, so we can refer to a set of scans (not drawings) on another server
-    returns a list of f (file), ff (file full path), is_dir (bool)
-    REPLACE all use of this with Path.rglob() !
-    '''
-    res = [ ]
-    if type(sdir) is str and sdir[:7] == "http://":
-        # s = urllib.request.urlopen(sdir)
-        message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]"
-        print(message)
-        DataIssue.objects.create(parser='Drawings', message=message)
-        sdir[:7] = ""
-    for f in os.listdir(sdir):
-        if f[0] != ".":
-            ff = os.path.join(sdir, f)
-            res.append((f, ff, os.path.isdir(ff)))
-    return res
-
 def only_commit(fname, message):
     '''Only used to commit a survex file edited and saved in view/survex.py
     '''
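As an aside, a minimal sketch of the pathlib idiom the removed docstring points to ("REPLACE all use of this with Path.rglob()"). The function names here (list_dir, walk_scans) are illustrative, not troggle code, and the old http:// branch is not reproduced:

# Sketch only, assuming a local filesystem path; not part of the commit.
from pathlib import Path

def list_dir(sdir):
    # (name, full path, is_dir) tuples like the old GetListDir, skipping dot-files
    return [(p.name, str(p), p.is_dir())
            for p in Path(sdir).iterdir() if not p.name.startswith(".")]

def walk_scans(scans_root):
    # recursive walk over every file below scans_root, as the new parser does
    for p in Path(scans_root).rglob("*"):
        if p.is_file():
            print(p.name)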

View File

@@ -208,8 +208,8 @@ def cavewallets(request, caveid):
             wallets.add(z)
         else:
             wurl = f"/scanupload/{z.walletname.replace('#',':')}"
-            print(f' - Unrecognised cave name \'{zcaveid}\' in {z.walletname}')
-            message = f" ! In {z.walletname} there is an unrecognised cave name '{zcaveid}'"
+            print(f' - Unrecognised cave name \'{zcaveid}\' in {z.walletname} (out of {len(Gcavelookup):,} cave names')
+            message = f" ! In {z.walletname} there is an unrecognised cave name '{zcaveid}' (out of {len(Gcavelookup):,} cave names"
             DataIssue.objects.update_or_create(parser='scans', message=message, url=wurl)

     manywallets = list(set(wallets))

View File

@@ -208,7 +208,7 @@ def get_complaints(complaints, waldata, svxfiles, files, wallet, wurl):
     if not waldata["description written"]:
         complaints.append("The guidebook description needs writing into the survex file. Tick the 'Cave description written' checkbox when this is done.")
     # QMs
-    if not waldata["qms written"] and int(w.year()) >= 2015:
+    if not waldata["qms written"] and w.year() and int(w.year()) >= 2015:
         complaints.append("The QMs needs writing into the survex file. Tick the 'QMs written' checkbox when this is done.")
     # Website

View File

@@ -12,7 +12,7 @@ from functools import reduce
 import settings
 from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
 from troggle.core.models.troggle import DataIssue
-from troggle.core.utils import save_carefully, GetListDir
+from troggle.core.utils import save_carefully
 '''Searches through all the :drawings: repository looking
 for tunnel and therion files

View File

@@ -14,7 +14,7 @@ from pathlib import Path
 import settings
 from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
 from troggle.core.models.troggle import DataIssue
-from troggle.core.utils import save_carefully, GetListDir
+from troggle.core.utils import save_carefully
 from troggle.core.views.scans import datewallet
 '''Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced.
@@ -26,58 +26,54 @@ git = settings.GIT
 # to do: Actually read all the JSON files and set the survex file field appropriately!

-def CheckEmptyDate(wallet):
-    '''If date is not set, get it from a linked survex file.
-    Could also look at filedates for the scans in expofiles/surveyscans/ , but these can be re-set by copying.
-    '''
-    earliest = datetime.datetime.now().date()
-
-    # This is not working, can't see why. An scans parser now taking a very long time..
-    #datewallet(wallet, earliest)
-    return
-
-def CheckEmptyPeople(wallet):
-    '''If people list is empty, copy them from the survex files: all of them
-    To be a Troggle model change; a many:many relationship between wallets and people,
-    as well as being a list in the JSON file (which is the permanent repository). We want the many:many
-    relationship so that we can filter wallets based on a person.
-
-    For the moment, we will just get a list..
-    '''
-    return
-
-def LoadListScansFile(wallet):
-    gld = [ ]
-    # flatten out any directories in these wallet folders - should not be any
-    for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath):
-        if fisdiryf:
-            gld.extend(GetListDir(ffyf))
-        else:
-            gld.append((fyf, ffyf, fisdiryf))
-    c=0
-    for (fyf, ffyf, fisdiryf) in gld:
-        if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif|xvi)(?i)$", fyf):
-            singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet)
-            singlescan.save()
-            c+=1
-            if c>=10:
-                print(".", end='')
-                c = 0
+# def GetListDir(sdir):
+#     '''handles url or file, so we can refer to a set of scans (not drawings) on another server
+#     returns a list of f (file), ff (file full path), is_dir (bool)
+#     REPLACE all use of this with Path.rglob() !
+#     '''
+#     res = [ ]
+#     if type(sdir) is str and sdir[:7] == "http://":
+#         # s = urllib.request.urlopen(sdir)
+#         message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]"
+#         print(message)
+#         DataIssue.objects.create(parser='Drawings', message=message)
+#         sdir[:7] = ""
+#     for f in os.listdir(sdir):
+#         if f[0] != ".":
+#             ff = os.path.join(sdir, f)
+#             res.append((f, ff, os.path.isdir(ff)))
+#     return res

+# def LoadListScansFile(wallet):
+#     # formerly a generic troggle utility, written by who ? Being gradually expunged and replaced by python standard library functions
+#     gld = [ ]
+#     # flatten out any directories in these wallet folders - should not be any
+#     for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath):
+#         if fisdiryf:
+#             gld.extend(GetListDir(ffyf))
+#         else:
+#             gld.append((fyf, ffyf, fisdiryf))
+#     c=0
+#     for (fyf, ffyf, fisdiryf) in gld:
+#         if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif|xvi)(?i)$", fyf):
+#             singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet)
+#             singlescan.save()
+#             c+=1
+#             if c>=10:
+#                 print(".", end='')
+#                 c = 0

 def load_all_scans():
     '''This iterates through the scans directories (either here or on the remote server)
     and builds up the models we can access later.
     It does NOT read or validate anything in the JSON data attached to each wallet. Those checks
     are done at runtime, when a wallet is accessed, not at import time.
-
-    Replace GetListDir with a more modern Path.iter idiom
-    path = Path("scans")
-    for p in path.rglob("*"):
-        print(p.name)
     '''
     print(' - Loading Survey Scans')
@@ -86,6 +82,7 @@ def load_all_scans():
     print(' - deleting all Wallet and SingleScan objects')
     DataIssue.objects.filter(parser='scans').delete()

+    # These are valid old file types to be visible, they are not necessarily allowed to be uploaded to a new wallet.
     valids = [".top",".txt",".tif",".png",".jpg",".jpeg",".pdf",".svg",".gif",".xvi",
               ".json",".autosave",".sxd",".svx",".th",".th2",".tdr",".sql",".zip",".dxf",".3d",
               ".ods",".csv",".xcf",".xml"]
@@ -95,10 +92,12 @@ def load_all_scans():
     # Not all folders with files in them are wallets.
     # they are if they are /2010/2010#33
     # or /1996-1999NotKHbook/
-    # but not if they are /2010/1010#33/therion or /1998/
-    print(' - ', end=' ')
+    # but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/
+    print(' - ', end='')
     scans_path = Path(settings.SCANS_ROOT)
     seen = []
+    c=0
+    wallets = {}
     for p in scans_path.rglob('*'):
         if p.is_file():
             if p.suffix.lower() not in valids and p.name.lower() not in validnames:
@@ -107,6 +106,13 @@ def load_all_scans():
             elif p.parent == scans_path: # skip files directly in /surveyscans/
                 pass
             else:
+                c+=1
+                if c % 15 == 0 :
+                    print(".", end='')
+                if c % 500 == 0 :
+                    print("\n -", end='')
+
                 if p.parent.parent.parent.parent == scans_path:
                     # print(f"too deep {p}", end='\n')
                     fpath = p.parent.parent
@@ -115,53 +121,65 @@ def load_all_scans():
                     fpath = p.parent
                     walletname = p.parent.name

-                # UNFINISHED
+                if walletname in wallets:
+                    wallet = wallets[walletname]
+                else:
+                    print("", flush=True, end='')
+                    wallet = Wallet(fpath=fpath, walletname=walletname)
+                    wallet.save()
+                    wallets[walletname] = wallet
+
+                singlescan = SingleScan(ffile=fpath, name=p.name, wallet=wallet)
+                singlescan.save()
+
+                # only printing progress:
                 tag = p.parent
                 if len(walletname)>4:
                     if walletname[4] == "#":
                         tag = p.parent.parent
                 if tag not in seen:
-                    print(f"{tag.name}", end=' ')
+                    print(f" {tag.name} ", end='')
                     seen.append(tag)
-                #wallet = Wallet(fpath=fpath, walletname=walletname)

-    print('\n UNFINISHED \n\n--- ')
-    for topfolder, fpath, fisdir in GetListDir(settings.SCANS_ROOT):
-        if not fisdir:
-            continue
-
-        # do the year folders
-        if re.match(r"\d\d\d\d$", topfolder):
-            print(f"{topfolder}", end=' ')
-            for walletname, fpath, fisdir in GetListDir(fpath):
-                if fisdir:
-                    wallet = Wallet(fpath=fpath, walletname=walletname)
-                    # this is where we should record the year explicitly
-                    # line 347 of view/uploads.py and needs refactoring for loading contentsjson
-                    CheckEmptyDate(wallet)
-                    CheckEmptyPeople(wallet)
-                    wallet.save()
-                    LoadListScansFile(wallet)
-        else:
-            # but We *should* load all the scans, even for nonstandard names.
-            print(f'\n - IGNORE {walletname} - {fpath}')
-
-    # but we also need to check if JSON exists, even if there are no uploaded scan files
-    #
+    print(f'\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets')
+
+    # if False:
+    #     n=0
+    #     for topfolder, fpath, fisdir in GetListDir(settings.SCANS_ROOT):
+    #         if not fisdir:
+    #             continue
+
+    #         # do the year folders
+    #         if re.match(r"\d\d\d\d$", topfolder):
+    #             print(f"{topfolder}", end=' ')
+    #             for walletname, fpath, fisdir in GetListDir(fpath):
+    #                 if fisdir:
+    #                     wallet = Wallet(fpath=fpath, walletname=walletname)
+    #                     # this is where we should record the year explicitly
+    #                     # line 347 of view/uploads.py and needs refactoring for loading contentsjson
+    #                     wallet.save()
+    #                     LoadListScansFile(wallet)
+    #         # else:
+    #         #     # but We *should* load all the scans, even for nonstandard names.
+    #         #     print(f'\n - IGNORE {topfolder} - {fpath}')
+    # print("", flush=True)
+
+    # but we also need to check if JSON exists, even if there are no uploaded scan files.
+    # Here we know there is a rigid folder structure, so no need to look for sub folders
     contents_path = Path(settings.DRAWINGS_DATA, "walletjson")
     for yeardir in contents_path.iterdir():
         if yeardir.is_dir():
             for walletpath in yeardir.iterdir():
                 if Path(walletpath, contentsjson).is_file():
                     walletname = walletpath.name
+
+                    if walletname not in wallets:
+                        print(f" - {walletname} creation attempting: only JSON, no actual uploaded scan files.", end=' ')
                     wallet, created = Wallet.objects.update_or_create(walletname=walletname)
                     # should now also load the json and use it ! check &ref is correct or missing too
                     if created:
-                        print(f"\n{walletname} created: only JSON, no actual uploaded scan files.", end=' ')
-                        CheckEmptyDate(wallet)
-                        CheckEmptyPeople(wallet)
+                        print(f" - {walletname} created: only JSON, no actual uploaded scan files.", end=' ')
                     wallet.save()
+    print("", flush=True)

View File

@@ -323,7 +323,7 @@ class LoadingSurvex():
             perps = get_people_on_trip(survexblock) # What, you don't know Judge Dredd slang ?
             message = f"! DATE Warning only accurate to the month, setting to 1st '{oline}' ({survexblock}) {survexblock.survexfile.path} {perps}"
             print(self.insp+message)
-            DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
+            DataIssue.objects.create(parser='survex-date', message=message, url=get_offending_filename(survexblock.survexfile.path))
             survexblock.date = datetime.strptime(line.replace('.','-'), '%Y-%m') # sets to first of month
             setdate(year)
         elif len(line) == 4:
@@ -331,7 +331,7 @@ class LoadingSurvex():
             perps = get_people_on_trip(survexblock)
             message = f"! DATE WARNING only accurate to the YEAR, setting to 1st January '{oline}' ({survexblock}) {survexblock.survexfile.path} {perps}"
             print(self.insp+message)
-            DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
+            DataIssue.objects.create(parser='survex-date', message=message, url=get_offending_filename(survexblock.survexfile.path))
             survexblock.date = datetime.strptime(line, '%Y') # sets to January 1st
             setdate(year)
         else:
@@ -1546,6 +1546,7 @@ def LoadSurvexBlocks():
     SurvexStation.objects.all().delete()
     print(" - survex Data Issues flushed")
     DataIssue.objects.filter(parser='survex').delete()
+    DataIssue.objects.filter(parser='survex-date').delete()
     DataIssue.objects.filter(parser='survexleg').delete()
     DataIssue.objects.filter(parser='survexunits').delete()
     DataIssue.objects.filter(parser='entrances').delete()