forked from expo/troggle
Replaced maintenance headache with cleaner folder walking
This commit is contained in:
parent
c3672b476c
commit
6e3fdd35c1
@ -70,27 +70,6 @@ def chaosmonkey(n):
|
||||
# print("CHAOS strikes !", file=sys.stderr)
|
||||
return True
|
||||
|
||||
#
|
||||
def GetListDir(sdir):
    '''List the non-hidden entries of a directory.

    Intended to handle a url or a file path, so that a set of scans (not drawings)
    on another server could be referred to. Loading from http:// is NOT implemented:
    the request is printed and recorded as a DataIssue, the "http://" prefix is
    stripped, and the remainder is then listed as if it were a local path.

    sdir: directory to list (a str path; anything os.listdir() accepts also works).

    Returns a list of (f, ff, is_dir) tuples: f (file name), ff (file full path),
    is_dir (bool). Names beginning with "." are skipped.

    Raises whatever os.listdir() raises (e.g. FileNotFoundError) if the path
    cannot be listed.

    REPLACE all use of this with Path.rglob() !
    '''
    if isinstance(sdir, str) and sdir.startswith("http://"):
        # s = urllib.request.urlopen(sdir)
        message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]"
        print(message)
        DataIssue.objects.create(parser='Drawings', message=message)
        # str is immutable: the former slice assignment (sdir[:7] = "") raised
        # TypeError, so rebind the name to the prefix-stripped string instead.
        sdir = sdir[7:]

    res = []
    for f in os.listdir(sdir):
        if not f.startswith("."):  # skip hidden files and folders
            ff = os.path.join(sdir, f)
            res.append((f, ff, os.path.isdir(ff)))
    return res
|
||||
|
||||
def only_commit(fname, message):
|
||||
'''Only used to commit a survex file edited and saved in view/survex.py
|
||||
'''
|
||||
|
@ -208,8 +208,8 @@ def cavewallets(request, caveid):
|
||||
wallets.add(z)
|
||||
else:
|
||||
wurl = f"/scanupload/{z.walletname.replace('#',':')}"
|
||||
print(f' - Unrecognised cave name \'{zcaveid}\' in {z.walletname}')
|
||||
message = f" ! In {z.walletname} there is an unrecognised cave name '{zcaveid}'"
|
||||
print(f' - Unrecognised cave name \'{zcaveid}\' in {z.walletname} (out of {len(Gcavelookup):,} cave names')
|
||||
message = f" ! In {z.walletname} there is an unrecognised cave name '{zcaveid}' (out of {len(Gcavelookup):,} cave names"
|
||||
DataIssue.objects.update_or_create(parser='scans', message=message, url=wurl)
|
||||
|
||||
manywallets = list(set(wallets))
|
||||
|
@ -208,7 +208,7 @@ def get_complaints(complaints, waldata, svxfiles, files, wallet, wurl):
|
||||
if not waldata["description written"]:
|
||||
complaints.append("The guidebook description needs writing into the survex file. Tick the 'Cave description written' checkbox when this is done.")
|
||||
# QMs
|
||||
if not waldata["qms written"] and int(w.year()) >= 2015:
|
||||
if not waldata["qms written"] and w.year() and int(w.year()) >= 2015:
|
||||
complaints.append("The QMs needs writing into the survex file. Tick the 'QMs written' checkbox when this is done.")
|
||||
|
||||
# Website
|
||||
|
@ -12,7 +12,7 @@ from functools import reduce
|
||||
import settings
|
||||
from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
|
||||
from troggle.core.models.troggle import DataIssue
|
||||
from troggle.core.utils import save_carefully, GetListDir
|
||||
from troggle.core.utils import save_carefully
|
||||
|
||||
'''Searches through all the :drawings: repository looking
|
||||
for tunnel and therion files
|
||||
|
182
parsers/scans.py
182
parsers/scans.py
@ -14,7 +14,7 @@ from pathlib import Path
|
||||
import settings
|
||||
from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
|
||||
from troggle.core.models.troggle import DataIssue
|
||||
from troggle.core.utils import save_carefully, GetListDir
|
||||
from troggle.core.utils import save_carefully
|
||||
from troggle.core.views.scans import datewallet
|
||||
|
||||
'''Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced.
|
||||
@ -26,66 +26,63 @@ git = settings.GIT
|
||||
|
||||
# to do: Actually read all the JSON files and set the survex file field appropriately!
|
||||
|
||||
|
||||
def CheckEmptyDate(wallet):
|
||||
'''If date is not set, get it from a linked survex file.
|
||||
Could also look at filedates for the scans in expofiles/surveyscans/ , but these can be re-set by copying.
|
||||
'''
|
||||
earliest = datetime.datetime.now().date()
|
||||
# def GetListDir(sdir):
|
||||
# '''handles url or file, so we can refer to a set of scans (not drawings) on another server
|
||||
# returns a list of f (file), ff (file full path), is_dir (bool)
|
||||
|
||||
# This is not working, can't see why. And the scans parser is now taking a very long time..
|
||||
#datewallet(wallet, earliest)
|
||||
return
|
||||
|
||||
def CheckEmptyPeople(wallet):
|
||||
'''If people list is empty, copy them from the survex files: all of them
|
||||
|
||||
To be a Troggle model change; a many:many relationship between wallets and people,
|
||||
as well as being a list in the JSON file (which is the permanent repository). We want the many:many
|
||||
relationship so that we can filter wallets based on a person.
|
||||
|
||||
For the moment, we will just get a list..
|
||||
'''
|
||||
return
|
||||
# REPLACE all use of this with Path.rglob() !
|
||||
# '''
|
||||
# res = [ ]
|
||||
# if type(sdir) is str and sdir[:7] == "http://":
|
||||
# # s = urllib.request.urlopen(sdir)
|
||||
# message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]"
|
||||
# print(message)
|
||||
# DataIssue.objects.create(parser='Drawings', message=message)
|
||||
# sdir[:7] = ""
|
||||
|
||||
def LoadListScansFile(wallet):
|
||||
gld = [ ]
|
||||
# flatten out any directories in these wallet folders - should not be any
|
||||
for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath):
|
||||
if fisdiryf:
|
||||
gld.extend(GetListDir(ffyf))
|
||||
else:
|
||||
gld.append((fyf, ffyf, fisdiryf))
|
||||
# for f in os.listdir(sdir):
|
||||
# if f[0] != ".":
|
||||
# ff = os.path.join(sdir, f)
|
||||
# res.append((f, ff, os.path.isdir(ff)))
|
||||
# return res
|
||||
|
||||
|
||||
# def LoadListScansFile(wallet):
|
||||
# # formerly a generic troggle utility, written by who ? Being gradually expunged and replaced by python standard library functions
|
||||
# gld = [ ]
|
||||
# # flatten out any directories in these wallet folders - should not be any
|
||||
# for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath):
|
||||
# if fisdiryf:
|
||||
# gld.extend(GetListDir(ffyf))
|
||||
# else:
|
||||
# gld.append((fyf, ffyf, fisdiryf))
|
||||
|
||||
c=0
|
||||
for (fyf, ffyf, fisdiryf) in gld:
|
||||
if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif|xvi)(?i)$", fyf):
|
||||
singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet)
|
||||
singlescan.save()
|
||||
c+=1
|
||||
if c>=10:
|
||||
print(".", end='')
|
||||
c = 0
|
||||
# c=0
|
||||
# for (fyf, ffyf, fisdiryf) in gld:
|
||||
# if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif|xvi)(?i)$", fyf):
|
||||
# singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet)
|
||||
# singlescan.save()
|
||||
# c+=1
|
||||
# if c>=10:
|
||||
# print(".", end='')
|
||||
# c = 0
|
||||
|
||||
def load_all_scans():
|
||||
'''This iterates through the scans directories (either here or on the remote server)
|
||||
and builds up the models we can access later.
|
||||
|
||||
It does NOT read or validate anything in the JSON data attached to each wallet. Those checks
|
||||
are done at runtime, when a wallet is accessed, not at import time.
|
||||
|
||||
Replace GetListDir with a more modern Path.iter idiom
|
||||
path = Path("scans")
|
||||
for p in path.rglob("*"):
|
||||
print(p.name)
|
||||
|
||||
'''
|
||||
print(' - Loading Survey Scans')
|
||||
|
||||
SingleScan.objects.all().delete()
|
||||
Wallet.objects.all().delete()
|
||||
print(' - deleting all Wallet and SingleScan objects')
|
||||
print(' - deleting all Wallet and SingleScan objects')
|
||||
DataIssue.objects.filter(parser='scans').delete()
|
||||
|
||||
# These are valid old file types to be visible, they are not necessarily allowed to be uploaded to a new wallet.
|
||||
valids = [".top",".txt",".tif",".png",".jpg",".jpeg",".pdf",".svg",".gif",".xvi",
|
||||
".json",".autosave",".sxd",".svx",".th",".th2",".tdr",".sql",".zip",".dxf",".3d",
|
||||
".ods",".csv",".xcf",".xml"]
|
||||
@ -95,10 +92,12 @@ def load_all_scans():
|
||||
# Not all folders with files in them are wallets.
|
||||
# they are if they are /2010/2010#33
|
||||
# or /1996-1999NotKHbook/
|
||||
# but not if they are /2010/1010#33/therion or /1998/
|
||||
print(' - ', end=' ')
|
||||
# but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/
|
||||
print(' - ', end='')
|
||||
scans_path = Path(settings.SCANS_ROOT)
|
||||
seen = []
|
||||
c=0
|
||||
wallets = {}
|
||||
for p in scans_path.rglob('*'):
|
||||
if p.is_file():
|
||||
if p.suffix.lower() not in valids and p.name.lower() not in validnames:
|
||||
@ -107,6 +106,13 @@ def load_all_scans():
|
||||
elif p.parent == scans_path: # skip files directly in /surveyscans/
|
||||
pass
|
||||
else:
|
||||
|
||||
c+=1
|
||||
if c % 15 == 0 :
|
||||
print(".", end='')
|
||||
if c % 500 == 0 :
|
||||
print("\n -", end='')
|
||||
|
||||
if p.parent.parent.parent.parent == scans_path:
|
||||
# print(f"too deep {p}", end='\n')
|
||||
fpath = p.parent.parent
|
||||
@ -114,54 +120,66 @@ def load_all_scans():
|
||||
else:
|
||||
fpath = p.parent
|
||||
walletname = p.parent.name
|
||||
|
||||
# UNFINISHED
|
||||
|
||||
if walletname in wallets:
|
||||
wallet = wallets[walletname]
|
||||
else:
|
||||
print("", flush=True, end='')
|
||||
wallet = Wallet(fpath=fpath, walletname=walletname)
|
||||
wallet.save()
|
||||
wallets[walletname] = wallet
|
||||
|
||||
singlescan = SingleScan(ffile=fpath, name=p.name, wallet=wallet)
|
||||
singlescan.save()
|
||||
|
||||
|
||||
# only printing progress:
|
||||
tag = p.parent
|
||||
if len(walletname)>4:
|
||||
if walletname[4] == "#":
|
||||
tag = p.parent.parent
|
||||
|
||||
if tag not in seen:
|
||||
print(f"{tag.name}", end=' ')
|
||||
print(f" {tag.name} ", end='')
|
||||
seen.append(tag)
|
||||
#wallet = Wallet(fpath=fpath, walletname=walletname)
|
||||
|
||||
|
||||
|
||||
print('\n UNFINISHED \n\n--- ')
|
||||
for topfolder, fpath, fisdir in GetListDir(settings.SCANS_ROOT):
|
||||
if not fisdir:
|
||||
continue
|
||||
print(f'\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets')
|
||||
|
||||
# if False:
|
||||
# n=0
|
||||
# for topfolder, fpath, fisdir in GetListDir(settings.SCANS_ROOT):
|
||||
# if not fisdir:
|
||||
# continue
|
||||
|
||||
# do the year folders
|
||||
if re.match(r"\d\d\d\d$", topfolder):
|
||||
print(f"{topfolder}", end=' ')
|
||||
for walletname, fpath, fisdir in GetListDir(fpath):
|
||||
if fisdir:
|
||||
wallet = Wallet(fpath=fpath, walletname=walletname)
|
||||
# this is where we should record the year explicitly
|
||||
# line 347 of view/uploads.py and needs refactoring for loading contentsjson
|
||||
CheckEmptyDate(wallet)
|
||||
CheckEmptyPeople(wallet)
|
||||
wallet.save()
|
||||
LoadListScansFile(wallet)
|
||||
else:
|
||||
# but We *should* load all the scans, even for nonstandard names.
|
||||
print(f'\n - IGNORE {walletname} - {fpath}')
|
||||
|
||||
# but we also need to check if JSON exists, even if there are no uploaded scan files
|
||||
# # do the year folders
|
||||
# # if re.match(r"\d\d\d\d$", topfolder):
|
||||
# print(f"{topfolder}", end=' ')
|
||||
# for walletname, fpath, fisdir in GetListDir(fpath):
|
||||
# if fisdir:
|
||||
# wallet = Wallet(fpath=fpath, walletname=walletname)
|
||||
# # this is where we should record the year explicitly
|
||||
# # line 347 of view/uploads.py and needs refactoring for loading contentsjson
|
||||
# wallet.save()
|
||||
# LoadListScansFile(wallet)
|
||||
# # else:
|
||||
# # # but We *should* load all the scans, even for nonstandard names.
|
||||
# # print(f'\n - IGNORE {topfolder} - {fpath}')
|
||||
# print("", flush=True)
|
||||
|
||||
# but we also need to check if JSON exists, even if there are no uploaded scan files.
|
||||
# Here we know there is a rigid folder structure, so no need to look for sub folders
|
||||
contents_path = Path(settings.DRAWINGS_DATA, "walletjson")
|
||||
for yeardir in contents_path.iterdir():
|
||||
if yeardir.is_dir():
|
||||
for walletpath in yeardir.iterdir():
|
||||
if Path(walletpath, contentsjson).is_file():
|
||||
walletname = walletpath.name
|
||||
wallet, created = Wallet.objects.update_or_create(walletname=walletname)
|
||||
# should now also load the json and use it ! check &ref is correct or missing too
|
||||
if created:
|
||||
print(f"\n{walletname} created: only JSON, no actual uploaded scan files.", end=' ')
|
||||
CheckEmptyDate(wallet)
|
||||
CheckEmptyPeople(wallet)
|
||||
wallet.save()
|
||||
|
||||
|
||||
print("", flush=True)
|
||||
|
||||
if walletname not in wallets:
|
||||
print(f" - {walletname} creation attempting: only JSON, no actual uploaded scan files.", end=' ')
|
||||
wallet, created = Wallet.objects.update_or_create(walletname=walletname)
|
||||
# should now also load the json and use it ! check &ref is correct or missing too
|
||||
if created:
|
||||
print(f" - {walletname} created: only JSON, no actual uploaded scan files.", end=' ')
|
||||
wallet.save()
|
||||
|
@ -323,7 +323,7 @@ class LoadingSurvex():
|
||||
perps = get_people_on_trip(survexblock) # What, you don't know Judge Dredd slang ?
|
||||
message = f"! DATE Warning only accurate to the month, setting to 1st '{oline}' ({survexblock}) {survexblock.survexfile.path} {perps}"
|
||||
print(self.insp+message)
|
||||
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
|
||||
DataIssue.objects.create(parser='survex-date', message=message, url=get_offending_filename(survexblock.survexfile.path))
|
||||
survexblock.date = datetime.strptime(line.replace('.','-'), '%Y-%m') # sets to first of month
|
||||
setdate(year)
|
||||
elif len(line) == 4:
|
||||
@ -331,7 +331,7 @@ class LoadingSurvex():
|
||||
perps = get_people_on_trip(survexblock)
|
||||
message = f"! DATE WARNING only accurate to the YEAR, setting to 1st January '{oline}' ({survexblock}) {survexblock.survexfile.path} {perps}"
|
||||
print(self.insp+message)
|
||||
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
|
||||
DataIssue.objects.create(parser='survex-date', message=message, url=get_offending_filename(survexblock.survexfile.path))
|
||||
survexblock.date = datetime.strptime(line, '%Y') # sets to January 1st
|
||||
setdate(year)
|
||||
else:
|
||||
@ -1546,6 +1546,7 @@ def LoadSurvexBlocks():
|
||||
SurvexStation.objects.all().delete()
|
||||
print(" - survex Data Issues flushed")
|
||||
DataIssue.objects.filter(parser='survex').delete()
|
||||
DataIssue.objects.filter(parser='survex-date').delete()
|
||||
DataIssue.objects.filter(parser='survexleg').delete()
|
||||
DataIssue.objects.filter(parser='survexunits').delete()
|
||||
DataIssue.objects.filter(parser='entrances').delete()
|
||||
|
Loading…
Reference in New Issue
Block a user