mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2024-11-25 08:41:51 +00:00
Replaced maintenance headache with cleaner folder walking
This commit is contained in:
parent
c3672b476c
commit
6e3fdd35c1
@ -70,27 +70,6 @@ def chaosmonkey(n):
|
|||||||
# print("CHAOS strikes !", file=sys.stderr)
|
# print("CHAOS strikes !", file=sys.stderr)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
#
|
|
||||||
def GetListDir(sdir):
|
|
||||||
'''handles url or file, so we can refer to a set of scans (not drawings) on another server
|
|
||||||
returns a list of f (file), ff (file full path), is_dir (bool)
|
|
||||||
|
|
||||||
REPLACE all use of this with Path.rglob() !
|
|
||||||
'''
|
|
||||||
res = [ ]
|
|
||||||
if type(sdir) is str and sdir[:7] == "http://":
|
|
||||||
# s = urllib.request.urlopen(sdir)
|
|
||||||
message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]"
|
|
||||||
print(message)
|
|
||||||
DataIssue.objects.create(parser='Drawings', message=message)
|
|
||||||
sdir[:7] = ""
|
|
||||||
|
|
||||||
for f in os.listdir(sdir):
|
|
||||||
if f[0] != ".":
|
|
||||||
ff = os.path.join(sdir, f)
|
|
||||||
res.append((f, ff, os.path.isdir(ff)))
|
|
||||||
return res
|
|
||||||
|
|
||||||
def only_commit(fname, message):
|
def only_commit(fname, message):
|
||||||
'''Only used to commit a survex file edited and saved in view/survex.py
|
'''Only used to commit a survex file edited and saved in view/survex.py
|
||||||
'''
|
'''
|
||||||
|
@ -208,8 +208,8 @@ def cavewallets(request, caveid):
|
|||||||
wallets.add(z)
|
wallets.add(z)
|
||||||
else:
|
else:
|
||||||
wurl = f"/scanupload/{z.walletname.replace('#',':')}"
|
wurl = f"/scanupload/{z.walletname.replace('#',':')}"
|
||||||
print(f' - Unrecognised cave name \'{zcaveid}\' in {z.walletname}')
|
print(f' - Unrecognised cave name \'{zcaveid}\' in {z.walletname} (out of {len(Gcavelookup):,} cave names')
|
||||||
message = f" ! In {z.walletname} there is an unrecognised cave name '{zcaveid}'"
|
message = f" ! In {z.walletname} there is an unrecognised cave name '{zcaveid}' (out of {len(Gcavelookup):,} cave names"
|
||||||
DataIssue.objects.update_or_create(parser='scans', message=message, url=wurl)
|
DataIssue.objects.update_or_create(parser='scans', message=message, url=wurl)
|
||||||
|
|
||||||
manywallets = list(set(wallets))
|
manywallets = list(set(wallets))
|
||||||
|
@ -208,7 +208,7 @@ def get_complaints(complaints, waldata, svxfiles, files, wallet, wurl):
|
|||||||
if not waldata["description written"]:
|
if not waldata["description written"]:
|
||||||
complaints.append("The guidebook description needs writing into the survex file. Tick the 'Cave description written' checkbox when this is done.")
|
complaints.append("The guidebook description needs writing into the survex file. Tick the 'Cave description written' checkbox when this is done.")
|
||||||
# QMs
|
# QMs
|
||||||
if not waldata["qms written"] and int(w.year()) >= 2015:
|
if not waldata["qms written"] and w.year() and int(w.year()) >= 2015:
|
||||||
complaints.append("The QMs needs writing into the survex file. Tick the 'QMs written' checkbox when this is done.")
|
complaints.append("The QMs needs writing into the survex file. Tick the 'QMs written' checkbox when this is done.")
|
||||||
|
|
||||||
# Website
|
# Website
|
||||||
|
@ -12,7 +12,7 @@ from functools import reduce
|
|||||||
import settings
|
import settings
|
||||||
from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
|
from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
|
||||||
from troggle.core.models.troggle import DataIssue
|
from troggle.core.models.troggle import DataIssue
|
||||||
from troggle.core.utils import save_carefully, GetListDir
|
from troggle.core.utils import save_carefully
|
||||||
|
|
||||||
'''Searches through all the :drawings: repository looking
|
'''Searches through all the :drawings: repository looking
|
||||||
for tunnel and therion files
|
for tunnel and therion files
|
||||||
|
162
parsers/scans.py
162
parsers/scans.py
@ -14,7 +14,7 @@ from pathlib import Path
|
|||||||
import settings
|
import settings
|
||||||
from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
|
from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
|
||||||
from troggle.core.models.troggle import DataIssue
|
from troggle.core.models.troggle import DataIssue
|
||||||
from troggle.core.utils import save_carefully, GetListDir
|
from troggle.core.utils import save_carefully
|
||||||
from troggle.core.views.scans import datewallet
|
from troggle.core.views.scans import datewallet
|
||||||
|
|
||||||
'''Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced.
|
'''Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced.
|
||||||
@ -26,58 +26,54 @@ git = settings.GIT
|
|||||||
|
|
||||||
# to do: Actually read all the JSON files and set the survex file field appropriately!
|
# to do: Actually read all the JSON files and set the survex file field appropriately!
|
||||||
|
|
||||||
|
# def GetListDir(sdir):
|
||||||
|
# '''handles url or file, so we can refer to a set of scans (not drawings) on another server
|
||||||
|
# returns a list of f (file), ff (file full path), is_dir (bool)
|
||||||
|
|
||||||
def CheckEmptyDate(wallet):
|
# REPLACE all use of this with Path.rglob() !
|
||||||
'''If date is not set, get it from a linked survex file.
|
# '''
|
||||||
Could also look at filedates for the scans in expofiles/surveyscans/ , but these can be re-set by copying.
|
# res = [ ]
|
||||||
'''
|
# if type(sdir) is str and sdir[:7] == "http://":
|
||||||
earliest = datetime.datetime.now().date()
|
# # s = urllib.request.urlopen(sdir)
|
||||||
|
# message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]"
|
||||||
|
# print(message)
|
||||||
|
# DataIssue.objects.create(parser='Drawings', message=message)
|
||||||
|
# sdir[:7] = ""
|
||||||
|
|
||||||
# This is not working, can't see why. An scans parser now taking a very long time..
|
# for f in os.listdir(sdir):
|
||||||
#datewallet(wallet, earliest)
|
# if f[0] != ".":
|
||||||
return
|
# ff = os.path.join(sdir, f)
|
||||||
|
# res.append((f, ff, os.path.isdir(ff)))
|
||||||
|
# return res
|
||||||
|
|
||||||
def CheckEmptyPeople(wallet):
|
|
||||||
'''If people list is empty, copy them from the survex files: all of them
|
|
||||||
|
|
||||||
To be a Troggle model change; a many:many relationship between wallets and people,
|
# def LoadListScansFile(wallet):
|
||||||
as well as being a list in the JSON file (which is the permanent repository). We want the many:many
|
# # formerly a generic troggle utility, written by who ? Being gradually expunged and replaced by python standard library functions
|
||||||
relationship so that we can filter wallets based on a person.
|
# gld = [ ]
|
||||||
|
# # flatten out any directories in these wallet folders - should not be any
|
||||||
|
# for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath):
|
||||||
|
# if fisdiryf:
|
||||||
|
# gld.extend(GetListDir(ffyf))
|
||||||
|
# else:
|
||||||
|
# gld.append((fyf, ffyf, fisdiryf))
|
||||||
|
|
||||||
For the moment, we will just get a list..
|
# c=0
|
||||||
'''
|
# for (fyf, ffyf, fisdiryf) in gld:
|
||||||
return
|
# if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif|xvi)(?i)$", fyf):
|
||||||
|
# singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet)
|
||||||
def LoadListScansFile(wallet):
|
# singlescan.save()
|
||||||
gld = [ ]
|
# c+=1
|
||||||
# flatten out any directories in these wallet folders - should not be any
|
# if c>=10:
|
||||||
for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath):
|
# print(".", end='')
|
||||||
if fisdiryf:
|
# c = 0
|
||||||
gld.extend(GetListDir(ffyf))
|
|
||||||
else:
|
|
||||||
gld.append((fyf, ffyf, fisdiryf))
|
|
||||||
|
|
||||||
c=0
|
|
||||||
for (fyf, ffyf, fisdiryf) in gld:
|
|
||||||
if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif|xvi)(?i)$", fyf):
|
|
||||||
singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet)
|
|
||||||
singlescan.save()
|
|
||||||
c+=1
|
|
||||||
if c>=10:
|
|
||||||
print(".", end='')
|
|
||||||
c = 0
|
|
||||||
|
|
||||||
def load_all_scans():
|
def load_all_scans():
|
||||||
'''This iterates through the scans directories (either here or on the remote server)
|
'''This iterates through the scans directories (either here or on the remote server)
|
||||||
and builds up the models we can access later.
|
and builds up the models we can access later.
|
||||||
|
|
||||||
It does NOT read or validate anything in the JSON data attached to each wallet. Those checks
|
It does NOT read or validate anything in the JSON data attached to each wallet. Those checks
|
||||||
are done at runtime, when a wallet is accessed, not at import time.
|
are done at runtime, when a wallet is accessed, not at import time.
|
||||||
|
|
||||||
Replace GetListDir with a more modern Path.iter idiom
|
|
||||||
path = Path("scans")
|
|
||||||
for p in path.rglob("*"):
|
|
||||||
print(p.name)
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
print(' - Loading Survey Scans')
|
print(' - Loading Survey Scans')
|
||||||
|
|
||||||
@ -86,6 +82,7 @@ def load_all_scans():
|
|||||||
print(' - deleting all Wallet and SingleScan objects')
|
print(' - deleting all Wallet and SingleScan objects')
|
||||||
DataIssue.objects.filter(parser='scans').delete()
|
DataIssue.objects.filter(parser='scans').delete()
|
||||||
|
|
||||||
|
# These are valid old file types to be visible, they are not necessarily allowed to be uploaded to a new wallet.
|
||||||
valids = [".top",".txt",".tif",".png",".jpg",".jpeg",".pdf",".svg",".gif",".xvi",
|
valids = [".top",".txt",".tif",".png",".jpg",".jpeg",".pdf",".svg",".gif",".xvi",
|
||||||
".json",".autosave",".sxd",".svx",".th",".th2",".tdr",".sql",".zip",".dxf",".3d",
|
".json",".autosave",".sxd",".svx",".th",".th2",".tdr",".sql",".zip",".dxf",".3d",
|
||||||
".ods",".csv",".xcf",".xml"]
|
".ods",".csv",".xcf",".xml"]
|
||||||
@ -95,10 +92,12 @@ def load_all_scans():
|
|||||||
# Not all folders with files in them are wallets.
|
# Not all folders with files in them are wallets.
|
||||||
# they are if they are /2010/2010#33
|
# they are if they are /2010/2010#33
|
||||||
# or /1996-1999NotKHbook/
|
# or /1996-1999NotKHbook/
|
||||||
# but not if they are /2010/1010#33/therion or /1998/
|
# but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/
|
||||||
print(' - ', end=' ')
|
print(' - ', end='')
|
||||||
scans_path = Path(settings.SCANS_ROOT)
|
scans_path = Path(settings.SCANS_ROOT)
|
||||||
seen = []
|
seen = []
|
||||||
|
c=0
|
||||||
|
wallets = {}
|
||||||
for p in scans_path.rglob('*'):
|
for p in scans_path.rglob('*'):
|
||||||
if p.is_file():
|
if p.is_file():
|
||||||
if p.suffix.lower() not in valids and p.name.lower() not in validnames:
|
if p.suffix.lower() not in valids and p.name.lower() not in validnames:
|
||||||
@ -107,6 +106,13 @@ def load_all_scans():
|
|||||||
elif p.parent == scans_path: # skip files directly in /surveyscans/
|
elif p.parent == scans_path: # skip files directly in /surveyscans/
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
c+=1
|
||||||
|
if c % 15 == 0 :
|
||||||
|
print(".", end='')
|
||||||
|
if c % 500 == 0 :
|
||||||
|
print("\n -", end='')
|
||||||
|
|
||||||
if p.parent.parent.parent.parent == scans_path:
|
if p.parent.parent.parent.parent == scans_path:
|
||||||
# print(f"too deep {p}", end='\n')
|
# print(f"too deep {p}", end='\n')
|
||||||
fpath = p.parent.parent
|
fpath = p.parent.parent
|
||||||
@ -115,53 +121,65 @@ def load_all_scans():
|
|||||||
fpath = p.parent
|
fpath = p.parent
|
||||||
walletname = p.parent.name
|
walletname = p.parent.name
|
||||||
|
|
||||||
# UNFINISHED
|
if walletname in wallets:
|
||||||
|
wallet = wallets[walletname]
|
||||||
|
else:
|
||||||
|
print("", flush=True, end='')
|
||||||
|
wallet = Wallet(fpath=fpath, walletname=walletname)
|
||||||
|
wallet.save()
|
||||||
|
wallets[walletname] = wallet
|
||||||
|
|
||||||
|
singlescan = SingleScan(ffile=fpath, name=p.name, wallet=wallet)
|
||||||
|
singlescan.save()
|
||||||
|
|
||||||
|
|
||||||
|
# only printing progress:
|
||||||
tag = p.parent
|
tag = p.parent
|
||||||
if len(walletname)>4:
|
if len(walletname)>4:
|
||||||
if walletname[4] == "#":
|
if walletname[4] == "#":
|
||||||
tag = p.parent.parent
|
tag = p.parent.parent
|
||||||
|
|
||||||
if tag not in seen:
|
if tag not in seen:
|
||||||
print(f"{tag.name}", end=' ')
|
print(f" {tag.name} ", end='')
|
||||||
seen.append(tag)
|
seen.append(tag)
|
||||||
#wallet = Wallet(fpath=fpath, walletname=walletname)
|
|
||||||
|
|
||||||
|
|
||||||
print('\n UNFINISHED \n\n--- ')
|
print(f'\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets')
|
||||||
for topfolder, fpath, fisdir in GetListDir(settings.SCANS_ROOT):
|
|
||||||
if not fisdir:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# do the year folders
|
# if False:
|
||||||
if re.match(r"\d\d\d\d$", topfolder):
|
# n=0
|
||||||
print(f"{topfolder}", end=' ')
|
# for topfolder, fpath, fisdir in GetListDir(settings.SCANS_ROOT):
|
||||||
for walletname, fpath, fisdir in GetListDir(fpath):
|
# if not fisdir:
|
||||||
if fisdir:
|
# continue
|
||||||
wallet = Wallet(fpath=fpath, walletname=walletname)
|
|
||||||
# this is where we should record the year explicitly
|
|
||||||
# line 347 of view/uploads.py and needs refactoring for loading contentsjson
|
|
||||||
CheckEmptyDate(wallet)
|
|
||||||
CheckEmptyPeople(wallet)
|
|
||||||
wallet.save()
|
|
||||||
LoadListScansFile(wallet)
|
|
||||||
else:
|
|
||||||
# but We *should* load all the scans, even for nonstandard names.
|
|
||||||
print(f'\n - IGNORE {walletname} - {fpath}')
|
|
||||||
|
|
||||||
# but we also need to check if JSON exists, even if there are no uploaded scan files
|
# # do the year folders
|
||||||
|
# # if re.match(r"\d\d\d\d$", topfolder):
|
||||||
|
# print(f"{topfolder}", end=' ')
|
||||||
|
# for walletname, fpath, fisdir in GetListDir(fpath):
|
||||||
|
# if fisdir:
|
||||||
|
# wallet = Wallet(fpath=fpath, walletname=walletname)
|
||||||
|
# # this is where we should record the year explicitly
|
||||||
|
# # line 347 of view/uploads.py and needs refactoring for loading contentsjson
|
||||||
|
# wallet.save()
|
||||||
|
# LoadListScansFile(wallet)
|
||||||
|
# # else:
|
||||||
|
# # # but We *should* load all the scans, even for nonstandard names.
|
||||||
|
# # print(f'\n - IGNORE {topfolder} - {fpath}')
|
||||||
|
# print("", flush=True)
|
||||||
|
|
||||||
|
# but we also need to check if JSON exists, even if there are no uploaded scan files.
|
||||||
|
# Here we know there is a rigid folder structure, so no need to look for sub folders
|
||||||
contents_path = Path(settings.DRAWINGS_DATA, "walletjson")
|
contents_path = Path(settings.DRAWINGS_DATA, "walletjson")
|
||||||
for yeardir in contents_path.iterdir():
|
for yeardir in contents_path.iterdir():
|
||||||
if yeardir.is_dir():
|
if yeardir.is_dir():
|
||||||
for walletpath in yeardir.iterdir():
|
for walletpath in yeardir.iterdir():
|
||||||
if Path(walletpath, contentsjson).is_file():
|
if Path(walletpath, contentsjson).is_file():
|
||||||
walletname = walletpath.name
|
walletname = walletpath.name
|
||||||
|
|
||||||
|
if walletname not in wallets:
|
||||||
|
print(f" - {walletname} creation attempting: only JSON, no actual uploaded scan files.", end=' ')
|
||||||
wallet, created = Wallet.objects.update_or_create(walletname=walletname)
|
wallet, created = Wallet.objects.update_or_create(walletname=walletname)
|
||||||
# should now also load the json and use it ! check &ref is correct or missing too
|
# should now also load the json and use it ! check &ref is correct or missing too
|
||||||
if created:
|
if created:
|
||||||
print(f"\n{walletname} created: only JSON, no actual uploaded scan files.", end=' ')
|
print(f" - {walletname} created: only JSON, no actual uploaded scan files.", end=' ')
|
||||||
CheckEmptyDate(wallet)
|
|
||||||
CheckEmptyPeople(wallet)
|
|
||||||
wallet.save()
|
wallet.save()
|
||||||
|
|
||||||
|
|
||||||
print("", flush=True)
|
|
||||||
|
@ -323,7 +323,7 @@ class LoadingSurvex():
|
|||||||
perps = get_people_on_trip(survexblock) # What, you don't know Judge Dredd slang ?
|
perps = get_people_on_trip(survexblock) # What, you don't know Judge Dredd slang ?
|
||||||
message = f"! DATE Warning only accurate to the month, setting to 1st '{oline}' ({survexblock}) {survexblock.survexfile.path} {perps}"
|
message = f"! DATE Warning only accurate to the month, setting to 1st '{oline}' ({survexblock}) {survexblock.survexfile.path} {perps}"
|
||||||
print(self.insp+message)
|
print(self.insp+message)
|
||||||
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
|
DataIssue.objects.create(parser='survex-date', message=message, url=get_offending_filename(survexblock.survexfile.path))
|
||||||
survexblock.date = datetime.strptime(line.replace('.','-'), '%Y-%m') # sets to first of month
|
survexblock.date = datetime.strptime(line.replace('.','-'), '%Y-%m') # sets to first of month
|
||||||
setdate(year)
|
setdate(year)
|
||||||
elif len(line) == 4:
|
elif len(line) == 4:
|
||||||
@ -331,7 +331,7 @@ class LoadingSurvex():
|
|||||||
perps = get_people_on_trip(survexblock)
|
perps = get_people_on_trip(survexblock)
|
||||||
message = f"! DATE WARNING only accurate to the YEAR, setting to 1st January '{oline}' ({survexblock}) {survexblock.survexfile.path} {perps}"
|
message = f"! DATE WARNING only accurate to the YEAR, setting to 1st January '{oline}' ({survexblock}) {survexblock.survexfile.path} {perps}"
|
||||||
print(self.insp+message)
|
print(self.insp+message)
|
||||||
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
|
DataIssue.objects.create(parser='survex-date', message=message, url=get_offending_filename(survexblock.survexfile.path))
|
||||||
survexblock.date = datetime.strptime(line, '%Y') # sets to January 1st
|
survexblock.date = datetime.strptime(line, '%Y') # sets to January 1st
|
||||||
setdate(year)
|
setdate(year)
|
||||||
else:
|
else:
|
||||||
@ -1546,6 +1546,7 @@ def LoadSurvexBlocks():
|
|||||||
SurvexStation.objects.all().delete()
|
SurvexStation.objects.all().delete()
|
||||||
print(" - survex Data Issues flushed")
|
print(" - survex Data Issues flushed")
|
||||||
DataIssue.objects.filter(parser='survex').delete()
|
DataIssue.objects.filter(parser='survex').delete()
|
||||||
|
DataIssue.objects.filter(parser='survex-date').delete()
|
||||||
DataIssue.objects.filter(parser='survexleg').delete()
|
DataIssue.objects.filter(parser='survexleg').delete()
|
||||||
DataIssue.objects.filter(parser='survexunits').delete()
|
DataIssue.objects.filter(parser='survexunits').delete()
|
||||||
DataIssue.objects.filter(parser='entrances').delete()
|
DataIssue.objects.filter(parser='entrances').delete()
|
||||||
|
Loading…
Reference in New Issue
Block a user