diff --git a/core/utils.py b/core/utils.py index 1081e17..c7f71fa 100644 --- a/core/utils.py +++ b/core/utils.py @@ -70,27 +70,6 @@ def chaosmonkey(n): # print("CHAOS strikes !", file=sys.stderr) return True -# -def GetListDir(sdir): - '''handles url or file, so we can refer to a set of scans (not drawings) on another server - returns a list of f (file), ff (file full path), is_dir (bool) - - REPLACE all use of this with Path.rglob() ! - ''' - res = [ ] - if type(sdir) is str and sdir[:7] == "http://": - # s = urllib.request.urlopen(sdir) - message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]" - print(message) - DataIssue.objects.create(parser='Drawings', message=message) - sdir[:7] = "" - - for f in os.listdir(sdir): - if f[0] != ".": - ff = os.path.join(sdir, f) - res.append((f, ff, os.path.isdir(ff))) - return res - def only_commit(fname, message): '''Only used to commit a survex file edited and saved in view/survex.py ''' diff --git a/core/views/scans.py b/core/views/scans.py index 1b9ab95..50e7209 100644 --- a/core/views/scans.py +++ b/core/views/scans.py @@ -208,8 +208,8 @@ def cavewallets(request, caveid): wallets.add(z) else: wurl = f"/scanupload/{z.walletname.replace('#',':')}" - print(f' - Unrecognised cave name \'{zcaveid}\' in {z.walletname}') - message = f" ! In {z.walletname} there is an unrecognised cave name '{zcaveid}'" + print(f' - Unrecognised cave name \'{zcaveid}\' in {z.walletname} (out of {len(Gcavelookup):,} cave names') + message = f" ! In {z.walletname} there is an unrecognised cave name '{zcaveid}' (out of {len(Gcavelookup):,} cave names" DataIssue.objects.update_or_create(parser='scans', message=message, url=wurl) manywallets = list(set(wallets)) diff --git a/core/views/uploads.py b/core/views/uploads.py index 676c554..7e25980 100644 --- a/core/views/uploads.py +++ b/core/views/uploads.py @@ -208,7 +208,7 @@ def get_complaints(complaints, waldata, svxfiles, files, wallet, wurl): if not waldata["description written"]: complaints.append("The guidebook description needs writing into the survex file. Tick the 'Cave description written' checkbox when this is done.") # QMs - if not waldata["qms written"] and int(w.year()) >= 2015: + if not waldata["qms written"] and w.year() and int(w.year()) >= 2015: complaints.append("The QMs needs writing into the survex file. Tick the 'QMs written' checkbox when this is done.") # Website diff --git a/parsers/drawings.py b/parsers/drawings.py index 4f52889..b3ce8c8 100644 --- a/parsers/drawings.py +++ b/parsers/drawings.py @@ -12,7 +12,7 @@ from functools import reduce import settings from troggle.core.models.survex import SingleScan, Wallet, DrawingFile from troggle.core.models.troggle import DataIssue -from troggle.core.utils import save_carefully, GetListDir +from troggle.core.utils import save_carefully '''Searches through all the :drawings: repository looking for tunnel and therion files diff --git a/parsers/scans.py b/parsers/scans.py index 3922b6b..4a8b68d 100644 --- a/parsers/scans.py +++ b/parsers/scans.py @@ -14,7 +14,7 @@ from pathlib import Path import settings from troggle.core.models.survex import SingleScan, Wallet, DrawingFile from troggle.core.models.troggle import DataIssue -from troggle.core.utils import save_carefully, GetListDir +from troggle.core.utils import save_carefully from troggle.core.views.scans import datewallet '''Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced. @@ -26,66 +26,63 @@ git = settings.GIT # to do: Actually read all the JSON files and set the survex file field appropriately! - -def CheckEmptyDate(wallet): - '''If date is not set, get it from a linked survex file. - Could also look at filedates for the scans in expofiles/surveyscans/ , but these can be re-set by copying. - ''' - earliest = datetime.datetime.now().date() +# def GetListDir(sdir): + # '''handles url or file, so we can refer to a set of scans (not drawings) on another server + # returns a list of f (file), ff (file full path), is_dir (bool) - # This is not working, can't see why. An scans parser now taking a very long time.. - #datewallet(wallet, earliest) - return - -def CheckEmptyPeople(wallet): - '''If people list is empty, copy them from the survex files: all of them - - To be a Troggle model change; a many:many relationship between wallets and people, - as well as being a list in the JSON file (which is the permanent repository). We want the many:many - relationship so that we can filter wallets based on a person. - - For the moment, we will just get a list.. - ''' - return + # REPLACE all use of this with Path.rglob() ! + # ''' + # res = [ ] + # if type(sdir) is str and sdir[:7] == "http://": + # # s = urllib.request.urlopen(sdir) + # message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]" + # print(message) + # DataIssue.objects.create(parser='Drawings', message=message) + # sdir[:7] = "" -def LoadListScansFile(wallet): - gld = [ ] - # flatten out any directories in these wallet folders - should not be any - for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath): - if fisdiryf: - gld.extend(GetListDir(ffyf)) - else: - gld.append((fyf, ffyf, fisdiryf)) + # for f in os.listdir(sdir): + # if f[0] != ".": + # ff = os.path.join(sdir, f) + # res.append((f, ff, os.path.isdir(ff))) + # return res + + +# def LoadListScansFile(wallet): + # # formerly a generic troggle utility, written by who ? Being gradually expunged and replaced by python standard library functions + # gld = [ ] + # # flatten out any directories in these wallet folders - should not be any + # for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath): + # if fisdiryf: + # gld.extend(GetListDir(ffyf)) + # else: + # gld.append((fyf, ffyf, fisdiryf)) - c=0 - for (fyf, ffyf, fisdiryf) in gld: - if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif|xvi)(?i)$", fyf): - singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet) - singlescan.save() - c+=1 - if c>=10: - print(".", end='') - c = 0 + # c=0 + # for (fyf, ffyf, fisdiryf) in gld: + # if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif|xvi)(?i)$", fyf): + # singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet) + # singlescan.save() + # c+=1 + # if c>=10: + # print(".", end='') + # c = 0 def load_all_scans(): '''This iterates through the scans directories (either here or on the remote server) and builds up the models we can access later. + It does NOT read or validate anything in the JSON data attached to each wallet. Those checks are done at runtime, when a wallet is accessed, not at import time. - Replace GetListDir with a more modern Path.iter idiom - path = Path("scans") - for p in path.rglob("*"): - print(p.name) - ''' print(' - Loading Survey Scans') SingleScan.objects.all().delete() Wallet.objects.all().delete() - print(' - deleting all Wallet and SingleScan objects') + print(' - deleting all Wallet and SingleScan objects') DataIssue.objects.filter(parser='scans').delete() + # These are valid old file types to be visible, they are not necessarily allowed to be uploaded to a new wallet. valids = [".top",".txt",".tif",".png",".jpg",".jpeg",".pdf",".svg",".gif",".xvi", ".json",".autosave",".sxd",".svx",".th",".th2",".tdr",".sql",".zip",".dxf",".3d", ".ods",".csv",".xcf",".xml"] @@ -95,10 +92,12 @@ def load_all_scans(): # Not all folders with files in them are wallets. # they are if they are /2010/2010#33 # or /1996-1999NotKHbook/ - # but not if they are /2010/1010#33/therion or /1998/ - print(' - ', end=' ') + # but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/ + print(' - ', end='') scans_path = Path(settings.SCANS_ROOT) seen = [] + c=0 + wallets = {} for p in scans_path.rglob('*'): if p.is_file(): if p.suffix.lower() not in valids and p.name.lower() not in validnames: @@ -107,6 +106,13 @@ def load_all_scans(): elif p.parent == scans_path: # skip files directly in /surveyscans/ pass else: + + c+=1 + if c % 15 == 0 : + print(".", end='') + if c % 500 == 0 : + print("\n -", end='') + if p.parent.parent.parent.parent == scans_path: # print(f"too deep {p}", end='\n') fpath = p.parent.parent @@ -114,54 +120,66 @@ def load_all_scans(): else: fpath = p.parent walletname = p.parent.name - - # UNFINISHED + + if walletname in wallets: + wallet = wallets[walletname] + else: + print("", flush=True, end='') + wallet = Wallet(fpath=fpath, walletname=walletname) + wallet.save() + wallets[walletname] = wallet + + singlescan = SingleScan(ffile=fpath, name=p.name, wallet=wallet) + singlescan.save() + + + # only printing progress: tag = p.parent if len(walletname)>4: if walletname[4] == "#": tag = p.parent.parent if tag not in seen: - print(f"{tag.name}", end=' ') + print(f" {tag.name} ", end='') seen.append(tag) - #wallet = Wallet(fpath=fpath, walletname=walletname) + - - print('\n UNFINISHED \n\n--- ') - for topfolder, fpath, fisdir in GetListDir(settings.SCANS_ROOT): - if not fisdir: - continue + print(f'\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets') + + # if False: + # n=0 + # for topfolder, fpath, fisdir in GetListDir(settings.SCANS_ROOT): + # if not fisdir: + # continue - # do the year folders - if re.match(r"\d\d\d\d$", topfolder): - print(f"{topfolder}", end=' ') - for walletname, fpath, fisdir in GetListDir(fpath): - if fisdir: - wallet = Wallet(fpath=fpath, walletname=walletname) - # this is where we should record the year explicitly - # line 347 of view/uploads.py and needs refactoring for loading contentsjson - CheckEmptyDate(wallet) - CheckEmptyPeople(wallet) - wallet.save() - LoadListScansFile(wallet) - else: - # but We *should* load all the scans, even for nonstandard names. - print(f'\n - IGNORE {walletname} - {fpath}') - - # but we also need to check if JSON exists, even if there are no uploaded scan files + # # do the year folders + # # if re.match(r"\d\d\d\d$", topfolder): + # print(f"{topfolder}", end=' ') + # for walletname, fpath, fisdir in GetListDir(fpath): + # if fisdir: + # wallet = Wallet(fpath=fpath, walletname=walletname) + # # this is where we should record the year explicitly + # # line 347 of view/uploads.py and needs refactoring for loading contentsjson + # wallet.save() + # LoadListScansFile(wallet) + # # else: + # # # but We *should* load all the scans, even for nonstandard names. + # # print(f'\n - IGNORE {topfolder} - {fpath}') + # print("", flush=True) + + # but we also need to check if JSON exists, even if there are no uploaded scan files. + # Here we know there is a rigid folder structure, so no need to look for sub folders contents_path = Path(settings.DRAWINGS_DATA, "walletjson") for yeardir in contents_path.iterdir(): if yeardir.is_dir(): for walletpath in yeardir.iterdir(): if Path(walletpath, contentsjson).is_file(): walletname = walletpath.name - wallet, created = Wallet.objects.update_or_create(walletname=walletname) - # should now also load the json and use it ! check &ref is correct or missing too - if created: - print(f"\n{walletname} created: only JSON, no actual uploaded scan files.", end=' ') - CheckEmptyDate(wallet) - CheckEmptyPeople(wallet) - wallet.save() - - - print("", flush=True) + + if walletname not in wallets: + print(f" - {walletname} creation attempting: only JSON, no actual uploaded scan files.", end=' ') + wallet, created = Wallet.objects.update_or_create(walletname=walletname) + # should now also load the json and use it ! check &ref is correct or missing too + if created: + print(f" - {walletname} created: only JSON, no actual uploaded scan files.", end=' ') + wallet.save() diff --git a/parsers/survex.py b/parsers/survex.py index d3eec8c..44f72f8 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -323,7 +323,7 @@ class LoadingSurvex(): perps = get_people_on_trip(survexblock) # What, you don't know Judge Dredd slang ? message = f"! DATE Warning only accurate to the month, setting to 1st '{oline}' ({survexblock}) {survexblock.survexfile.path} {perps}" print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) + DataIssue.objects.create(parser='survex-date', message=message, url=get_offending_filename(survexblock.survexfile.path)) survexblock.date = datetime.strptime(line.replace('.','-'), '%Y-%m') # sets to first of month setdate(year) elif len(line) == 4: @@ -331,7 +331,7 @@ class LoadingSurvex(): perps = get_people_on_trip(survexblock) message = f"! DATE WARNING only accurate to the YEAR, setting to 1st January '{oline}' ({survexblock}) {survexblock.survexfile.path} {perps}" print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) + DataIssue.objects.create(parser='survex-date', message=message, url=get_offending_filename(survexblock.survexfile.path)) survexblock.date = datetime.strptime(line, '%Y') # sets to January 1st setdate(year) else: @@ -1546,6 +1546,7 @@ def LoadSurvexBlocks(): SurvexStation.objects.all().delete() print(" - survex Data Issues flushed") DataIssue.objects.filter(parser='survex').delete() + DataIssue.objects.filter(parser='survex-date').delete() DataIssue.objects.filter(parser='survexleg').delete() DataIssue.objects.filter(parser='survexunits').delete() DataIssue.objects.filter(parser='entrances').delete()