troggle-unchained/parsers/scans.py

195 lines
7.7 KiB
Python
Raw Normal View History

2021-05-03 20:36:29 +01:00
import sys
import os
import subprocess
2021-05-03 20:36:29 +01:00
import types
import stat
import csv
import re
import datetime
import shutil, filecmp
2021-05-03 20:36:29 +01:00
from functools import reduce
from pathlib import Path
2021-05-03 20:36:29 +01:00
import settings
from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
from troggle.core.models.troggle import DataIssue
2021-05-04 20:57:16 +01:00
from troggle.core.utils import save_carefully, GetListDir
2022-08-01 15:32:35 +01:00
from troggle.core.views.scans import datewallet
2021-05-03 20:36:29 +01:00
'''Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced.
'''
# File name of the JSON metadata file looked for inside each wallet folder
# (joined onto wallet.fpath by the wallet-copying code in this module).
contentsjson = "contents.json"
# Presumably the file name of a wallet's HTML index page; it is not referenced
# by any code visible in this module - confirm against callers elsewhere.
indexhtml = "walletindex.html"
# The git command taken from the project settings; used as the first argv
# element when this module invokes git through subprocess.run().
git = settings.GIT
# Template wallet metadata with every field blank/False and a single
# "Unknown" person. Key order is significant if this is serialised to JSON.
# Presumably the starting contents.json for a wallet that has none; it is not
# referenced by code visible in this module - confirm against callers.
# TODO: create a 'low priority' field, so that any such wallet does not
# appear in summary reports.
wallet_blank_json = {
    "cave": "",
    "date": "",
    "description url": "/caves",
    "description written": False,
    "electronic survey": False,
    "elev drawn": False,
    "elev not required": False,
    "name": "",
    "people": ["Unknown"],
    "plan drawn": False,
    "plan not required": False,
    "qms written": False,
    "survex file": [],
    "survex not required": False,
    "website updated": False,
}

# Template HTML index page for a wallet. WALLET and YEAR look like
# placeholders to be substituted by whoever uses the template - confirm.
# The text is emitted verbatim, so do not re-indent or re-wrap it.
wallet_blank_html = """<html><body><H1>Wallet WALLET</H1>
<p>List of trips: <a href="http://expo.survex.com/expedition/YEAR">expedition/YEAR</a>
- troggle-processed .svx files and logbook entries on server</p>
<p>Date: </p><p>People: Unknown,</p>
<p>Cave <a href='http://expo.survex.com/caves/'>Guidebook description</a>
- A description is indicated as being needed, so may need adding into this cave page.
<p>Survex file: not identified yet
<H2>Issues</H2>
<p>The description needs writing</p>
<p>The QMs needs writing</p><p>The website is marked as needing updating (using the guidebook description)</p>
<p>Tunnel / Therion drawing files need drawing</p>
<H2>Files</H2>
<UL>
</UL>
</body></html>
"""
def CheckEmptyDate(wallet):
    """Placeholder for filling in a wallet's missing date from a linked survex file.

    Currently a deliberate no-op: the wallet is not read or modified.
    The file dates of the scans in expofiles/surveyscans/ could also be
    consulted, but those can be re-set by copying.

    Args:
        wallet: the Wallet being loaded (unused while the lookup is disabled).

    Returns:
        None.
    """
    # The date lookup is switched off. The original call was
    #     datewallet(wallet, datetime.datetime.now().date())
    # and its author disabled it with the note that it was not working and
    # that the scans parser was taking a very long time. The "earliest"
    # default date is no longer computed here because nothing consumed it.
    return None
def CheckEmptyPeople(wallet):
    """Placeholder for copying people into a wallet whose people list is empty.

    The intent is to take all the people from the wallet's survex files.
    Longer term this is to become a Troggle model change: a many:many
    relationship between wallets and people (so that wallets can be filtered
    by person), alongside the list kept in the JSON file, which is the
    permanent repository. For the moment the plan is just to get a list.

    Not implemented yet: the wallet is left untouched and None is returned.
    """
    return None
2021-05-03 20:36:29 +01:00
def LoadListScansFile(wallet):
    """Create and save a SingleScan for every scan file found in a wallet folder.

    Lists wallet.fpath with GetListDir(); any sub-directory found there is
    expanded one level so that its entries are treated like files of the
    wallet itself. Every entry whose name ends in .png, .jpg, .jpeg, .pdf,
    .svg or .gif (case-insensitive) is saved as a SingleScan linked to the
    wallet. A "." is printed after every tenth scan saved as a progress mark.

    Args:
        wallet: the Wallet whose folder (wallet.fpath) is to be scanned.

    Returns:
        None.
    """
    # The case-insensitive flag is passed as an argument: an inline "(?i)" at
    # the END of the pattern is rejected with re.error from Python 3.11 on.
    scan_name = re.compile(r"\.(?:png|jpg|jpeg|pdf|svg|gif)$", re.IGNORECASE)

    entries = []
    # flatten out any directories in these wallet folders - should not be any
    for name, path, is_dir in GetListDir(wallet.fpath):
        if is_dir:
            entries.extend(GetListDir(path))
        else:
            entries.append((name, path, is_dir))

    saved_since_dot = 0
    for name, path, _is_dir in entries:
        if not scan_name.search(name):
            continue
        SingleScan(ffile=path, name=name, wallet=wallet).save()
        saved_since_dot += 1
        if saved_since_dot >= 10:
            print(".", end='')
            saved_since_dot = 0
def CopyWalletData(wallet):
    """Disabled: formerly copied a wallet's contents.json into the drawings repo.

    This is now a deliberate no-op, because the drawings repo is the master
    copy of the wallet data. The previous implementation is kept, uncalled,
    in _copy_wallet_json_to_drawings() rather than as unreachable statements
    after the return.

    Args:
        wallet: the Wallet being loaded (unused).

    Returns:
        None.
    """
    # not needed now the drawings repo is the master
    return None


def _copy_wallet_json_to_drawings(wallet):
    """Legacy body of CopyWalletData(); nothing in this module calls it.

    Copies the wallet's contents.json to a parallel folder
    <DRAWINGS_DATA>/walletjson/<year>/<walletname>/ and, when the copy differs
    from what was there, runs "git add" and "git commit" in that folder.
    Problems are reported with print(); only PermissionError is caught.
    """
    year = wallet.walletname[0:4]
    destfolder = Path(settings.DRAWINGS_DATA, 'walletjson', year, wallet.walletname)
    destjson = destfolder / contentsjson
    sourcejson = Path(wallet.fpath, contentsjson)

    if not os.path.exists(Path(destfolder)):
        try:
            os.makedirs(destfolder)
            print(f' - created folder {destfolder}..')
        except PermissionError:
            print(f"CANNOT save this JSON file.\nPERMISSIONS incorrectly set on server for this folder {destfolder}. Ask a nerd to fix this.")

    if not os.path.isfile(sourcejson):
        return

    try:
        if not os.path.isfile(destjson) or not filecmp.cmp(sourcejson, destjson):
            shutil.copy(sourcejson, destjson)
            print(f' - Copied {sourcejson} to {destjson}')
            dr_add = subprocess.run([git, "add", contentsjson], cwd=destfolder, capture_output=True, text=True)
            if dr_add.returncode != 0:
                msgdata = 'Ask a nerd to fix this.\n\n' + dr_add.stderr + '\n\n' + dr_add.stdout + '\n\nreturn code: ' + str(dr_add.returncode)
                message = f'CANNOT git on server for this file {contentsjson}. Edits saved but not added to git.\n\n' + msgdata
                print(message)
            else:
                # Ideally many changes to many wallets would be committed just
                # once, but most of the time only a couple of files change.
                dr_commit = subprocess.run([git, "commit", "-m", f'Update of {contentsjson} in wallet'], cwd=destfolder, capture_output=True, text=True)
                # NOTE(review): the original comment here said a commit that
                # succeeds gives return code 1, yet any non-zero code is
                # reported as an error below - confirm before re-enabling.
                if dr_commit.returncode != 0:
                    msgdata = 'Ask a nerd to fix this.\n\n' + dr_commit.stderr + '\n\n' + dr_commit.stdout + '\n\nreturn code: ' + str(dr_commit.returncode)
                    message = f'Error code with git on server for this {contentsjson}. File is copied, added to git, but NOT committed.\n\n' + msgdata
                    print(message)
    except PermissionError:
        print(f"CANNOT copy this JSON file.\nPERMISSIONS incorrectly set on server for this file {destjson}. Ask a nerd to fix this.")
2021-05-03 20:36:29 +01:00
def load_all_scans():
    """Rebuild all Wallet and SingleScan records from the scans directories.

    Iterates through the scans directories (either here or on the remote
    server) and builds up the models that can be accessed later:

    1. Deletes every SingleScan and Wallet, and every DataIssue whose parser
       is 'scans'.
    2. Loads the "smkhs" (large kh survey scans) folder as one wallet, if
       that directory exists.
    3. For each four-digit year folder under settings.SCANS_ROOT, creates a
       Wallet for each sub-directory and loads its scan files. Other
       directories under SCANS_ROOT are reported as ignored.

    Progress is written to stdout with print().
    """
    print(' - Loading Survey Scans')

    SingleScan.objects.all().delete()
    Wallet.objects.all().delete()
    print(' - deleting all Wallet and SingleScan objects')
    DataIssue.objects.filter(parser='scans').delete()

    # First the smkhs (large kh survey scans) directory.
    # This seems to be never used ?!
    # We should load all the scans, even for nonstandard names.
    smkhs_path = os.path.join(settings.SCANS_ROOT, "../surveys/smkhs")
    manywallets_smkhs = Wallet(fpath=smkhs_path, walletname="smkhs")
    print("smkhs", end=' ')
    if not os.path.isdir(manywallets_smkhs.fpath):
        print("smkhs NOT LOADED", end=' ')
    else:
        manywallets_smkhs.save()
        LoadListScansFile(manywallets_smkhs)

    # Now iterate into the surveyscans directory.
    print(' - ', end=' ')
    for topname, toppath, top_is_dir in GetListDir(settings.SCANS_ROOT):
        if not top_is_dir:
            continue

        # Only the year folders are processed.
        if not re.match(r"\d\d\d\d$", topname):
            # but we should load all the scans, even for nonstandard names.
            print(f'\n - IGNORE {topname} - {toppath}')
            continue

        print(f"{topname}", end=' ')
        for walletname, fpath, fisdir in GetListDir(toppath):
            if not fisdir:
                continue
            wallet = Wallet(fpath=fpath, walletname=walletname)
            # This is where we should load the contents.json for people so
            # we can report on them later, and record the year explicitly.
            # See line 347 of view/uploads.py, which needs refactoring for
            # loading contentsjson.
            CheckEmptyDate(wallet)
            CheckEmptyPeople(wallet)
            wallet.save()
            LoadListScansFile(wallet)
            CopyWalletData(wallet)

    print("", flush=True)