2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-12-01 06:11:51 +00:00
troggle/core/utils.py

333 lines
14 KiB
Python
Raw Normal View History

2023-09-01 18:31:19 +01:00
import hashlib
2023-01-19 18:35:56 +00:00
import logging
2023-09-01 18:31:19 +01:00
import os
2023-01-19 18:35:56 +00:00
import random
2021-04-12 23:58:48 +01:00
import resource
2023-09-01 18:31:19 +01:00
import string
import subprocess
2023-01-30 23:04:11 +00:00
from decimal import getcontext
2022-07-19 17:06:56 +01:00
from pathlib import Path
2021-04-12 23:58:48 +01:00
2023-09-05 15:19:09 +01:00
from troggle.core.models.troggle import Expedition
getcontext().prec = 2 # use 2 significant figures for decimal calculations
2021-04-12 23:58:48 +01:00
2023-01-19 18:35:56 +00:00
import settings
2021-04-12 23:58:48 +01:00
"""This file declares TROG a globally visible object for caches.
2021-04-12 23:58:48 +01:00
2022-03-24 20:59:36 +00:00
TROG is a dictionary holding globally visible indexes and cache functions.
2021-04-12 23:58:48 +01:00
It is a Global Object, see https://python-patterns.guide/python/module-globals/
2021-04-13 00:43:57 +01:00
troggle.utils.TROG
2021-04-12 23:58:48 +01:00
chaosmonkey(n) - used by survex import to regenerate some .3d files
save_carefully() - core function that saves troggle objects in the database
various git add/commit functions that need refactoring together
2023-01-27 17:24:31 +00:00
NOTE that access to TROG is not serialized! Two users can update it at the
same time and conflict. Shared state like this needs to be in a multi-user
database with transactions. However, TROG is useful when doing a data import
with databaseReset.py, as that runs in a single thread.
2023-01-27 17:24:31 +00:00
"""
2021-04-12 23:58:48 +01:00
# TROG: the module-wide dictionary of page and cave caches described in the text above.
TROG = {"pagecache": {"expedition": {}}, "caves": {"gcavelookup": {}, "gcavecount": {}}}
# Letter list associated with alphabet_suffix(); starts empty.
alphabet = []
# Module-level SHA-256 hash object. NOTE: a hash object accumulates every update() it is given.
sha = hashlib.new('sha256')

# This is module-level executable. This is a Bad Thing. Especially when it touches the file system.
try:
    logging.basicConfig(level=logging.DEBUG, filename=settings.LOGFILE, filemode="w")
except Exception:
    # Opening of file for writing is going to fail currently, so decide it doesn't matter for now.
    # Exception (not a bare except) is caught so that KeyboardInterrupt and SystemExit still propagate.
    pass
2021-04-12 23:58:48 +01:00
2023-09-13 16:46:10 +01:00
def sanitize_name(name):
    """Return name with the characters that break URLs swapped for harmless ones.

    Filenames should not contain '#', '?', '&' or ':' because the system barfs
    when it tries to use them in URLs. They are replaced by '-', '=', '+' and
    '^' respectively.
    """
    url_safe = str.maketrans("#?&:", "-=+^")
    return name.translate(url_safe)
2021-04-12 23:58:48 +01:00
def get_process_memory():
    """Return the ru_maxrss figure reported by getrusage() for this process, divided by 1024.

    NOTE(review): ru_maxrss is in kilobytes on Linux (making the result
    megabytes) but in bytes on macOS - confirm for the deployment platform.
    """
    return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0
2021-04-12 23:58:48 +01:00
def chaosmonkey(n):
    """Return True with probability 1/n and False otherwise.

    Each call makes one independent random draw from 0..n-1 and reports
    whether it came up 0.
    """
    return random.randrange(0, n) == 0
2023-09-01 18:31:19 +01:00
def unique_slug(text, n):
    """Return the first n hex digits of the SHA-256 digest of text.

    This gives an almost-unique id based on the text. 2 hex digits would seem
    adequate, but we might get a collision. Not used anywhere.

    A new hash object is created on every call. Feeding one shared hash object
    would make the digest depend on every text hashed before, so the same text
    would produce a different slug each time it was asked for.

    text: the string to hash (encoded as UTF-8 first).
    n: how many leading hex characters to return (the full digest has 64).
    """
    return hashlib.sha256(text.encode('utf-8')).hexdigest()[:n]
def alphabet_suffix(n):
    """Return the n-th lowercase letter as a suffix: 1 -> "a", 2 -> "b", ... 26 -> "z".

    Any n outside 1..26 gets a fallback suffix instead: "_X_" followed by two
    random lowercase letters.

    This is called repeatedly during initial parsing import.
    string.ascii_lowercase is a constant string, so it is indexed directly
    rather than copied into a cached list.
    """
    letters = string.ascii_lowercase
    # Inclusive upper bound: n == 26 must map to "z" (index 25).
    if 1 <= n <= len(letters):
        return letters[n - 1]
    return "_X_" + random.choice(letters) + random.choice(letters)
def current_expo():
    """Return the year of the Expedition that sorts first by descending year.

    Falls back to settings.EPOCH.year when there are no Expedition records.
    """
    # first() asks the database for a single row. Testing the whole queryset
    # for truthiness fetched every Expedition just to read one of them.
    latest = Expedition.objects.order_by('-year').first()
    if latest is None:
        return settings.EPOCH.year
    return latest.year
def only_commit(fname, message):
    """Run "git add" on one file, then "git commit" in that file's directory.

    Only used to commit a survex file edited and saved in view/survex.py.

    fname: path object for the already-saved file; git is run with
        fname.parent as its working directory.
    message: the git commit message.

    Raises WriteAndCommitError if "git add" exits non-zero, if "git commit"
    exits non-zero without reporting that there was nothing to commit, or if
    subprocess raises a SubprocessError.
    """
    git = settings.GIT
    cwd = fname.parent
    filename = fname.name
    try:
        cp_add = subprocess.run([git, "add", filename], cwd=cwd, capture_output=True, text=True)
        if cp_add.returncode != 0:
            msgdata = f"Ask a nerd to fix this problem in only_commit().\n--{cp_add.stderr}\n--{cp_add.stdout}\n--return code:{str(cp_add.returncode)}"
            raise WriteAndCommitError(
                f"CANNOT git ADD on server for this file {filename}. Edits saved but not added to git.\n\n" + msgdata
            )

        cp_commit = subprocess.run([git, "commit", "-m", message], cwd=cwd, capture_output=True, text=True)
        # git commit can exit with return code 1 even though nothing is wrong, e.g. when the local repo
        # still needs to be pushed to origin, which will be the case when running a test troggle system
        # on a development machine. Any non-zero exit whose output ends with the "nothing to commit"
        # line is therefore tolerated. This one test also covers the exact "ahead of origin by 1 commit"
        # message, which ends with that same line, so no separate exact-text comparison is needed.
        commit_ok = cp_commit.returncode == 0 or cp_commit.stdout.strip().endswith(
            "nothing to commit, working tree clean"
        )
        if not commit_ok:
            msgdata = f'--Ask a nerd to fix this problem in only_commit().\n--{cp_commit.stderr}\n--"{cp_commit.stdout}"\n--return code:{str(cp_commit.returncode)}'
            print(msgdata)
            raise WriteAndCommitError(
                f"Error code with git on server for this file {filename}. Edits saved, added to git, but NOT committed.\n\n"
                + msgdata
            )
    except subprocess.SubprocessError as err:
        # NOTE(review): a missing git executable raises FileNotFoundError, which is an OSError and
        # not a SubprocessError, so it is not converted here.
        raise WriteAndCommitError(
            f"CANNOT git COMMIT on server for this file {filename}. Subprocess error. Edits not saved.\nAsk a nerd to fix this."
        ) from err
def write_and_commit(files, message):
    """Write each file to disk, "git add" it, then commit all of them in one git commit.

    files: an iterable of (filepath, content, encoding) tuples. filepath is a
        path object (its .parent and .name are used). content is written as
        text with the given encoding when encoding is truthy, otherwise it is
        written in binary mode.
    message: the git commit message.

    Missing parent folders are created before each file is written. The final
    "git commit" and "git status" run in the parent folder of the last file.
    Nothing is done when files is empty.

    Raises WriteAndCommitError if a file cannot be written because of
    permissions, if "git add" exits non-zero, if "git status" afterwards does
    not report a clean working tree for these files, or if subprocess raises a
    SubprocessError.

    Functionality here is duplicated in only_commit(). These need refactoring.
    """
    files = list(files)  # iterated twice below
    if not files:
        # Without any file there is no directory to run git in.
        return

    git = settings.GIT
    commands = []
    try:
        for filepath, content, encoding in files:
            cwd = filepath.parent
            filename = filepath.name
            # GIT see also core/views/uploads.py dwgupload()
            # GIT see also core/views/expo.py editexpopage()
            os.makedirs(cwd, exist_ok=True)
            if encoding:
                mode = "w"
                kwargs = {"encoding": encoding}
            else:
                mode = "wb"
                kwargs = {}
            try:
                with open(filepath, mode, **kwargs) as f:
                    print(f"WRITING {cwd}---{filename} ")
                    # as the wsgi process www-data, we have group write-access but are not owner, so cannot chmod.
                    # os.chmod(filepath, 0o664) # set file permissions to rw-rw-r--
                    f.write(content)
            except PermissionError as err:
                raise WriteAndCommitError(
                    f"CANNOT save this file.\nPERMISSIONS incorrectly set on server for this file {filename}. Ask a nerd to fix this."
                ) from err

            cmd_diff = [git, "diff", filename]
            cp_diff = subprocess.run(cmd_diff, cwd=cwd, capture_output=True, text=True)
            commands.append(cmd_diff)
            if cp_diff.returncode == 0:
                cmd_add = [git, "add", filename]
                cp_add = subprocess.run(cmd_add, cwd=cwd, capture_output=True, text=True)
                commands.append(cmd_add)
                if cp_add.returncode != 0:
                    msgdata = (
                        "Ask a nerd to fix this.\n\n"
                        + cp_add.stderr
                        + "\n\n"
                        + cp_add.stdout
                        + "\n\nreturn code: "
                        + str(cp_add.returncode)
                    )
                    raise WriteAndCommitError(
                        f"CANNOT git on server for this file {filename}. Edits saved but not added to git.\n\n"
                        + msgdata
                    )
            else:
                print(f"No change {filepath}")

        filepaths = [path for path, _content, _encoding in files]
        cmd_commit = [git, "commit"] + filepaths + ["-m", message]
        cp_commit = subprocess.run(cmd_commit, cwd=cwd, capture_output=True, text=True)
        commands.append(cmd_commit)
        # The commit's return code is deliberately not treated as the verdict: only_commit() in this
        # file shows git commit exiting non-zero in cases that are not failures. "git status" below
        # decides; the commit output is kept so that it can be included in the error report.

        cmd_status = [git, "status"] + filepaths
        cp_status = subprocess.run(cmd_status, cwd=cwd, capture_output=True, text=True)
        commands.append(cmd_status)
        # Empty output, or output that does not finish with the "clean" line, means the commit did not take.
        if not cp_status.stdout.strip().endswith("nothing to commit, working tree clean"):
            msgdata = (
                str(commands)
                + "Ask a nerd to fix this.\n\n"
                + "Stderr: " + cp_status.stderr
                + "\n\n"
                + "Stdout: " + cp_status.stdout
                + "\n\nreturn code: " + str(cp_status.returncode)
                + "\n\ngit commit stderr: " + cp_commit.stderr
                + "\n\ngit commit stdout: " + cp_commit.stdout
                + "\n\ngit commit return code: " + str(cp_commit.returncode)
            )
            raise WriteAndCommitError(
                f"Error code with git on server for this file {filename}. Edits saved, added to git, but NOT committed.\n\n"
                + msgdata
            )
    except subprocess.SubprocessError as err:
        raise WriteAndCommitError(
            f"CANNOT git on server for this file {filename}. Subprocess error. Edits not saved.\nAsk a nerd to fix this."
        ) from err
class WriteAndCommitError(Exception):
    """Raised when a file cannot be written or cannot be committed to git.

    The text passed in is kept on the .message attribute, and str() of the
    exception prefixes it with the class name.
    """

    def __init__(self, message):
        super().__init__(message)
        self.message = message

    def __str__(self):
        return "WriteAndCommitError: {}".format(self.message)
2022-07-18 14:57:13 +01:00
def _run_git_reporting(args, cwd, label):
    """Run a git command with check=True; on a non-zero exit print a short report, then re-raise."""
    try:
        return subprocess.run(args, cwd=cwd, capture_output=True, check=True, text=True)
    except subprocess.CalledProcessError as err:
        out = err.stdout or ""
        if len(out) > 160:
            out = out[:75] + "\n <Long output curtailed>\n" + out[-75:]
        print(f"git {label} {cwd}:\n\n" + str(err.stderr) + "\n\n" + out + "\n\nreturn code: " + str(err.returncode))
        raise


def writetrogglefile(filepath, filecontent):
    """Write filecontent to filepath, then "git add" and "git commit" it.

    Callers to cave.writeDataFile() or entrance.writeDataFile() should handle the exception PermissionsError explicitly.

    filepath: path of the file to write (converted to pathlib.Path); git runs in its parent folder.
    filecontent: the text to write.

    Exceptions are not trapped here but passed up to the view that called this
    function. That includes subprocess.CalledProcessError when either git
    command exits non-zero, because both are run with check=True. A short
    report is printed before that exception propagates. With check=True,
    inspecting returncode after the call can never see a failure, so the
    report is made in the exception handler instead.
    """
    # GIT see also core/views/expo.py editexpopage()
    # GIT see also core/views/uploads.py dwgupload()
    # Called from core/models/caves.py Cave.writeDataFile() Entrance.writeDataFile()
    filepath = Path(filepath)
    cwd = filepath.parent
    filename = filepath.name
    git = settings.GIT

    # as the wsgi process www-data, we have group write-access but are not owner, so cannot chmod.
    # do not trap exceptions, pass them up to the view that called this function
    print(f"WRITING{cwd}---{filename} ")
    with open(filepath, "w") as f:
        f.write(filecontent)
    # os.chmod(filepath, 0o664) # set file permissions to rw-rw-r--

    _run_git_reporting([git, "add", filename], cwd, "ADD")
    _run_git_reporting(
        [git, "commit", "-m", f"Troggle online: cave or entrance edit -{filename}"],
        cwd,
        "COMMIT",
    )
    # not catching and converting any exceptions yet, inc. the stderr etc.,. We should do that.
2021-04-13 00:11:08 +01:00
def save_carefully(objectType, lookupAttribs=None, nonLookupAttribs=None):
    """Looks up instance using lookupAttribs and carries out the following:
    -if instance does not exist in DB: add instance to DB, return (new instance, True)
    -if instance exists in DB and was modified using Troggle: do nothing, return (existing instance, False)
    -if instance exists in DB and was not modified using Troggle: overwrite instance, return (instance, False)

    The checking is accomplished using Django's get_or_create and the new_since_parsing boolean field
    defined in core.models.TroggleModel.

    We are not using new_since_parsing - it is a fossil from Aaron Curtis's design in 2006. So it is always false.

    NOTE: this takes twice as long as simply creating a new object with the given values.

    As of Jan.2023 this function is not used anywhere in troggle.

    objectType: the model class; its .objects.get_or_create() is called.
    lookupAttribs: dict of field values used to find the instance (None means no lookup fields).
    nonLookupAttribs: dict of field values used as defaults on creation, and to
        overwrite an existing unmodified instance (None means no such fields).

    Any exception from get_or_create() or from instance.save() is re-raised
    after a diagnostic is printed.
    """
    # None defaults instead of {} so that no dict object is shared between calls.
    lookupAttribs = {} if lookupAttribs is None else lookupAttribs
    nonLookupAttribs = {} if nonLookupAttribs is None else nonLookupAttribs

    try:
        instance, created = objectType.objects.get_or_create(defaults=nonLookupAttribs, **lookupAttribs)
    except Exception:
        print(" !! - FAIL in SAVE CAREFULLY ===================", objectType)
        print(" !! - -- objects.get_or_create()")
        print(f" !! - lookupAttribs:{lookupAttribs}\n !! - nonLookupAttribs:{nonLookupAttribs}")
        raise

    if created:
        logging.info(str(instance) + " was just added to the database for the first time. \n")
    elif instance.new_since_parsing:
        logging.info(
            str(instance) + " has been modified using Troggle since parsing, so the current script left it as is. \n"
        )
    else:
        # overwrite the existing attributes from the logbook text (except date and title)
        for k, v in nonLookupAttribs.items():
            setattr(instance, k, v)
        try:
            instance.save()
        except Exception:
            print(" !! - SAVE CAREFULLY ===================", objectType)
            print(" !! - -- instance.save()")
            print(f" !! - lookupAttribs:{lookupAttribs}\n !! - nonLookupAttribs:{nonLookupAttribs}")
            raise
        logging.info(
            " instance:<"
            + str(instance)
            + "> existed in the database unchanged since last parse. It have been overwritten."
        )
    return (instance, created)