mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2024-11-27 17:51:53 +00:00
507 lines
20 KiB
Python
507 lines
20 KiB
Python
import json
|
|
import locale
|
|
import os
|
|
import resource
|
|
import sys
|
|
import time
|
|
|
|
import settings
|
|
from django.core.management import call_command
|
|
from django.core.management.commands import flush
|
|
|
|
""" Command-line utility for loading cave data files into troggle's database.
|
|
|
|
The command line options select which combination of classes of data will be imported,
|
|
e.g. cave data, logbook data, cavers (people) data. The set of imports requested are put
|
|
into a job queue in a valid order, together with any necessary initialisation if it is
|
|
a complete reset, and the queue is then executed.
|
|
|
|
In future all these functions may be moved to a control panel webpage running within the
|
|
troggle application.
|
|
"""
|
|
# Start-up banner; flush=True pushes it out immediately.
print(" - settings on loading databaseReset.py", flush=True)

# Export the project's Python path and the default settings module name
# through the process environment before Django is imported.
os.environ["PYTHONPATH"] = str(settings.PYTHON_PATH)
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings")

print(" - settings on loading databaseReset.py")

import django

# ru_maxrss is element 2 of the structure returned by getrusage().
print(
    " - Memory footprint before loading Django: {:.3f} MB".format(
        resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0
    )
)
try:
    django.setup()
except BaseException:
    # Same reach as a bare except: print the hint, then let the error propagate untouched.
    print(
        " ! Cyclic reference failure. Can occur when the initial db is empty. Fixed now (in UploadFileForm) but easy to reintroduce.."
    )
    raise
print(
    " - Memory footprint after loading Django: {:.3f} MB".format(
        resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0
    )
)
|
|
|
|
from django.contrib.auth.models import User
|
|
from django.core import management
|
|
from django.db import connection, transaction
|
|
|
|
from troggle.core.utils import get_process_memory
|
|
from troggle.parsers.imports import (
|
|
import_caves,
|
|
import_drawingsfiles,
|
|
import_ents,
|
|
import_loadpos,
|
|
import_logbook,
|
|
import_logbooks,
|
|
import_people,
|
|
import_QMs,
|
|
import_survex,
|
|
import_survex_checks,
|
|
import_surveyscans,
|
|
)
|
|
|
|
if os.geteuid() == 0:
    # This protects the server from having the wrong file permissions written on logs and caches
    print("This script should be run as expo not root - quitting")
    # sys.exit rather than the interactive-only exit() helper; non-zero because this is a refusal.
    sys.exit(1)

# Credentials for the ordinary (non-staff) account created by reinit_db().
expouser = settings.EXPOUSER
expouserpass = settings.EXPOUSERPASS
expouseremail = settings.EXPOUSER_EMAIL

# Credentials for the staff/superuser account created by reinit_db().
expoadminuser = settings.EXPOADMINUSER
expoadminuserpass = settings.EXPOADMINUSERPASS
expoadminuseremail = settings.EXPOADMINUSER_EMAIL
|
|
|
|
|
|
def _drop_and_recreate_database(dbname):
    """Best-effort drop, recreate and flush of a server-hosted (non-sqlite) database.

    Each SQL statement is attempted on its own: a failure is reported on stdout
    (together with the exception) and the next statement is still tried.
    """
    print(f" - Attempting to nuke : {dbname}\n")
    # this is now completely failing to nuke MariaDB adequately, and it crashes when creating Area objects with a no null parent message
    # when null parents are explicitly allowed in the model.
    cursor = django.db.connection.cursor()
    print(f" - - Using {cursor}")

    # Pairs of (statement, whether the failure report also names the cursor).
    # The name is read from settings, not from user input, and SQL identifiers
    # cannot be passed as bound parameters, hence the string formatting.
    statements = (
        (f"DROP DATABASE {dbname}", True),
        (f"CREATE DATABASE {dbname}", True),
        (f"ALTER DATABASE {dbname} CHARACTER SET=utf8", False),
        (f"USE {dbname}", False),
    )
    for sql, name_cursor in statements:
        try:
            cursor.execute(sql)
        except Exception as e:
            suffix = f" with {cursor}" if name_cursor else ""
            print(f" - - Exception when attempting to: {sql}{suffix}")
            print(f" - - - {e!r}")

    try:
        cmd = flush.Command()
        call_command(cmd, verbosity=0, interactive=False)
    except Exception as e:
        print(" - - Exception when attempting to: FLUSH")
        print(f" - - - {e!r}")
    print(f" - Nuked : {dbname}\n")


def _create_user_or_report(label, username, email, password, is_admin):
    """Create one Django auth user; on failure print diagnostics instead of raising.

    label    -- word used in the progress message ("expo" or "expoadmin")
    is_admin -- value stored in both is_staff and is_superuser
    """
    try:
        # The try wraps the atomic block (not the other way round) so the failed
        # transaction has been rolled back before the diagnostic queries below run.
        with transaction.atomic():
            print(f" - Setting up {label} user on: " + django.db.connections.databases["default"]["NAME"])
            # NOTE(review): this echoes the first five characters of the password to stdout.
            print(f" - user: {username} ({password:.5}...) <{email}> ")
            user = User.objects.create_user(username, email, password)
            user.is_staff = is_admin
            user.is_superuser = is_admin
            user.save()
    except Exception as e:
        print(" ! INTEGRITY ERROR user on: " + settings.DATABASES["default"]["NAME"])
        print(django.db.connections.databases["default"]["NAME"])
        print(f" ! {e!r}")
        print(" ! You probably have not got a clean db when you thought you had.\n")
        print(" ! Also you are probably NOT running an in-memory db now.\n")
        print("users in db: ", User.objects.count())
        print("tables in db: ", len(connection.introspection.table_names()))
        memdumpsql(fn="integrityfail.sql")
        # NOTE(review): kept from the original; repoints the configured NAME at an
        # in-memory database after a failure - confirm this is still wanted.
        django.db.connections.databases["default"]["NAME"] = ":memory:"


def reinit_db():
    """Rebuild database from scratch. Deletes the file first if sqlite is used,
    otherwise it drops the database and creates it.

    After clearing, runs makemigrations/migrate and creates the expo and
    expoadmin users from the module-level credential settings.

    Note - initial loading of troggle.sqlite will already have populated the models
    in memory (django python models, not the database), so there is already a full load
    of stuff known. Deleting the db file does not clear memory.

    Raises OSError if an existing sqlite file cannot be removed.
    """
    print("Reinitialising db ", end="")
    print(django.db.connections.databases["default"]["NAME"])
    currentdbname = settings.DATABASES["default"]["NAME"]
    engine = django.db.connections.databases["default"]["ENGINE"]

    if currentdbname == ":memory:":
        # closing connections should wipe the in-memory database
        django.db.close_old_connections()
        for conn in django.db.connections.all():
            print(" ! Closing another connection to db...")
            conn.close()
    elif engine == "django.db.backends.sqlite3":
        if os.path.isfile(currentdbname):
            try:
                print(" - deleting " + currentdbname)
                os.remove(currentdbname)
            except OSError:
                print(
                    " ! OSError on removing: "
                    + currentdbname
                    + "\n ! Is the file open in another app? Is the server running?\n"
                )
                raise
        else:
            print(" - No database file found: " + currentdbname + " ..continuing, will create it.\n")
    else:
        _drop_and_recreate_database(currentdbname)

    print(" - Migrating: " + django.db.connections.databases["default"]["NAME"])

    # The sqlite and non-sqlite paths ran exactly the same three commands.
    management.call_command("makemigrations", "core", interactive=False)
    management.call_command("migrate", interactive=False)
    management.call_command("migrate", "core", interactive=False)

    print(" - done migration on: " + settings.DATABASES["default"]["NAME"])
    print("users in db already: ", User.objects.count())

    _create_user_or_report("expo", expouser, expouseremail, expouserpass, is_admin=False)
    _create_user_or_report("expoadmin", expoadminuser, expoadminuseremail, expoadminuserpass, is_admin=True)
|
|
|
|
|
|
def memdumpsql(fn):
    """Unused option to dump SQL. Aborted attempt to create a cache for loading data

    Writes every line yielded by dump._iterdump() for the default Django
    connection to the file named fn, one per line, and returns True.
    """
    djconn = django.db.connection
    from dump import _iterdump

    # Write the text itself through a UTF-8 file. Formatting line.encode('utf8')
    # into an f-string wrote the bytes repr (b'...') instead of the SQL.
    # Presumably _iterdump yields str, as sqlite3's own iterdump does - verify.
    with open(fn, "w", encoding="utf-8") as f:
        for line in _iterdump(djconn):
            f.write(f"{line}\n")
    return True
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
|
|
class JobQueue:
    """A list of import operations to run. Always reports profile times
    of the import operations in the same order.

    self.results maps each label in self.results_order to a list with one entry
    per recorded run; the final entry is the slot for the current run.
    """

    def __init__(self, run):
        """Initialises the job queue object with a fixed order for reporting
        options during a run. Timings from previous runs are read later,
        by loadprofiles().

        run -- label identifying this run in the stored profile (may be None)
        """
        self.runlabel = run
        self.queue = []  # tuples of (jobname, jobfunction)
        self.results_order = [
            "date",
            "runlabel",
            "reinit",
            "caves",
            "people",
            "logbooks",
            "QMs",
            "scans",
            "survex",
            "drawings",
            "test",
        ]
        self.results = {k: [] for k in self.results_order}
        self.tfile = "import_profile.json"
        self.htmlfile = "profile.html"  # for HTML results table. Not yet done.

    def enq(self, label, func):
        """Enqueue: Adding elements to queue

        label -- key in self.results under which the job's duration is stored
        func  -- zero-argument callable that performs the import
        """
        self.queue.append((label, func))
        return True

    def loadprofiles(self):
        """Load timings for previous imports for each data import type

        A missing, unreadable or malformed profile file is reported and otherwise
        ignored. In every case one None placeholder per label is appended for
        the current run. Returns True.
        """
        if os.path.isfile(self.tfile):
            try:
                # 'with' closes the file on every path, including a failed parse.
                with open(self.tfile, "r", encoding="utf-8") as f:
                    data = json.load(f)
                if not isinstance(data, dict):
                    raise ValueError("profile file does not hold a JSON object")
                self.results.update(data)
            except (OSError, ValueError):
                # json.JSONDecodeError is a ValueError subclass.
                print(f"FAILURE parsing JSON file {self.tfile}")
        for j in self.results_order:
            self.results[j].append(None)  # append a placeholder
        return True

    def dellastprofile(self):
        """trim one set of data from the results (the newest entry of every label)"""
        for j in self.results_order:
            self.results[j].pop()  # delete last item
        return True

    def delfirstprofile(self):
        """trim one set of data from the results (the oldest entry of every label)"""
        for j in self.results_order:
            self.results[j].pop(0)  # delete zeroth item
        return True

    def saveprofiles(self):
        """Save timings for the set of imports just completed"""
        with open(self.tfile, "w", encoding="utf-8") as f:
            json.dump(self.results, f)
        return True

    def runqonce(self):
        """Run all the jobs in the queue provided - once

        Overwrites the placeholder slot of "date", "runlabel" and of each job
        that runs with the start time, the run label and the job's duration.
        """
        print("** Running job ", self.runlabel, end=" to ")
        print(django.db.connections.databases["default"]["NAME"])
        jobstart = time.time()
        print(f"-- Initial memory in use {get_process_memory():.3f} MB")
        # Replace the placeholders added by loadprofiles().
        self.results["date"][-1] = jobstart
        self.results["runlabel"][-1] = self.runlabel

        for jobname, jobparser in self.queue:
            start = time.time()
            memstart = get_process_memory()
            # --------------------
            jobparser()  # invokes function passed in the second item in the tuple
            # --------------------
            memend = get_process_memory()
            duration = time.time() - start
            print(
                '\n*- Ended "',
                jobname,
                f'" {duration:.1f} seconds + {memend - memstart:.3f} MB ({memend:.3f} MB)',
            )
            self.results[jobname][-1] = duration  # overwrite the null item

        jobend = time.time()
        jobduration = jobend - jobstart
        print(f"** Ended job {self.runlabel} - {jobduration:.1f} seconds total.")
        return True

    def append_placeholders(self):
        """Adds a dummy timing for each option, to fix off by one error"""
        for j in self.results_order:
            self.results[j].append(None)  # append a placeholder

    def run_now_django_tests(self, n):
        """Runs the standard django test harness system which is in troggle/core/TESTS/tests.py

        n -- verbosity passed to the "test" management command
        """
        management.call_command("test", verbosity=n)
        django.db.close_old_connections()

    def run(self):
        """Initialises profile timings record, initiates relational database, runs the job queue saving the imported data as an SQL image and saves the timing profile data."""
        self.loadprofiles()
        print(
            "-- start ",
            django.db.connections.databases["default"]["ENGINE"],
            django.db.connections.databases["default"]["NAME"],
        )
        self.runqonce()
        if settings.DATABASES["default"]["NAME"] == ":memory:":
            memdumpsql("memdump.sql")  # saved contents of in-memory db, could be imported later..
        self.saveprofiles()
        return True

    def showprofile(self):
        """Prints out the time it took to run the jobqueue

        One row per label up to (not including) "test"; one column per recorded
        run. The newest timing is followed by its percentage change from the
        previous run when that change exceeds 0.1%.
        """
        for k in self.results_order:
            if k == "test":
                break
            elif k == "date":
                print(" days ago ", end=" ")
            else:
                print("%10s (s)" % k, end=" ")
            r = self.results[k]
            last = len(r) - 1

            for i, value in enumerate(r):
                if k == "runlabel":
                    rp = value if value else " - "
                    print("%8s" % rp, end=" ")
                elif k == "date":
                    # Calculate dates as days before present
                    if value:
                        if i == last:
                            print(" this", end=" ")
                        else:
                            # prints one place to the left of where you expect
                            if r[last]:
                                s = value - r[last]
                            elif r[last - 1]:
                                s = value - r[last - 1]
                            else:
                                s = 0
                            days = s / (24 * 60 * 60)
                            print(f"{days:8.2f}", end=" ")
                elif value:
                    print(f"{value:8.1f}", end=" ")
                    # r[i - 1] wraps to the same element when there is only one entry,
                    # which yields 0% and prints nothing.
                    if i == last and r[i - 1]:
                        percen = 100 * (value - r[i - 1]) / r[i - 1]
                        if abs(percen) > 0.1:
                            print(f"{percen:8.1f}%", end=" ")
                else:
                    print(" - ", end=" ")
            print("")
        print("\n")
        return True
|
|
|
|
|
|
def usage():
    """Prints command line options, can print history of previous runs with timings"""
    # The profile file is named as JobQueue.tfile spells it (underscore, not hyphen).
    lines = [
        "Usage is 'python databaseReset.py <command> [runlabel]'",
        "where command is:",
        "test - testing... imports people and prints profile. Deletes nothing.",
        "profile - print the profile from previous runs. Import nothing.",
        "- del - deletes last entry",
        "- delfirst - deletes first entry",
        "",
        "reset - normal usage: clear database and reread everything from files",
        "",
        "init - initialisation. Automatic if you run reset.",
        "caves - read in the caves (must run first after initialisation)",
        "people - read in the people from folk.csv (must run after 'caves')",
        "logbooks - read in the logbooks",
        "QMs - read in the QM csv files (older caves only)",
        "scans - the survey scans in all the wallets (must run before survex)",
        "drawings - read in the Tunnel & Therion files - which scans the survey scans too",
        "survex - read in the survex files - all the survex blocks and entrances x/y/z",
        "survex_ck - set caves and people on wallets, check wallets for *ref",
        "ents - read just the entrances x/y/z (must run after survex)",
        "",
        "dumplogbooks - Not used. write out autologbooks (not working? use http://localhost:8000/controlpanel )",
        "logbook - read a single logbook. Defautl set in python code",
        "",
        "and [runlabel] is an optional string identifying this run of the script",
        "in the stored profiling data 'import_profile.json'",
        "",
        "caves and logbooks must be run on an empty db before the others as they",
        "set up db tables used by the others.",
        "",
        "Note that running the subfunctions will not produce a consistent website",
        "- only the full 'reset' does that.",
        "",  # the help text ends with a newline before print() adds its own
    ]
    print("\n".join(lines))
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: validate the environment, queue the imports named
    # on the command line, run them and print the timing profile.

    if os.geteuid() == 0:
        print("Do not run as root or using sudo - file permissions for cache files and logs will break")
        print("Aborting run.")
        sys.exit(1)

    if sys.getfilesystemencoding() != "utf-8":
        print("UTF-8 is NOT the default file encoding. You must fix this.")
        print(f"- {sys.getdefaultencoding()=}")
        print(f"- {sys.getfilesystemencoding()=}")
        print(f"- {locale.getdefaultlocale()=}")
        print(f"- {locale.getpreferredencoding()=}")
        print("Aborting run.")
        sys.exit(1)

    # With more than one argument, the last one doubles as the run label.
    if len(sys.argv) > 2:
        runlabel = sys.argv[-1]
    else:
        runlabel = None

    jq = JobQueue(runlabel)

    # The first matching keyword anywhere on the command line wins, so the
    # order of this chain is significant.
    if len(sys.argv) == 1:
        usage()
        sys.exit()
    elif "init" in sys.argv:
        jq.enq("reinit", reinit_db)
    elif "ents" in sys.argv:
        jq.enq("survex", import_ents)
    elif "survex_ck" in sys.argv:
        jq.enq("survex", import_survex_checks)
    elif "test2" in sys.argv:
        jq.enq("QMs", import_QMs)
        jq.enq("drawings", import_drawingsfiles)
        jq.enq("survex", import_survex)
    elif "caves" in sys.argv:
        jq.enq("caves", import_caves)
    elif "logbooks" in sys.argv:
        jq.enq("logbooks", import_logbooks)
    elif "logbook" in sys.argv:
        jq.enq("logbooks", import_logbook)  # default year set in imports.py
    elif "people" in sys.argv:
        jq.enq("people", import_people)
    elif "QMs" in sys.argv:
        jq.enq("QMs", import_QMs)
    elif "reset" in sys.argv:
        jq.enq("reinit", reinit_db)
        jq.enq("caves", import_caves)
        jq.enq("people", import_people)
        jq.enq("scans", import_surveyscans)
        jq.enq("logbooks", import_logbooks)
        jq.enq("QMs", import_QMs)
        jq.enq("drawings", import_drawingsfiles)
        jq.enq("survex", import_survex)
    elif "scans" in sys.argv:
        jq.enq("scans", import_surveyscans)
    elif "survex" in sys.argv:
        jq.enq("survex", import_survex)
    elif "loadpos" in sys.argv:
        jq.enq("survex", import_loadpos)
    elif "drawings" in sys.argv:
        jq.enq("drawings", import_drawingsfiles)
    elif "profile" in sys.argv:
        jq.loadprofiles()
        # One chain, so 'del' no longer falls through to the plain branch
        # and prints the profile a second time.
        if runlabel == "del":
            jq.dellastprofile()
            jq.dellastprofile()  # twice because loadprofiles adds a dummy
            jq.showprofile()
            jq.saveprofiles()
        elif runlabel == "delfirst":
            jq.dellastprofile()  # remove the dummy
            jq.delfirstprofile()
            jq.showprofile()
            jq.saveprofiles()
        else:
            jq.showprofile()
        sys.exit()
    elif "help" in sys.argv:
        usage()
        sys.exit()
    else:
        usage()
        print(f"{sys.argv[1]} not recognised as a command.")
        sys.exit(1)

    jq.run()
    jq.showprofile()
|