2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-22 07:11:52 +00:00
troggle/databaseReset.py

370 lines
14 KiB
Python
Raw Normal View History

2020-06-06 22:51:55 +01:00
import sys
import os
import time
import timeit
import json
import resource

# settings must be importable and registered with Django BEFORE django.setup()
import settings

os.environ['PYTHONPATH'] = settings.PYTHON_PATH
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')

print(" - settings on loading databaseReset.py")

import django
print(" - Memory footprint before loading Django: {:.3f} MB".format(resource.getrusage(resource.RUSAGE_SELF)[2]/1024.0))
try:
    django.setup()
except Exception:
    # Narrowed from a bare 'except:'; still re-raised so the failure is visible.
    print(" ! Cyclic reference failure. Can occur when the initial db is empty. Fixed now (in UploadFileForm) but easy to reintroduce..")
    raise
print(" - Memory footprint after loading Django: {:.3f} MB".format(resource.getrusage(resource.RUSAGE_SELF)[2]/1024.0))

# Project models and parsers can only be imported after django.setup().
import troggle.flatpages.models
import troggle.core.models
import troggle.core.models_survex
from django.core import management
from django.db import connection, close_old_connections, connections
from django.http import HttpResponse
from django.urls import reverse
from django.contrib.auth.models import User

from troggle.core.models import get_process_memory
from troggle.core.models_caves import Cave, Entrance
from troggle.parsers.imports import import_caves, import_people, import_surveyscans, \
    import_logbooks, import_QMs, import_survex, import_loadpos, import_drawingsfiles, \
    import_subcaves
import troggle.logbooksdump

if os.geteuid() == 0:
    # This protects the server from having the wrong file permissions written on logs and caches
    print("This script should be run as expo not root - quitting")
    exit()

# Admin-user credentials come from the settings module, not the command line.
expouser = settings.EXPOUSER
expouserpass = settings.EXPOUSERPASS
expouseremail = settings.EXPOUSER_EMAIL
2011-07-11 02:10:22 +01:00
2020-04-27 23:51:41 +01:00
def reinit_db():
    """Rebuild database from scratch. Deletes the file first if sqlite is used,
    otherwise it drops the database and creates it.
    Note - initial loading of troggle.sqlite will already have populated the models
    in memory (django python models, not the database), so there is already a full load
    of stuff known. Deleting the db file does not clear memory.

    Side effects: deletes/recreates the configured database, runs migrations,
    and creates the expo admin user from settings.
    """
    print("Reinitialising db ", end="")
    print(django.db.connections.databases['default']['NAME'])
    currentdbname = settings.DATABASES['default']['NAME']
    if currentdbname == ':memory:':
        # closing connections should wipe the in-memory database
        django.db.close_old_connections()
        for conn in django.db.connections.all():
            print(" ! Closing another connection to db...")
            conn.close()
    elif django.db.connections.databases['default']['ENGINE'] == 'django.db.backends.sqlite3':
        # File-based sqlite: remove the db file so migrate recreates it cleanly.
        if os.path.isfile(currentdbname):
            try:
                print(" - deleting " + currentdbname)
                os.remove(currentdbname)
            except OSError:
                print(" ! OSError on removing: " + currentdbname + "\n ! Is the file open in another app? Is the server running?\n")
                raise
        else:
            print(" - No database file found: " + currentdbname + " ..continuing, will create it.\n")
    else:
        # Presumably a server database (e.g. MySQL): drop and recreate via SQL.
        cursor = django.db.connection.cursor()
        cursor.execute("DROP DATABASE %s" % currentdbname)
        cursor.execute("CREATE DATABASE %s" % currentdbname)
        cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % currentdbname)
        cursor.execute("USE %s" % currentdbname)

    print(" - Migrating: " + django.db.connections.databases['default']['NAME'])
    management.call_command('makemigrations', 'core', interactive=False)
    management.call_command('makemigrations', 'flatpages', interactive=False)
    management.call_command('migrate', interactive=False)
    management.call_command('migrate', 'core', interactive=False)
    management.call_command('migrate', 'flatpages', interactive=False)
    print(" - done migration on: " + settings.DATABASES['default']['NAME'])

    print("users in db already: ", len(User.objects.all()))
    try:
        print(" - Setting up admin user on: " + django.db.connections.databases['default']['NAME'])
        print(" - user: {} ({:.5}...) <{}> ".format(expouser, expouserpass, expouseremail))
        user = User.objects.create_user(expouser, expouseremail, expouserpass)
        user.is_staff = True
        user.is_superuser = True
        user.save()
    except Exception:
        # Was a bare 'except:'. Typically an IntegrityError when the db was
        # not actually empty; dump it for post-mortem and fall back to :memory:.
        print(" ! INTEGRITY ERROR user on: " + settings.DATABASES['default']['NAME'])
        print(django.db.connections.databases['default']['NAME'])
        print(" ! You probably have not got a clean db when you thought you had.\n")
        print(" ! Also you are probably NOT running an in-memory db now.\n")
        print("users in db: ", len(User.objects.all()))
        print("tables in db: ", len(connection.introspection.table_names()))
        memdumpsql(fn='integrityfail.sql')
        django.db.connections.databases['default']['NAME'] = ':memory:'
        #raise
def memdumpsql(fn):
    """Dump the current django database as SQL statements into file *fn*.

    Uses the project-local ``dump._iterdump`` to serialise the default
    django connection. Returns True on success.
    """
    djconn = django.db.connection
    from dump import _iterdump
    # BUG FIX: the old code did f.write('%s\n' % line.encode("utf8")) on a
    # text-mode file, which in Python 3 writes the b'...' repr and corrupts
    # the dump. Write the str directly, with an explicit utf8 encoding.
    with open(fn, 'w', encoding='utf8') as f:
        for line in _iterdump(djconn):
            f.write('%s\n' % line)
    return True
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2020-05-14 17:21:34 +01:00
# These functions moved to a different file - not used currently.
2020-06-30 15:22:41 +01:00
# import logbooksdump
# def import_auto_logbooks():
# def dumplogbooks():
2020-05-28 02:09:36 +01:00
2020-06-30 15:22:41 +01:00
# def writeCaves():
2020-06-01 00:42:48 +01:00
# Writes out all cave and entrance HTML files to
# folder specified in settings.CAVEDESCRIPTIONS
2020-05-28 02:09:36 +01:00
# for cave in Cave.objects.all():
# cave.writeDataFile()
# for entrance in Entrance.objects.all():
# entrance.writeDataFile()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2020-04-16 20:36:42 +01:00
class JobQueue():
    """A list of import operations to run. Always reports profile times
    in the same order.

    Timings for each run are stored (one column per run) in a JSON file
    so successive runs can be compared with showprofile().
    """

    def __init__(self, run):
        """*run* is a label recorded alongside the timings for this run."""
        self.runlabel = run
        self.queue = []  # tuples of (jobname, jobfunction)
        self.results = {}
        # Fixed reporting order so profile columns line up across runs.
        self.results_order = [
            "date", "runlabel", "reinit", "caves", "people",
            "logbooks", "QMs", "scans", "survex",
            "drawings", "test"]
        for k in self.results_order:
            self.results[k] = []
        self.tfile = "import_profile.json"
        self.htmlfile = "profile.html"  # for HTML results table. Not yet done.

    def enq(self, label, func):
        """Add a job (label, callable) to the queue. Returns True."""
        self.queue.append((label, func))
        return True

    def loadprofiles(self):
        """Load timings for previous runs from file, then append a placeholder
        slot for the current run to every results row."""
        if os.path.isfile(self.tfile):
            try:
                with open(self.tfile, "r") as f:
                    data = json.load(f)
                for j in data:
                    self.results[j] = data[j]
            except Exception:
                # Was a bare 'except:'; any parse failure just means we start
                # with empty history.
                print("FAILURE parsing JSON file %s" % (self.tfile))
                # Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12
        for j in self.results_order:
            self.results[j].append(None)  # append a placeholder
        return True

    def saveprofiles(self):
        """Write the accumulated timings back to the JSON profile file."""
        with open(self.tfile, 'w') as f:
            json.dump(self.results, f)
        return True

    def runqonce(self):
        """Run all the jobs in the queue provided - once.

        Replaces the placeholder slot for each executed job with its
        duration in seconds, and reports memory use before/after each job.
        """
        print("** Running job ", self.runlabel, end=" to ")
        print(django.db.connections.databases['default']['NAME'])
        jobstart = time.time()
        print("-- Initial memory in use {:.3f} MB".format(get_process_memory()))

        self.results["date"].pop()
        self.results["date"].append(jobstart)
        self.results["runlabel"].pop()
        self.results["runlabel"].append(self.runlabel)

        for runfunction in self.queue:
            start = time.time()
            memstart = get_process_memory()
            #--------------------
            runfunction[1]()  # invokes function passed in the second item in the tuple
            #--------------------
            memend = get_process_memory()
            duration = time.time() - start
            print("\n*- Ended \"", runfunction[0], "\" {:.1f} seconds + {:.3f} MB ({:.3f} MB)".format(duration, memend - memstart, memend))
            self.results[runfunction[0]].pop()  # the null item
            self.results[runfunction[0]].append(duration)

        jobend = time.time()
        jobduration = jobend - jobstart
        print("** Ended job %s - %.1f seconds total." % (self.runlabel, jobduration))
        return True

    def append_placeholders(self):
        """Append a placeholder slot for this run to every results row."""
        for j in self.results_order:
            self.results[j].append(None)  # append a placeholder

    def run_now_django_tests(self, n):
        """Run the django test suite at verbosity *n* and close stale db connections."""
        management.call_command('test', verbosity=n)
        django.db.close_old_connections()

    def run(self):
        """Load history, run the queue once, persist the db and profile data."""
        self.loadprofiles()
        print("-- start ", django.db.connections.databases['default']['ENGINE'], django.db.connections.databases['default']['NAME'])
        self.runqonce()
        if settings.DATABASES['default']['NAME'] == ":memory:":
            memdumpsql('memdump.sql')  # saved contents of in-memory db, could be imported later..
        self.saveprofiles()
        return True

    def showprofile(self):
        """Prints out the time it took to run the jobqueue."""
        for k in self.results_order:
            if k == "test":
                break
            elif k == "date":
                print(" days ago ", end=' ')
            else:
                print('%10s (s)' % k, end=' ')
            percen = 0
            r = self.results[k]
            for i in range(len(r)):
                if k == "runlabel":
                    if r[i]:
                        rp = r[i]
                    else:
                        rp = " - "
                    print('%8s' % rp, end=' ')
                elif k == "date":
                    # Calculate dates as days before present
                    if r[i]:
                        if i == len(r) - 1:
                            print(" this", end=' ')
                        else:
                            # prints one place to the left of where you expect
                            if r[len(r) - 1]:
                                s = r[i] - r[len(r) - 1]
                            elif r[len(r) - 2]:
                                s = r[i] - r[len(r) - 2]
                            else:
                                s = 0
                            days = (s) / (24 * 60 * 60)
                            print('%8.2f' % days, end=' ')
                elif r[i]:
                    print('%8.1f' % r[i], end=' ')
                    if i == len(r) - 1 and r[i - 1]:
                        # % change of this run against the previous run
                        percen = 100 * (r[i] - r[i - 1]) / r[i - 1]
                        if abs(percen) > 0.1:
                            print('%8.1f%%' % percen, end=' ')
                else:
                    print(" - ", end=' ')
            print("")
        print("\n")
        return True
2011-07-11 02:10:22 +01:00
def usage():
    """Print command-line help for databaseReset.py.

    The help text was reconstructed here after extraction garbling; content
    is unchanged, interleaved scrape residue removed.
    """
    print("""Usage is 'python databaseReset.py <command> [runlabel]'
              where command is:
         test         - testing... imports people and prints profile. Deletes nothing.
         profile      - print the profile from previous runs. Import nothing.

         reset        - normal usage: clear database and reread everything from files - time-consuming
         init         - initialisation. Automatic if you run reset.
         caves        - read in the caves (must run first after initialisation)
         people       - read in the people from folk.csv (must run after 'caves')
         logbooks     - read in the logbooks
         QMs          - read in the QM csv files (older caves only)
         scans        - the survey scans in all the wallets (must run before survex)
         survex       - read in the survex files - all the survex blocks and entrances x/y/z
         drawings     - read in the Tunnel & Therion files - which scans the survey scans too

         autologbooks - Not used. read in autologbooks (what are these?)
         dumplogbooks - Not used. write out autologbooks (not working?)

         and [runlabel] is an optional string identifying this run of the script
         in the stored profiling data 'import-profile.json'

         caves and logbooks must be run on an empty db before the others as they
         set up db tables used by the others.
         """)
2011-07-11 02:10:22 +01:00
if __name__ == "__main__":
    # Refuse to run as root: cache files and logs would end up root-owned.
    if os.geteuid() == 0:
        print("Do not run as root or using sudo - file permissions for cache files and logs will break")
        print("Aborting run.")
        exit()

    # Optional trailing [runlabel] argument identifies this run in the profile data.
    if len(sys.argv) > 2:
        runlabel = sys.argv[len(sys.argv) - 1]
    else:
        runlabel = None

    jq = JobQueue(runlabel)

    if len(sys.argv) == 1:
        usage()
        exit()
    elif "init" in sys.argv:
        jq.enq("reinit", reinit_db)
    elif "test" in sys.argv:
        jq.enq("caves", import_caves)
        import_subcaves()
        #jq.enq("people",import_people)
        #jq.run_now_django_tests(2)
    elif "caves" in sys.argv:
        jq.enq("caves", import_caves)
    elif "logbooks" in sys.argv:
        jq.enq("logbooks", import_logbooks)
    elif "people" in sys.argv:
        jq.enq("people", import_people)
    elif "QMs" in sys.argv:
        jq.enq("QMs", import_QMs)
    elif "reset" in sys.argv:
        # Full reload; ordering matters (caves/people first, survex last).
        jq.enq("reinit", reinit_db)
        jq.enq("caves", import_caves)
        jq.enq("people", import_people)
        jq.enq("scans", import_surveyscans)
        jq.enq("logbooks", import_logbooks)
        jq.enq("QMs", import_QMs)
        jq.enq("drawings", import_drawingsfiles)
        jq.enq("survex", import_survex)
    elif "scans" in sys.argv:
        jq.enq("scans", import_surveyscans)
    elif "survex" in sys.argv:
        jq.enq("survex", import_survex)
    elif "loadpos" in sys.argv:
        jq.enq("survex", import_loadpos)
    elif "drawings" in sys.argv:
        jq.enq("drawings", import_drawingsfiles)
    elif "autologbooks" in sys.argv:  # untested in 2020
        # NOTE(review): import_auto_logbooks is not defined or imported in this
        # file (it was moved out - see troggle.logbooksdump), so this branch
        # would raise NameError if exercised. Left unchanged pending confirmation.
        import_auto_logbooks()
    elif "dumplogbooks" in sys.argv:  # untested in 2020
        # NOTE(review): dumplogbooks is likewise undefined here - see above.
        dumplogbooks()
    # elif "writecaves" in sys.argv: # untested in 2020 - will overwrite input files!!
    #     writeCaves()
    elif "profile" in sys.argv:
        jq.loadprofiles()
        jq.showprofile()
        exit()
    elif "help" in sys.argv:
        usage()
        exit()
    else:
        usage()
        print("%s not recognised as a command." % sys.argv[1])
        exit()

    jq.run()
    jq.showprofile()