forked from expo/troggle
490 lines
19 KiB
Python
490 lines
19 KiB
Python
import sys
|
|
import os
|
|
import time
|
|
import timeit
|
|
import json
|
|
|
|
import settings
|
|
# Point Python and Django at the project configuration. This has to happen
# before django.setup() is called below.
os.environ['PYTHONPATH'] = settings.PYTHON_PATH
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')

print(" - settings on loading databaseReset.py")

import django

print(" - django.setup - next")
try:
    django.setup()
except Exception:
    # Only genuine errors are reported here; Ctrl-C / SystemExit propagate
    # untouched instead of being mislabelled as a cyclic-reference failure.
    print(" ! Cyclic reference failure. Can occur when the initial db is empty. Fixed now (in UploadFileForm) but easy to reintroduce..")
    raise
print(" - django.setup - done")
|
|
|
|
import troggle.flatpages.models
|
|
import troggle.core.models
|
|
import troggle.core.models_survex
|
|
|
|
from django.core import management
|
|
from django.db import connection, close_old_connections, connections
|
|
from django.http import HttpResponse
|
|
from django.urls import reverse
|
|
from django.contrib.auth.models import User
|
|
|
|
from troggle.core.models_caves import Cave, Entrance
|
|
from troggle.parsers.imports import import_caves, import_people, import_surveyscans, \
|
|
import_logbooks, import_QMs, import_survex, import_tunnelfiles
|
|
import troggle.logbooksdump
|
|
|
|
|
|
# NOTE databaseReset.py is *imported* by views_other.py as it is used in the control panel
# presented there.

# os.geteuid() is only available on Unix; on other platforms the root check
# is skipped rather than crashing the import with AttributeError.
if hasattr(os, "geteuid") and os.geteuid() == 0:
    # This protects the server from having the wrong file permissions written on logs and caches
    print("This script should be run as expo not root - quitting")
    # sys.exit() rather than exit(): the latter is injected by the `site`
    # module and is not guaranteed to exist.
    sys.exit()

# Snapshot of the configured default database; filled in by store_dbsettings()
# and read back by restore_dbsettings().
dbengine = ""
dbname = ""
dbdefault = ""

# Credentials of the admin account created by reinit_db().
expouser = settings.EXPOUSER
expouserpass = settings.EXPOUSERPASS
expouseremail = settings.EXPOUSER_EMAIL
|
|
|
|
|
|
def reinit_db():
    """Rebuild database from scratch.

    For an in-memory sqlite db all connections are closed; for a sqlite file
    the file is deleted; for any other engine the database is dropped and
    re-created. Migrations are then run and the admin user (settings.EXPOUSER)
    is created as a staff superuser.

    Note - initial loading of troggle.sqlite will already have populated the models
    in memory (django python models, not the database), so there is already a full load
    of stuff known. Deleting the db file does not clear memory.

    Raises:
        OSError: if an existing sqlite database file cannot be removed.
    """
    print("Reinitialising db ", end="")
    print(django.db.connections.databases['default']['NAME'])
    currentdbname = settings.DATABASES['default']['NAME']
    if currentdbname == ':memory:':
        # closing connections should wipe the in-memory database
        django.db.close_old_connections()
        for conn in django.db.connections.all():
            print(" ! Closing another connection to db...")
            conn.close()
    elif django.db.connections.databases['default']['ENGINE'] == 'django.db.backends.sqlite3':
        if os.path.isfile(currentdbname):
            try:
                print(" - deleting " + currentdbname)
                os.remove(currentdbname)
            except OSError:
                print(" ! OSError on removing: " + currentdbname + " (Is the file open in another app? Is the server running?\n")
                raise
        else:
            print(" - No database file found: " + currentdbname + " ..continuing, will create it.\n")
    else:
        # Database identifiers cannot be passed as bound parameters, hence the
        # string interpolation; the name comes from settings, not user input.
        with django.db.connection.cursor() as cursor:
            cursor.execute("DROP DATABASE %s" % currentdbname)
            cursor.execute("CREATE DATABASE %s" % currentdbname)
            cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % currentdbname)
            cursor.execute("USE %s" % currentdbname)

    #Sync user - needed after reload
    print(" - Migrating: " + django.db.connections.databases['default']['NAME'])

    management.call_command('makemigrations', 'core', interactive=False)
    management.call_command('makemigrations', 'flatpages', interactive=False)
    management.call_command('migrate', interactive=False)
    management.call_command('migrate', 'core', interactive=False)
    management.call_command('migrate', 'flatpages', interactive=False)

    print(" - done migration on: " + settings.DATABASES['default']['NAME'])
    # count() asks the database for the number instead of loading every row.
    print("users in db already: ", User.objects.count())
    try:
        print(" - Setting up admin user on: " + django.db.connections.databases['default']['NAME'])
        # NOTE(review): this echoes the first 5 characters of the password.
        print(" - user: {} ({:.5}...) <{}> ".format(expouser, expouserpass, expouseremail))
        user = User.objects.create_user(expouser, expouseremail, expouserpass)
        user.is_staff = True
        user.is_superuser = True
        user.save()
    except Exception:
        # Deliberately best-effort: report, dump the db for inspection and carry
        # on without re-raising. KeyboardInterrupt/SystemExit are not caught.
        print(" ! INTEGRITY ERROR user on: " + settings.DATABASES['default']['NAME'])
        print(django.db.connections.databases['default']['NAME'])
        print(" ! You probably have not got a clean db when you thought you had.\n")
        print(" ! Also you are probably NOT running an in-memory db now.\n")
        print("users in db: ", User.objects.count())
        print("tables in db: ", len(connection.introspection.table_names()))
        memdumpsql(fn='integrityfail.sql')
        django.db.connections.databases['default']['NAME'] = ':memory:'
        #raise
|
|
|
|
def memdumpsql(fn):
    """Write the lines produced by dump._iterdump for the default connection to `fn`.

    Args:
        fn: path of the output file; it is created or overwritten.

    Returns:
        True once every line has been written.
    """
    from dump import _iterdump

    djconn = django.db.connection
    # Write text, UTF-8 encoded by the file object. Formatting the result of
    # line.encode() with '%s' would put the b'...' repr of each line in the file.
    with open(fn, 'w', encoding='utf-8') as f:
        for line in _iterdump(djconn):
            f.write('%s\n' % line)
    return True
|
|
|
|
def store_dbsettings():
    """Record the current default database configuration in module globals.

    Saves the ENGINE and NAME strings plus a reference to the whole
    settings.DATABASES['default'] dict so restore_dbsettings() can put
    them back later.
    """
    global dbengine, dbname, dbdefault
    dbengine = settings.DATABASES['default']['ENGINE']
    dbname = settings.DATABASES['default']['NAME']
    # Note: this is a reference to the settings dict, not a copy.
    dbdefault = settings.DATABASES['default']
|
|
|
|
def restore_dbsettings():
    """Reinstate the database configuration saved by store_dbsettings().

    The saved dict, engine and name are written back first into
    settings.DATABASES and then into django.db.connections.databases.
    """
    def _reinstate(databases):
        # Put the saved dict back, then force ENGINE/NAME to the saved strings
        # in case the dict itself was modified in the meantime.
        databases['default'] = dbdefault
        databases['default']['ENGINE'] = dbengine
        databases['default']['NAME'] = dbname

    _reinstate(settings.DATABASES)
    _reinstate(django.db.connections.databases)
|
|
|
|
def set_in_memory_dbsettings():
    """Close every open connection and switch the default db to sqlite ':memory:'.

    settings.DATABASES['default'] is replaced by a fresh sqlite configuration
    dict, and ENGINE/NAME are then also set explicitly on both the settings
    entry and django.db.connections.databases['default'].
    """
    django.db.close_old_connections()  # needed if MySQL running?
    for open_conn in django.db.connections.all():
        print(" ! Closing another connection to db in set_in_memory_dbsettings")
        open_conn.close()

    sqlite_engine = 'django.db.backends.sqlite3'
    memory_name = ':memory:'

    memory_config = {
        'ENGINE': sqlite_engine,
        'AUTOCOMMIT': True,
        'ATOMIC_REQUESTS': False,
        'NAME': memory_name,
        'CONN_MAX_AGE': 0,
        'TIME_ZONE': 'UTC',
        'OPTIONS': {},
        'HOST': '',
        'USER': '',
        'TEST': {'COLLATION': None, 'CHARSET': None, 'NAME': None, 'MIRROR': None},
        'PASSWORD': '',
        'PORT': '',
    }
    settings.DATABASES['default'] = memory_config

    settings.DATABASES['default']['ENGINE'] = sqlite_engine
    settings.DATABASES['default']['NAME'] = memory_name
    django.db.connections.databases['default']['ENGINE'] = sqlite_engine
    django.db.connections.databases['default']['NAME'] = memory_name
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
# These functions moved to a different file - not used currently.
|
|
#import logbooksdump
|
|
#def import_auto_logbooks():
|
|
#def dumplogbooks():
|
|
|
|
#def writeCaves():
|
|
# Writes out all cave and entrance HTML files to
|
|
# folder specified in settings.CAVEDESCRIPTIONS
|
|
# for cave in Cave.objects.all():
|
|
# cave.writeDataFile()
|
|
# for entrance in Entrance.objects.all():
|
|
# entrance.writeDataFile()
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
class JobQueue():
    """A list of import operations to run. Always reports profile times
    in the same order.

    self.results maps each label in self.results_order to a list with one
    entry per recorded run; the last entry belongs to the current run and is
    None until a value has been recorded for it.
    """

    def __init__(self, run):
        """Create an empty queue; `run` is the label recorded for this run (may be None)."""
        self.runlabel = run
        self.queue = []  # tuples of (jobname, jobfunction)
        self.results = {}
        self.results_order = [
            "date", "runlabel", "reinit", "caves", "people",
            "logbooks", "QMs", "scans", "survex",
            "tunnel", "test"]
        for k in self.results_order:
            self.results[k] = []
        self.tfile = "import_profile.json"
        self.htmlfile = "profile.html"  # for HTML results table. Not yet done.

    #Adding elements to queue - enqueue
    def enq(self, label, func):
        """Append the job `func` under the name `label`; returns True."""
        self.queue.append((label, func))
        return True

    def loadprofiles(self):
        """Load timings for previous runs from file

        A missing, unreadable or malformed file is reported and otherwise
        ignored. In every case a None placeholder for the current run is
        appended to each results list. Returns True.
        """
        if os.path.isfile(self.tfile):
            data = None
            try:
                # 'with' closes the file on every path, and a failing open()
                # no longer leads to closing an unbound name afterwards.
                with open(self.tfile, "r") as f:
                    data = json.load(f)
            except (OSError, ValueError):
                # json.JSONDecodeError and UnicodeDecodeError are ValueErrors
                data = None
            if isinstance(data, dict):
                for label, timings in data.items():
                    self.results[label] = timings
            else:
                print("FAILURE parsing JSON file %s" % (self.tfile))
        self.append_placeholders()
        return True

    def saveprofiles(self):
        """Write all recorded timings to self.tfile as JSON; returns True."""
        with open(self.tfile, 'w') as f:
            json.dump(self.results, f)
        return True

    def runqonce(self):
        """Run all the jobs in the queue provided - once

        The start time, the run label and each job's duration overwrite the
        last entry (the placeholder) of the corresponding results list.
        Returns True.
        """
        print("** Running job ", self.runlabel, end=" to ")
        print(django.db.connections.databases['default']['NAME'])
        jobstart = time.time()
        self.results["date"][-1] = jobstart
        self.results["runlabel"][-1] = self.runlabel

        for label, job in self.queue:
            start = time.time()
            job()
            duration = time.time() - start
            print("\n*- Ended \"", label, "\" %.1f seconds" % duration)
            self.results[label][-1] = duration  # replaces the null item

        jobduration = time.time() - jobstart
        print("** Ended job %s - %.1f seconds total." % (self.runlabel, jobduration))
        return True

    def append_placeholders(self):
        """Append a None placeholder for a new run to every results list."""
        for j in self.results_order:
            self.results[j].append(None)  # append a placeholder

    def run_now_django_tests(self, n):
        """Run the django 'test' command at verbosity `n`, saving and restoring db settings."""
        store_dbsettings()
        # this leaves the db set to :memory: whatever it was initially
        management.call_command('test', verbosity=n)
        django.db.close_old_connections()
        restore_dbsettings()
        # and whatever I do, it stays that way !

    def skip_memory_phase(self):
        """Return True when the run label is empty/None or begins with "F-"."""
        return (not self.runlabel) or self.runlabel.startswith("F-")

    def run(self):
        """First runs all the jobs in the queue against a scratch in-memory db
        then re-runs the import against the db specified in settings.py
        Default behaviour is to skip the in-memory phase.
        When MySQL is the db the in-memory phase crashes as MySQL does not properly
        relinquish some kind of db connection (not fixed yet)

        Returns True.
        """
        self.loadprofiles()
        store_dbsettings()

        print("-- start ", django.db.connections.databases['default']['ENGINE'], django.db.connections.databases['default']['NAME'])

        if dbname == ":memory:":
            # just run, and save the sql file
            self.runqonce()
            memdumpsql('memdump.sql')  # saved contents of scratch db, could be imported later..
            self.saveprofiles()
        elif self.skip_memory_phase():
            self.runqonce()
            self.saveprofiles()
        else:
            # run all the imports through :memory: first
            set_in_memory_dbsettings()

            print("-- phase 1 ", settings.DATABASES['default']['ENGINE'], settings.DATABASES['default']['NAME'])

            # the jobqueue may not start from scratch so we need to initialise the db properly first
            # because we are using an empty :memory: database
            # But initiating twice crashes it; so be sure to do it once only.

            # Damn. migrate() is still calling MySQL somehow **conn_params not sqlite3. So crashes on expo server.
            if ("reinit", reinit_db) not in self.queue:
                reinit_db()
            if ("caves", import_caves) not in self.queue:
                import_caves()  # sometime extract the initialising code from this and put in reinit...
            if ("people", import_people) not in self.queue:
                import_people()  # sometime extract the initialising code from this and put in reinit...

            django.db.close_old_connections()  # maybe not needed here

            self.runqonce()
            memdumpsql('memdump2.sql')
            self.showprofile()

            # restore the original db and import again
            # if we wanted to, we could re-import the SQL generated in the first pass to be
            # blazing fast. But for the present just re-import the lot.
            restore_dbsettings()
            print("-- phase 2 ", settings.DATABASES['default']['ENGINE'], settings.DATABASES['default']['NAME'])
            print(django.db.connections.databases['default']['NAME'])

            django.db.close_old_connections()  # maybe not needed here
            for j in self.results_order:
                self.results[j].pop()  # throw away results from :memory: run
            self.append_placeholders()

            django.db.close_old_connections()
            #django.setup() # should this be needed?

            self.runqonce()
            self.saveprofiles()

        return True

    def showprofile(self):
        """Prints out the time it took to run the jobqueue

        One row per label in results_order, stopping at "test". Dates are shown
        as days relative to the most recent run, and a timing in the last
        column is followed by its percentage change from the previous run
        when that exceeds 0.1%. Returns True.
        """
        for k in self.results_order:
            if k == "test":
                break
            elif k == "date":
                print(" days ago ", end=' ')
            else:
                print('%10s (s)' % k, end=' ')
            r = self.results[k]
            last = len(r) - 1

            for i, value in enumerate(r):
                if k == "runlabel":
                    print('%8s' % (value if value else " - "), end=' ')
                elif k == "date":
                    # Calculate dates as days before present
                    if value:
                        if i == last:
                            print(" this", end=' ')
                        else:
                            # prints one place to the left of where you expect
                            if r[last]:
                                s = value - r[last]
                            elif r[last - 1]:
                                s = value - r[last - 1]
                            else:
                                s = 0
                            days = s / (24 * 60 * 60)
                            print('%8.2f' % days, end=' ')
                elif value:
                    print('%8.1f' % value, end=' ')
                    # i > 0 stops a single-entry list comparing against itself
                    if i == last and i > 0 and r[i - 1]:
                        percen = 100 * (value - r[i - 1]) / r[i - 1]
                        if abs(percen) > 0.1:
                            print('%8.1f%%' % percen, end=' ')
                else:
                    print(" - ", end=' ')
            print("")
        print("\n")
        return True
|
|
|
|
|
|
def usage():
    """Print the command-line help text for databaseReset.py to stdout."""
    # The profile file name below matches JobQueue.tfile ("import_profile.json").
    print("""Usage is 'python databaseReset.py <command> [runlabel]'
where command is:
test - testing... imports people and prints profile. Deletes nothing.
profile - print the profile from previous runs. Import nothing.

reset - normal usage: clear database and reread everything from files - time-consuming
caves - read in the caves (must run first after initialisation)
people - read in the people from folk.csv (must run after 'caves')
logbooks - read in the logbooks
QMs - read in the QM csv files (older caves only)
scans - the survey scans in all the wallets (must run before survex)
survex - read in the survex files - all the survex blocks and entrances x/y/z
tunnel - read in the Tunnel files - which scans the survey scans too

autologbooks - Not used. read in autologbooks (what are these?)
dumplogbooks - Not used. write out autologbooks (not working?)

and [runlabel] is an optional string identifying this run of the script
in the stored profiling data 'import_profile.json'

if [runlabel] is absent or begins with "F-" then it will skip the :memory: pass

caves and logbooks must be run on an empty db before the others as they
set up db tables used by the others.

the commands are first run on an in-memory empty database before being run on
the actual persistent database. This is very fast and checks for import errors.

the initial in-memory phase is on an empty db, so always runs caves & people for this phase
""")
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: pick the import jobs named on the command line,
    # queue them and run them, then print the timing profile.

    # os.geteuid() is Unix-only; skip the root check where it does not exist.
    if hasattr(os, "geteuid") and os.geteuid() == 0:
        print("Do not run as root or using sudo - file permissions for cache files and logs will break")
        print("Aborting run.")
        sys.exit()

    # With more than one argument, the last one is taken as the run label.
    if len(sys.argv) > 2:
        runlabel = sys.argv[-1]
    else:
        runlabel = None

    jq = JobQueue(runlabel)

    # The commands are tested in a fixed order and the first match wins.
    if len(sys.argv) == 1:
        usage()
        sys.exit()
    elif "test" in sys.argv:
        jq.enq("caves", import_caves)
        jq.enq("people", import_people)
        #jq.run_now_django_tests(2)
    elif "caves" in sys.argv:
        jq.enq("caves", import_caves)
    elif "logbooks" in sys.argv:
        jq.enq("logbooks", import_logbooks)
    elif "people" in sys.argv:
        jq.enq("people", import_people)
    elif "QMs" in sys.argv:
        jq.enq("QMs", import_QMs)
    elif "reset" in sys.argv:
        jq.enq("reinit", reinit_db)
        jq.enq("caves", import_caves)
        jq.enq("people", import_people)
        jq.enq("scans", import_surveyscans)
        jq.enq("logbooks", import_logbooks)
        jq.enq("QMs", import_QMs)
        jq.enq("tunnel", import_tunnelfiles)
        jq.enq("survex", import_survex)
    elif "scans" in sys.argv:
        jq.enq("scans", import_surveyscans)
    elif "survex" in sys.argv:
        jq.enq("survex", import_survex)
    elif "tunnel" in sys.argv:
        jq.enq("tunnel", import_tunnelfiles)
    elif "autologbooks" in sys.argv:  # untested in 2020
        # The bare name is not defined in this file (NameError).
        # NOTE(review): presumably it now lives in troggle.logbooksdump,
        # which is imported at the top of the file - confirm.
        troggle.logbooksdump.import_auto_logbooks()
    elif "dumplogbooks" in sys.argv:  # untested in 2020
        # NOTE(review): same assumption as for autologbooks - confirm.
        troggle.logbooksdump.dumplogbooks()
    # elif "writecaves" in sys.argv: # untested in 2020 - will overwrite input files!!
    #     writeCaves()
    elif "profile" in sys.argv:
        jq.loadprofiles()
        jq.showprofile()
        sys.exit()
    elif "help" in sys.argv:
        usage()
        sys.exit()
    else:
        usage()
        print("%s not recognised as a command." % sys.argv[1])
        sys.exit()

    store_dbsettings()
    #set_in_memory_dbsettings()

    #set_in_memory_dbsettings() # seems to be ignored. Appears to be set but in reality.
    #jq.run_now_django_tests(1) # actually does set db to :memory: - but invisibly !

    jq.run()
    jq.showprofile()
|