2020-06-06 22:51:55 +01:00
import sys
2011-07-11 02:10:22 +01:00
import os
import time
2020-04-15 04:09:28 +01:00
import timeit
2020-05-24 13:35:47 +01:00
import json
2020-07-01 22:49:38 +01:00
import resource
2020-05-24 13:35:47 +01:00
2011-07-11 02:10:22 +01:00
import settings
2020-07-18 16:23:54 +01:00
""" Command-line utility for loading cave data files into troggle ' s database.
The command line options select which combination of classes of data will be imported ,
e . g . cave data , logbook data , cavers ( people ) data . The set of imports requested are put
into a job queue in a valid order , toegether with any necessary initialisation if it is
a complete reset , and the queue is then executed .
In future all these functions may be moved to a control panel webpage running within the
troggle application .
"""
2011-07-11 02:10:22 +01:00
# --- Django bootstrap -------------------------------------------------------
# Statement order matters here: the settings module name is placed in the
# environment before django.setup() runs, and the troggle model modules are
# only imported once setup has completed.
os.environ['PYTHONPATH'] = settings.PYTHON_PATH
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')

print(" - settings on loading databaseReset.py")

import django

# ru_maxrss is the peak resident set size of this process; dividing by 1024
# gives MB when the platform reports kilobytes (as Linux does).
print(" - Memory footprint before loading Django: {:.3f} MB".format(
    resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0))
try:
    django.setup()
except Exception:
    # Add a hint for the most common cause, then let the real traceback through.
    print(" ! Cyclic reference failure. Can occur when the initial db is empty. "
          "Fixed now (in UploadFileForm) but easy to reintroduce..")
    raise
print(" - Memory footprint after loading Django: {:.3f} MB".format(
    resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0))

import troggle.flatpages.models
import troggle.core.models
import troggle.core.models_survex
2011-07-11 02:10:22 +01:00
from django . core import management
2020-06-08 00:11:09 +01:00
from django . db import connection , close_old_connections , connections
2011-07-11 02:10:22 +01:00
from django . http import HttpResponse
2020-06-18 21:50:16 +01:00
from django . urls import reverse
2020-06-17 22:55:51 +01:00
from django . contrib . auth . models import User
2020-06-08 00:11:09 +01:00
2020-06-30 15:22:41 +01:00
from troggle . core . models import get_process_memory
2020-05-28 04:54:53 +01:00
from troggle . core . models_caves import Cave , Entrance
2020-06-16 16:07:36 +01:00
from troggle . parsers . imports import import_caves , import_people , import_surveyscans , \
2020-07-07 01:35:58 +01:00
import_logbooks , import_QMs , import_survex , import_loadpos , import_drawingsfiles , \
import_subcaves
2020-06-17 22:55:51 +01:00
import troggle . logbooksdump
2020-06-06 22:51:55 +01:00
2020-05-27 01:04:37 +01:00
if os.geteuid() == 0:
    # This protects the server from having the wrong file permissions written on logs and caches
    print("This script should be run as expo not root - quitting")
    # sys.exit() rather than the interactive-only exit() helper installed by `site`.
    sys.exit()

# Credentials of the admin account that reinit_db() creates in a fresh database.
expouser = settings.EXPOUSER
expouserpass = settings.EXPOUSERPASS
expouseremail = settings.EXPOUSER_EMAIL
2011-07-11 02:10:22 +01:00
2020-04-27 23:51:41 +01:00
def reinit_db():
    """Rebuild the database from scratch, migrate it and create the admin user.

    How the old database is discarded depends on the configured backend:

    * ``:memory:``  - every open connection is closed.
    * sqlite file   - the database file is deleted if it exists.
    * anything else - the database is dropped and re-created with SQL
      statements (``ALTER DATABASE ... CHARACTER SET`` / ``USE`` are issued).

    Note - initial loading of troggle.sqlite will already have populated the models
    in memory (django python models, not the database), so there is already a full load
    of stuff known. Deleting the db file does not clear memory.

    Raises:
        OSError: if an existing sqlite database file cannot be removed.

    A failure while creating the admin user is reported and swallowed
    (best effort): the database is dumped to ``integrityfail.sql`` and the
    default connection is re-pointed at ``:memory:``.
    """
    print("Reinitialising db ", end="")
    print(django.db.connections.databases['default']['NAME'])
    currentdbname = settings.DATABASES['default']['NAME']

    if currentdbname == ':memory:':
        # closing connections should wipe the in-memory database
        django.db.close_old_connections()
        for conn in django.db.connections.all():
            print(" ! Closing another connection to db...")
            conn.close()
    elif django.db.connections.databases['default']['ENGINE'] == 'django.db.backends.sqlite3':
        if os.path.isfile(currentdbname):
            try:
                print(" - deleting " + currentdbname)
                os.remove(currentdbname)
            except OSError:
                print(" ! OSError on removing: " + currentdbname
                      + "\n ! Is the file open in another app? Is the server running?\n")
                raise
        else:
            print(" - No database file found: " + currentdbname + " ..continuing, will create it.\n")
    else:
        # Database identifiers cannot be passed as bound query parameters, so
        # the name is interpolated; it comes from settings, not from user input.
        cursor = django.db.connection.cursor()
        cursor.execute("DROP DATABASE %s" % currentdbname)
        cursor.execute("CREATE DATABASE %s" % currentdbname)
        cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % currentdbname)
        cursor.execute("USE %s" % currentdbname)

    print(" - Migrating: " + django.db.connections.databases['default']['NAME'])

    management.call_command('makemigrations', 'core', interactive=False)
    management.call_command('makemigrations', 'flatpages', interactive=False)
    management.call_command('migrate', interactive=False)
    management.call_command('migrate', 'core', interactive=False)
    management.call_command('migrate', 'flatpages', interactive=False)
    print(" - done migration on: " + settings.DATABASES['default']['NAME'])
    # count() asks the database for the number instead of loading every row.
    print("users in db already: ", User.objects.count())

    try:
        print(" - Setting up admin user on: " + django.db.connections.databases['default']['NAME'])
        # NOTE(review): this echoes the first five characters of the admin
        # password to the console/log - consider dropping it.
        print(" - user: {} ({:.5}...) <{}> ".format(expouser, expouserpass, expouseremail))
        user = User.objects.create_user(expouser, expouseremail, expouserpass)
        user.is_staff = True
        user.is_superuser = True
        user.save()
    except Exception:
        # Deliberately best-effort, but no longer a bare `except:` so that
        # KeyboardInterrupt / SystemExit are not mistaken for a db problem.
        print(" ! INTEGRITY ERROR user on: " + settings.DATABASES['default']['NAME'])
        print(django.db.connections.databases['default']['NAME'])
        print(" ! You probably have not got a clean db when you thought you had.\n")
        print(" ! Also you are probably NOT running an in-memory db now.\n")
        print("users in db: ", User.objects.count())
        print("tables in db: ", len(connection.introspection.table_names()))
        memdumpsql(fn='integrityfail.sql')
        django.db.connections.databases['default']['NAME'] = ':memory:'
        #raise
def memdumpsql(fn):
    """Write a text dump of the default Django database connection to a file.

    Args:
        fn: path of the file to (over)write.

    Returns:
        True once every line produced by ``dump._iterdump`` has been written.
    """
    djconn = django.db.connection
    from dump import _iterdump
    # Encode once, at the file boundary. Formatting `line.encode("utf8")` with
    # %s under Python 3 would write the bytes repr (b'...') into the dump.
    # NOTE(review): assumes _iterdump yields str statements - confirm in dump.py.
    with open(fn, 'w', encoding='utf-8') as f:
        for line in _iterdump(djconn):
            f.write('%s\n' % line)
    return True
2020-04-14 20:19:41 +01:00
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2020-05-14 17:21:34 +01:00
# These functions moved to a different file - not used currently.
2020-06-30 15:22:41 +01:00
# import logbooksdump
# def import_auto_logbooks():
# def dumplogbooks():
2020-05-28 02:09:36 +01:00
2020-06-30 15:22:41 +01:00
# def writeCaves():
2020-06-01 00:42:48 +01:00
# Writes out all cave and entrance HTML files to
# folder specified in settings.CAVEDESCRIPTIONS
2020-05-28 02:09:36 +01:00
# for cave in Cave.objects.all():
# cave.writeDataFile()
# for entrance in Entrance.objects.all():
# entrance.writeDataFile()
2020-04-14 20:19:41 +01:00
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2020-04-15 04:09:28 +01:00
2020-04-16 20:36:42 +01:00
class JobQueue():
    """A list of import operations to run, plus a timing history of past runs.

    ``results`` maps each name in ``results_order`` to a list holding one entry
    per recorded run (oldest first). The history is persisted as JSON in
    ``tfile`` and is always reported in ``results_order`` order.
    """

    def __init__(self, run):
        """Create an empty queue; `run` is the label recorded for this run."""
        self.runlabel = run
        self.queue = []  # tuples of (jobname, jobfunction)
        self.results_order = [
            "date", "runlabel", "reinit", "caves", "people",
            "logbooks", "QMs", "scans", "survex",
            "drawings", "test"]
        self.results = {k: [] for k in self.results_order}
        self.tfile = "import_profile.json"
        self.htmlfile = "profile.html"  # for HTML results table. Not yet done.

    def enq(self, label, func):
        """Append the job (label, func) to the end of the queue. Returns True."""
        self.queue.append((label, func))
        return True

    def loadprofiles(self):
        """Load timings for previous imports for each data import type.

        A missing, unreadable or malformed profile file is reported and
        otherwise ignored. In every case one placeholder slot for the current
        run is then appended to each column. Returns True.
        """
        if os.path.isfile(self.tfile):
            try:
                # `with` closes the file on every path, and there is no handle
                # left unbound if open() itself fails.
                with open(self.tfile, "r") as f:
                    data = json.load(f)
            except (OSError, ValueError):  # JSONDecodeError is a ValueError
                print("FAILURE parsing JSON file %s" % (self.tfile))
            else:
                if isinstance(data, dict):
                    self.results.update(data)
                else:
                    print("FAILURE parsing JSON file %s" % (self.tfile))
        self.append_placeholders()
        return True

    def saveprofiles(self):
        """Save timings for the set of imports just completed. Returns True."""
        with open(self.tfile, 'w') as f:
            json.dump(self.results, f)
        return True

    def runqonce(self):
        """Run all the jobs in the queue provided - once.

        Expects the placeholder slot appended by loadprofiles() (or
        append_placeholders()) to be present: the last entry of each affected
        column is overwritten with this run's value. Returns True.
        """
        print("** Running job ", self.runlabel, end=" to ")
        print(django.db.connections.databases['default']['NAME'])
        jobstart = time.time()
        print("-- Initial memory in use {:.3f} MB".format(get_process_memory()))
        self.results["date"][-1] = jobstart
        self.results["runlabel"][-1] = self.runlabel

        for label, func in self.queue:
            start = time.time()
            memstart = get_process_memory()
            #--------------------
            func()
            #--------------------
            memend = get_process_memory()
            duration = time.time() - start
            print("\n*- Ended \"", label,
                  "\" {:.1f} seconds + {:.3f} MB ({:.3f} MB)".format(duration, memend - memstart, memend))
            self.results[label][-1] = duration  # replaces the None placeholder

        jobend = time.time()
        jobduration = jobend - jobstart
        print("** Ended job %s - %.1f seconds total." % (self.runlabel, jobduration))
        return True

    def append_placeholders(self):
        """Append a None slot for the current run to every results column."""
        for j in self.results_order:
            self.results[j].append(None)  # append a placeholder

    def run_now_django_tests(self, n):
        """Runs the standard django test harness system which is in troggle/core/TESTS/tests.py

        `n` is passed to the 'test' management command as its verbosity.
        """
        management.call_command('test', verbosity=n)
        django.db.close_old_connections()

    def run(self):
        """Load the timing history, run the queue once and save the timings.

        When the configured database is in-memory its contents are also dumped
        to 'memdump.sql' after the jobs have run. Returns True.
        """
        self.loadprofiles()
        print("-- start ", django.db.connections.databases['default']['ENGINE'],
              django.db.connections.databases['default']['NAME'])
        self.runqonce()
        if settings.DATABASES['default']['NAME'] == ":memory:":
            memdumpsql('memdump.sql')  # saved contents of in-memory db, could be imported later..
        self.saveprofiles()
        return True

    def showprofile(self):
        """Prints out the time it took to run the jobqueue.

        One row per entry of results_order (stopping before "test"), one
        column per recorded run. Dates are shown as days relative to the most
        recent run; the final timing column is followed by the percentage
        change against the previous run when that exceeds 0.1%. Returns True.
        """
        for k in self.results_order:
            if k == "test":
                break
            elif k == "date":
                print("     days ago ", end=' ')
            else:
                print('%10s (s)' % k, end=' ')

            r = self.results[k]
            last = len(r) - 1
            for i, value in enumerate(r):
                if k == "runlabel":
                    print('%8s' % (value if value else " - "), end=' ')
                elif k == "date":
                    # Calculate dates as days before present
                    if value:
                        if i == last:
                            print('%8s' % "this", end=' ')
                        else:
                            # prints one place to the left of where you expect
                            if r[last]:
                                s = value - r[last]
                            elif r[last - 1]:
                                s = value - r[last - 1]
                            else:
                                s = 0
                            days = s / (24 * 60 * 60)
                            print('%8.2f' % days, end=' ')
                elif value:
                    print('%8.1f' % value, end=' ')
                    # i > 0: with a single run there is no previous run to
                    # compare against (r[i-1] would wrap round to r[-1]).
                    if i == last and i > 0 and r[i - 1]:
                        percen = 100 * (value - r[i - 1]) / r[i - 1]
                        if abs(percen) > 0.1:
                            print('%8.1f%%' % percen, end=' ')
                else:
                    print('%8s' % "-", end=' ')
            print("")
        print("\n")
        return True
2011-07-11 02:10:22 +01:00
2013-06-24 01:31:14 +01:00
def usage():
    """Print the command-line help text for this script to stdout."""
    print("""Usage is 'python databaseReset.py <command> [runlabel]'
             where command is:
         test      - testing... imports caves and people and prints profile. Deletes nothing.
         test2     - testing... imports QMs, drawings and survex. Deletes nothing.
         profile   - print the profile from previous runs. Import nothing.
         help      - print this message.

         reset     - normal usage: clear database and reread everything from files - time-consuming

         init      - initialisation. Automatic if you run reset.
         caves     - read in the caves (must run first after initialisation)
         people    - read in the people from folk.csv (must run after 'caves')
         logbooks  - read in the logbooks
         QMs       - read in the QM csv files (older caves only)
         scans     - the survey scans in all the wallets (must run before survex)
         drawings  - read in the Tunnel & Therion files - which scans the survey scans too
         survex    - read in the survex files - all the survex blocks and entrances x/y/z
         loadpos   - run the loadpos import (timed under the 'survex' heading)

         autologbooks - Not used. read in autologbooks (what are these?)
         dumplogbooks - Not used. write out autologbooks (not working?)

             and [runlabel] is an optional string identifying this run of the script
             in the stored profiling data 'import_profile.json'

             caves and logbooks must be run on an empty db before the others as they
             set up db tables used by the others.
             """)
2012-06-10 14:59:21 +01:00
2011-07-11 02:10:22 +01:00
if __name__ == "__main__":
    # Command-line entry point: `python databaseReset.py <command> [runlabel]`.

    if os.geteuid() == 0:
        print("Do not run as root or using sudo - file permissions for cache files and logs will break")
        print("Aborting run.")
        sys.exit()

    if len(sys.argv) == 1:
        usage()
        sys.exit()

    # The command is the first argument. It is deliberately NOT looked up with
    # `"cmd" in sys.argv`, so a runlabel that happens to equal a command name
    # (e.g. "test") cannot hijack the dispatch.
    command = sys.argv[1]
    runlabel = sys.argv[-1] if len(sys.argv) > 2 else None

    jq = JobQueue(runlabel)

    # command -> ordered list of (profile label, import function) to enqueue
    job_lists = {
        "init": [("reinit", reinit_db)],
        "test": [("caves", import_caves),
                 ("people", import_people)],
        "test2": [("QMs", import_QMs),
                  ("drawings", import_drawingsfiles),
                  ("survex", import_survex)],
        "caves": [("caves", import_caves)],
        "logbooks": [("logbooks", import_logbooks)],
        "people": [("people", import_people)],
        "QMs": [("QMs", import_QMs)],
        "reset": [("reinit", reinit_db),
                  ("caves", import_caves),
                  ("people", import_people),
                  ("scans", import_surveyscans),
                  ("logbooks", import_logbooks),
                  ("QMs", import_QMs),
                  ("drawings", import_drawingsfiles),
                  ("survex", import_survex)],
        "scans": [("scans", import_surveyscans)],
        "survex": [("survex", import_survex)],
        "loadpos": [("survex", import_loadpos)],  # timed under the "survex" label
        "drawings": [("drawings", import_drawingsfiles)],
    }

    if command in job_lists:
        for label, func in job_lists[command]:
            jq.enq(label, func)
    elif command == "autologbooks":  # untested in 2020
        # NOTE(review): this helper is no longer defined in this file; it is
        # assumed to live in troggle.logbooksdump now - confirm the name.
        troggle.logbooksdump.import_auto_logbooks()
    elif command == "dumplogbooks":  # untested in 2020
        # NOTE(review): same assumption as for autologbooks above.
        troggle.logbooksdump.dumplogbooks()
    # elif command == "writecaves": # untested in 2020 - will overwrite input files!!
    #     writeCaves()
    elif command == "profile":
        jq.loadprofiles()
        jq.showprofile()
        sys.exit()
    elif command == "help":
        usage()
        sys.exit()
    else:
        usage()
        print("%s not recognised as a command." % sys.argv[1])
        sys.exit()

    jq.run()
    jq.showprofile()