Dumps loaded data into a .sql file

This commit is contained in:
Philip Sargent 2020-04-16 20:36:42 +01:00
parent e5c288c764
commit b123f6ada7
3 changed files with 140 additions and 75 deletions

View File

@ -11,6 +11,7 @@ from django.http import HttpResponse
from django.core.urlresolvers import reverse from django.core.urlresolvers import reverse
from troggle.core.models import Cave, Entrance from troggle.core.models import Cave, Entrance
import troggle.flatpages.models import troggle.flatpages.models
import json
databasename=settings.DATABASES['default']['NAME'] databasename=settings.DATABASES['default']['NAME']
expouser=settings.EXPOUSER expouser=settings.EXPOUSER
@ -61,7 +62,7 @@ def import_survex():
def import_QMs(): def import_QMs():
import parsers.QMs import parsers.QMs
# import process runs on qm.csv in only 3 caves, not 264! # import process itself runs on qm.csv in only 3 caves, not 264!
def import_surveys(): def import_surveys():
import parsers.surveys import parsers.surveys
@ -80,21 +81,6 @@ def pageredirects():
f = troggle.flatpages.models.Redirect(originalURL = oldURL, newURL = newURL) f = troggle.flatpages.models.Redirect(originalURL = oldURL, newURL = newURL)
f.save() f.save()
def reset(): # unused now that we have a jobqueue
    """Wipe the troggle database, then rebuild everything from the legacy
    data sources, in the fixed order the importers require.
    """
    steps = (
        reload_db,
        make_dirs,
        pageredirects,
        import_caves,
        import_people,
        import_surveyscans,
        import_logbooks,
        import_QMs,
        import_survex,
        import_tunnelfiles,
        import_surveys,
    )
    for step in steps:
        step()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def import_auto_logbooks(): def import_auto_logbooks():
import parsers.logbooks import parsers.logbooks
@ -149,12 +135,11 @@ def dumplogbooks():
f.write(unicode(output).encode( "utf-8" )) f.write(unicode(output).encode( "utf-8" ))
f.close() f.close()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class JobQueue(): class JobQueue():
""" A list of import operations to run. Always reports times """A list of import operations to run. Always reports times
in the same order """ in the same order.
"""
#Constructor creates a list
def __init__(self,run): def __init__(self,run):
self.runlabel = run self.runlabel = run
self.queue = [] # tuples of (jobname, jobfunction) self.queue = [] # tuples of (jobname, jobfunction)
@ -165,81 +150,88 @@ class JobQueue():
"tunnel", "surveys", "test", "makedirs", "redirect" ] "tunnel", "surveys", "test", "makedirs", "redirect" ]
for k in self.results_order: for k in self.results_order:
self.results[k]=[] self.results[k]=[]
self.tfile = "import_profile.json"
self.htmlfile = "profile.html"
#Adding elements to queue #Adding elements to queue - enqueue
def enq(self,label,func): def enq(self,label,func):
self.queue.append((label,func)) self.queue.append((label,func))
return True return True
#Removing the last element from the queue #Removing the last element from the queue - dequeue
def deq(self): # def deq(self):
if len(self.queue)>0: # if len(self.queue)>0:
return self.queue.pop() # return self.queue.pop()
return ("Queue Empty!") # return ("Queue Empty!")
def size(self):
return len(self.queue)
def run(self): def run(self):
import json if os.path.isfile(self.tfile):
tfile = "import_profile.json"
if os.path.isfile(tfile):
try: try:
f = open(tfile, "r") f = open(self.tfile, "r")
data = json.load(f) data = json.load(f)
for j in data: for j in data:
self.results[j] = data[j] self.results[j] = data[j]
except: except:
print "FAILURE parsing JSON file %s" % (tfile) print "FAILURE parsing JSON file %s" % (self.tfile)
# Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12 # Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12
f.close() f.close()
for i in self.queue:
print i, self.results[i[0]]
self.results[i[0]].append(1.0)
print "** Running job ", self.runlabel print "** Running job ", self.runlabel
jobstart = time.time()
self.results["date"].append(jobstart)
self.results["runlabel"].append(self.runlabel)
for i in self.queue: for i in self.queue:
#print "*- Running \"", i[0], "\""
start = time.time() start = time.time()
i[1]() i[1]() # looks ugly but invokes function passed in the second item in the tuple
duration = time.time()-start duration = time.time()-start
print "\n*- Ended \"", i[0], "\" %.1f seconds" % duration print "\n*- Ended \"", i[0], "\" %.1f seconds" % duration
self.results[i[0]].append(duration) self.results[i[0]].append(duration)
self.results["date"].append(start)
self.results["runlabel"].append(self.runlabel)
print "** Ended all jobs."
#print self.results
with open(tfile, 'w') as f: with open(self.tfile, 'w') as f:
json.dump(self.results, f) json.dump(self.results, f)
for k in self.results_order:
percen=0
if k == "runlabel":
pass
if k =="date":
# Calculate dates as days before present to one decimal place
pass
elif len(self.results[k])>3:
lst = self.results[k]
e = len(lst)-1
percen = 100* (lst[e] - lst[e-1])/lst[e-1]
if abs(percen) >0.1:
print '%15s %8.1f%%' % (k, percen)
else:
print '%15s ' % (k)
jobend = time.time()
jobduration = jobend-jobstart
print "** Ended all jobs. %.1f seconds" % jobduration
# currently uses django db whatever it was. CHANGE this to explicitly use
# a new sqlite3 db and then import the sql dump of that into the troggle db
# instead of loading directly into the troggle sqlite db.
# in-memory ":memory:" sqlite is ~ 7x faster and all of troggle can be
# loaded in 6 minutes that way
djconn = django.db.connection
from dump import _iterdump
with open('memdump.sql', 'w') as f:
for line in _iterdump(djconn):
f.write('%s\n' % line.encode("utf8"))
# now import the memory image sql into (to do)
return True
def showprofile(self):
"""Prints out the time it took to run the jobqueue"""
for k in self.results_order:
percen=0
lst = self.results[k]
if k == "runlabel":
r = lst[len(lst)-1]
print '%15s %s' % (k,r)
elif k =="date":
# Calculate dates as days before present to one decimal place
r = lst[len(lst)-1]
if len(lst)>2:
days = (lst[len(lst)-2]-r)/(24*60*60)
print '%15s %8.1f days ago' % (k,days)
elif len(lst)>2:
e = len(lst)-1
percen = 100* (lst[e] - lst[e-1])/lst[e-1]
if abs(percen) >0.1:
print '%15s %8.1f%%' % (k, percen)
else:
print '%15s ' % (k)
return True return True
def importtest():
    """Dummy job: burns a small, random amount of CPU so the JobQueue
    timing/profiling machinery has something to measure. Always succeeds."""
    from random import randrange
    total = 0
    # Equivalent to the nested loop: each outer value i contributes i*i.
    for outer in range(5 + randrange(15)):
        total += outer * outer
    #print total,
    return True
def usage(): def usage():
print("""Usage is 'python databaseReset.py <command> [runlabel]' print("""Usage is 'python databaseReset.py <command> [runlabel]'
@ -277,9 +269,12 @@ if __name__ == "__main__":
jq = JobQueue(runlabel) jq = JobQueue(runlabel)
if "test" in sys.argv: if "test" in sys.argv:
jq.enq("test",importtest) jq.enq("reload",reload_db)
jq.enq("caves",importtest) jq.enq("makedirs",make_dirs)
jq.enq("people",importtest) jq.enq("caves",import_caves)
jq.enq("survex",import_survex)
jq.enq("surveys",import_surveys)
elif "caves" in sys.argv: elif "caves" in sys.argv:
jq.enq("caves",import_caves) jq.enq("caves",import_caves)
elif "logbooks" in sys.argv: elif "logbooks" in sys.argv:
@ -335,3 +330,4 @@ if __name__ == "__main__":
usage() usage()
jq.run() jq.run()
jq.showprofile()

69
dump.py Normal file
View File

@ -0,0 +1,69 @@
# Mimic the sqlite3 console shell's .dump command
# Author: Paul Kippes <kippesp@gmail.com>
# Every identifier in sql is quoted based on a comment in sqlite
# documentation "SQLite adds new keywords from time to time when it
# takes on new features. So to prevent your code from being broken by
# future enhancements, you should normally quote any identifier that
# is an English language word, even if you do not have to."
def _iterdump(connection):
"""
Returns an iterator to the dump of the database in an SQL text format.
Used to produce an SQL dump of the database. Useful to save an in-memory
database for later restoration. This function should not be called
directly but instead called from the Connection method, iterdump().
"""
cu = connection.cursor()
yield('BEGIN TRANSACTION;')
# sqlite_master table contains the SQL CREATE statements for the database.
q = """
SELECT "name", "type", "sql"
FROM "sqlite_master"
WHERE "sql" NOT NULL AND
"type" == 'table'
ORDER BY "name"
"""
schema_res = cu.execute(q)
for table_name, type, sql in schema_res.fetchall():
if table_name == 'sqlite_sequence':
yield('DELETE FROM "sqlite_sequence";')
elif table_name == 'sqlite_stat1':
yield('ANALYZE "sqlite_master";')
elif table_name.startswith('sqlite_'):
continue
# NOTE: Virtual table support not implemented
#elif sql.startswith('CREATE VIRTUAL TABLE'):
# qtable = table_name.replace("'", "''")
# yield("INSERT INTO sqlite_master(type,name,tbl_name,rootpage,sql)"\
# "VALUES('table','{0}','{0}',0,'{1}');".format(
# qtable,
# sql.replace("''")))
else:
yield('{0};'.format(sql))
# Build the insert statement for each row of the current table
table_name_ident = table_name.replace('"', '""')
res = cu.execute('PRAGMA table_info("{0}")'.format(table_name_ident))
column_names = [str(table_info[1]) for table_info in res.fetchall()]
q = """SELECT 'INSERT INTO "{0}" VALUES({1})' FROM "{0}";""".format(
table_name_ident,
",".join("""'||quote("{0}")||'""".format(col.replace('"', '""')) for col in column_names))
query_res = cu.execute(q)
for row in query_res:
yield(row[0]) # '{0}'.format(row[0]) had unicode errors
# Now when the type is 'index', 'trigger', or 'view'
q = """
SELECT "name", "type", "sql"
FROM "sqlite_master"
WHERE "sql" NOT NULL AND
"type" IN ('index', 'trigger', 'view')
"""
schema_res = cu.execute(q)
for name, type, sql in schema_res.fetchall():
yield('{0};'.format(sql))
yield('COMMIT;')

View File

@ -401,4 +401,4 @@ def LoadPos():
ss.z = float(z) ss.z = float(z)
ss.save() ss.save()
except: except:
print("%s not parsed in survex %s" % (name, pos)) print "%s not parsed in survex %s.pos" % (name, settings.SURVEX_TOPNAME)