From b123f6ada7ef78526878ac84c259c93df4bfa9e3 Mon Sep 17 00:00:00 2001
From: Philip Sargent
Date: Thu, 16 Apr 2020 20:36:42 +0100
Subject: [PATCH] Dumps loaded data into a .sql file

---
 databaseReset.py  | 144 ++++++++++++++++++++++------------------------
 dump.py           |  69 ++++++++++++++++++++++
 parsers/survex.py |   2 +-
 3 files changed, 140 insertions(+), 75 deletions(-)
 create mode 100644 dump.py

diff --git a/databaseReset.py b/databaseReset.py
index 4b45f36..9adfa4a 100644
--- a/databaseReset.py
+++ b/databaseReset.py
@@ -11,6 +11,7 @@ from django.http import HttpResponse
 from django.core.urlresolvers import reverse
 from troggle.core.models import Cave, Entrance
 import troggle.flatpages.models
+import json
 
 databasename=settings.DATABASES['default']['NAME']
 expouser=settings.EXPOUSER
@@ -61,7 +62,7 @@ def import_survex():
 
 def import_QMs():
     import parsers.QMs
-    # import process runs on qm.csv in only 3 caves, not 264!
+    # import process itself runs on qm.csv in only 3 caves, not 264!
 
 def import_surveys():
     import parsers.surveys
@@ -80,21 +81,6 @@ def pageredirects():
         f = troggle.flatpages.models.Redirect(originalURL = oldURL, newURL = newURL)
         f.save()
 
-def reset(): # unused now that we have a jobqueue
-    """ Wipe the troggle database and import everything from legacy data
-    """
-    reload_db()
-    make_dirs()
-    pageredirects()
-    import_caves()
-    import_people()
-    import_surveyscans()
-    import_logbooks()
-    import_QMs()
-    import_survex()
-    import_tunnelfiles()
-    import_surveys()
-
 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 def import_auto_logbooks():
     import parsers.logbooks
@@ -149,12 +135,11 @@ def dumplogbooks():
             f.write(unicode(output).encode( "utf-8" ))
             f.close()
 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
 class JobQueue():
-    """ A list of import operations to run. Always reports times
-    in the same order """
-
-
-    #Constructor creates a list
+    """A list of import operations to run. Always reports times
+    in the same order.
+    """
     def __init__(self,run):
         self.runlabel = run
         self.queue = [] # tuples of (jobname, jobfunction)
@@ -165,81 +150,88 @@ class JobQueue():
                 "tunnel", "surveys", "test", "makedirs", "redirect" ]
         for k in self.results_order:
             self.results[k]=[]
+        self.tfile = "import_profile.json"
+        self.htmlfile = "profile.html"
 
-    #Adding elements to queue
+    #Adding elements to queue - enqueue
     def enq(self,label,func):
         self.queue.append((label,func))
         return True
 
-    #Removing the last element from the queue
-    def deq(self):
-        if len(self.queue)>0:
-            return self.queue.pop()
-        return ("Queue Empty!")
-
-    def size(self):
-        return len(self.queue)
+    #Removing the last element from the queue - dequeue
+    # def deq(self):
+    #     if len(self.queue)>0:
+    #         return self.queue.pop()
+    #     return ("Queue Empty!")
 
     def run(self):
-        import json
-        tfile = "import_profile.json"
-        if os.path.isfile(tfile):
+        if os.path.isfile(self.tfile):
             try:
-                f = open(tfile, "r")
+                f = open(self.tfile, "r")
                 data = json.load(f)
                 for j in data:
                     self.results[j] = data[j]
            except:
-                print "FAILURE parsing JSON file %s" % (self.tfile)
+                print "FAILURE parsing JSON file %s" % (self.tfile)
                 # Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12
            f.close()
-        for i in self.queue:
-            print i, self.results[i[0]]
-            self.results[i[0]].append(1.0)
-        print "** Running job ", self.runlabel
+        jobstart = time.time()
+        self.results["date"].append(jobstart)
+        self.results["runlabel"].append(self.runlabel)
+
        for i in self.queue:
-            #print "*- Running \"", i[0], "\""
            start = time.time()
-            i[1]()
+            i[1]()    # looks ugly, but this invokes the function passed as the second item of the tuple
            duration = time.time()-start
            print "\n*- Ended \"", i[0], "\" %.1f seconds" % duration
            self.results[i[0]].append(duration)
-            self.results["date"].append(start)
-            self.results["runlabel"].append(self.runlabel)
-        print "** Ended all jobs."
-        #print self.results
-
-        with open(tfile, 'w') as f:
-            json.dump(self.results, f)
-
-        for k in self.results_order:
-            percen=0
-            if k == "runlabel":
-                pass
-            if k =="date":
-                # Calculate dates as days before present to one decimal place
-                pass
-            elif len(self.results[k])>3:
-                lst = self.results[k]
-                e = len(lst)-1
-                percen = 100* (lst[e] - lst[e-1])/lst[e-1]
-                if abs(percen) >0.1:
-                    print '%15s %8.1f%%' % (k, percen)
-            else:
-                print '%15s ' % (k)
+        with open(self.tfile, 'w') as f:
+            json.dump(self.results, f)
+
+        jobend = time.time()
+        jobduration = jobend-jobstart
+        print "** Ended all jobs. %.1f seconds" % jobduration
+
+        # currently uses whatever db django was configured with. CHANGE this to explicitly use
+        # a new sqlite3 db and then import the sql dump of that into the troggle db
+        # instead of loading directly into the troggle sqlite db.
+        # an in-memory ":memory:" sqlite db is ~7x faster and all of troggle can be
+        # loaded in 6 minutes that way
+        djconn = django.db.connection
+        from dump import _iterdump
+        with open('memdump.sql', 'w') as f:
+            for line in _iterdump(djconn):
+                f.write('%s\n' % line.encode("utf8"))
+
+        # now import the memory image sql into (to do)
+        return True
+
+    def showprofile(self):
+        """Prints out the time it took to run the jobqueue"""
+        for k in self.results_order:
+            percen=0
+            lst = self.results[k]
+            if k == "runlabel":
+                r = lst[len(lst)-1]
+                print '%15s %s' % (k,r)
+            elif k =="date":
+                # Calculate dates as days before present to one decimal place
+                r = lst[len(lst)-1]
+                if len(lst)>2:
+                    days = (lst[len(lst)-2]-r)/(24*60*60)
+                    print '%15s %8.1f days ago' % (k,days)
+            elif len(lst)>2:
+                e = len(lst)-1
+                percen = 100* (lst[e] - lst[e-1])/lst[e-1]
+                if abs(percen) >0.1:
+                    print '%15s %8.1f%%' % (k, percen)
+            else:
+                print '%15s ' % (k)
         return True
 
-def importtest():
-    from random import randrange
-    k = 0
-    for i in range(5+randrange(15)):
-        for j in range(i):
-            k += i
-    #print k,
-    return True
 
 def usage():
     print("""Usage is 'python databaseReset.py [runlabel]'
@@ -277,9 +269,12 @@ if __name__ == "__main__":
     jq = JobQueue(runlabel)
 
     if "test" in sys.argv:
-        jq.enq("test",importtest)
-        jq.enq("caves",importtest)
-        jq.enq("people",importtest)
+        jq.enq("reload",reload_db)
+        jq.enq("makedirs",make_dirs)
+        jq.enq("caves",import_caves)
+        jq.enq("survex",import_survex)
+        jq.enq("surveys",import_surveys)
+
     elif "caves" in sys.argv:
         jq.enq("caves",import_caves)
     elif "logbooks" in sys.argv:
@@ -335,3 +330,4 @@ if __name__ == "__main__":
         usage()
 
     jq.run()
+    jq.showprofile()
diff --git a/dump.py b/dump.py
new file mode 100644
index 0000000..9206690
--- /dev/null
+++ b/dump.py
@@ -0,0 +1,69 @@
+# Mimic the sqlite3 console shell's .dump command
+# Author: Paul Kippes
+
+# Every identifier in sql is quoted based on a comment in sqlite
+# documentation "SQLite adds new keywords from time to time when it
+# takes on new features. So to prevent your code from being broken by
+# future enhancements, you should normally quote any identifier that
+# is an English language word, even if you do not have to."
+
+def _iterdump(connection):
+    """
+    Returns an iterator to the dump of the database in an SQL text format.
+    Used to produce an SQL dump of the database. Useful to save an in-memory
+    database for later restoration. This function should not be called
+    directly but instead called from the Connection method, iterdump().
+    """
+
+    cu = connection.cursor()
+    yield('BEGIN TRANSACTION;')
+
+    # sqlite_master table contains the SQL CREATE statements for the database.
+    q = """
+        SELECT "name", "type", "sql"
+        FROM "sqlite_master"
+            WHERE "sql" NOT NULL AND
+            "type" == 'table'
+            ORDER BY "name"
+        """
+    schema_res = cu.execute(q)
+    for table_name, type, sql in schema_res.fetchall():
+        if table_name == 'sqlite_sequence':
+            yield('DELETE FROM "sqlite_sequence";')
+        elif table_name == 'sqlite_stat1':
+            yield('ANALYZE "sqlite_master";')
+        elif table_name.startswith('sqlite_'):
+            continue
+        # NOTE: Virtual table support not implemented
+        #elif sql.startswith('CREATE VIRTUAL TABLE'):
+        #    qtable = table_name.replace("'", "''")
+        #    yield("INSERT INTO sqlite_master(type,name,tbl_name,rootpage,sql)"\
+        #        "VALUES('table','{0}','{0}',0,'{1}');".format(
+        #        qtable,
+        #        sql.replace("''")))
+        else:
+            yield('{0};'.format(sql))
+
+        # Build the insert statement for each row of the current table
+        table_name_ident = table_name.replace('"', '""')
+        res = cu.execute('PRAGMA table_info("{0}")'.format(table_name_ident))
+        column_names = [str(table_info[1]) for table_info in res.fetchall()]
+        q = """SELECT 'INSERT INTO "{0}" VALUES({1})' FROM "{0}";""".format(
+            table_name_ident,
+            ",".join("""'||quote("{0}")||'""".format(col.replace('"', '""')) for col in column_names))
+        query_res = cu.execute(q)
+        for row in query_res:
+            yield(row[0])   # '{0}'.format(row[0]) had unicode errors
+
+    # Now when the type is 'index', 'trigger', or 'view'
+    q = """
+        SELECT "name", "type", "sql"
+        FROM "sqlite_master"
+            WHERE "sql" NOT NULL AND
+            "type" IN ('index', 'trigger', 'view')
+        """
+    schema_res = cu.execute(q)
+    for name, type, sql in schema_res.fetchall():
+        yield('{0};'.format(sql))
+
+    yield('COMMIT;')
\ No newline at end of file
diff --git a/parsers/survex.py b/parsers/survex.py
index 3cda53a..3d78c59 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -401,4 +401,4 @@ def LoadPos():
                 ss.z = float(z)
                 ss.save()
             except:
-                print("%s not parsed in survex %s" % (name, pos))
+                print "%s not parsed in survex %s.pos" % (name, settings.SURVEX_TOPNAME)
\ No newline at end of file
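
Note on the "(to do)" left in JobQueue.run(): a minimal sketch, not part of this patch, of how the generated memdump.sql could be replayed into a fresh SQLite database using only the standard library sqlite3 module. The restore_dump() helper name and the target filename are illustrative assumptions:

    import sqlite3

    def restore_dump(dumpfile='memdump.sql', target='troggle_restored.sqlite'):
        """Replay an SQL dump produced by _iterdump() into a new SQLite database.
        Hypothetical helper for illustration only."""
        with open(dumpfile) as f:
            sql = f.read()
        conn = sqlite3.connect(target)   # or ':memory:' for an in-memory database
        conn.executescript(sql)          # executes the whole BEGIN ... COMMIT script
        conn.close()

The same effect is available from the sqlite3 command-line shell by opening a new database file and issuing '.read memdump.sql'.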