forked from expo/troggle
Dumps loaded data into a .sql file
This commit is contained in:
parent e5c288c764
commit b123f6ada7
databaseReset.py (144 changed lines)
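
In outline: the old monolithic reset() is replaced by a profiled JobQueue, and after a run the loaded database is written out as a .sql dump via the new dump.py. A minimal sketch of the dump step on its own (Python 2, matching the codebase; the table and row are invented for illustration):

    import sqlite3
    from dump import _iterdump

    # any connection exposing .cursor() works; JobQueue.run() passes django.db.connection
    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE cave (id INTEGER PRIMARY KEY, name TEXT)")
    conn.execute("INSERT INTO cave (name) VALUES ('Eishoehle')")
    conn.commit()

    with open('memdump.sql', 'w') as f:
        for line in _iterdump(conn):
            f.write('%s\n' % line.encode('utf8'))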
@@ -11,6 +11,7 @@ from django.http import HttpResponse
 from django.core.urlresolvers import reverse
 from troggle.core.models import Cave, Entrance
 import troggle.flatpages.models
+import json

 databasename=settings.DATABASES['default']['NAME']
 expouser=settings.EXPOUSER
@@ -61,7 +62,7 @@ def import_survex():

 def import_QMs():
     import parsers.QMs
-    # import process runs on qm.csv in only 3 caves, not 264!
+    # import process itself runs on qm.csv in only 3 caves, not 264!

 def import_surveys():
     import parsers.surveys
@@ -80,21 +81,6 @@ def pageredirects():
         f = troggle.flatpages.models.Redirect(originalURL = oldURL, newURL = newURL)
         f.save()

-def reset(): # unused now that we have a jobqueue
-    """ Wipe the troggle database and import everything from legacy data
-    """
-    reload_db()
-    make_dirs()
-    pageredirects()
-    import_caves()
-    import_people()
-    import_surveyscans()
-    import_logbooks()
-    import_QMs()
-    import_survex()
-    import_tunnelfiles()
-    import_surveys()
-
 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 def import_auto_logbooks():
     import parsers.logbooks
@@ -149,12 +135,11 @@ def dumplogbooks():
         f.write(unicode(output).encode( "utf-8" ))
         f.close()
 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

 class JobQueue():
-    """ A list of import operations to run. Always reports times
-    in the same order """
-
-    #Constructor creates a list
+    """A list of import operations to run. Always reports times
+    in the same order.
+    """
     def __init__(self,run):
         self.runlabel = run
         self.queue = [] # tuples of (jobname, jobfunction)
@@ -165,81 +150,88 @@ class JobQueue():
                                "tunnel", "surveys", "test", "makedirs", "redirect" ]
         for k in self.results_order:
             self.results[k]=[]
+        self.tfile = "import_profile.json"
+        self.htmlfile = "profile.html"

-    #Adding elements to queue
+    #Adding elements to queue - enqueue
     def enq(self,label,func):
         self.queue.append((label,func))
         return True

-    #Removing the last element from the queue
-    def deq(self):
-        if len(self.queue)>0:
-            return self.queue.pop()
-        return ("Queue Empty!")
+    #Removing the last element from the queue - dequeue
+    # def deq(self):
+    #     if len(self.queue)>0:
+    #         return self.queue.pop()
+    #     return ("Queue Empty!")

-    def size(self):
-        return len(self.queue)
-
     def run(self):
-        import json
-        tfile = "import_profile.json"
-        if os.path.isfile(tfile):
+        if os.path.isfile(self.tfile):
             try:
-                f = open(tfile, "r")
+                f = open(self.tfile, "r")
                 data = json.load(f)
                 for j in data:
                     self.results[j] = data[j]
             except:
-                print "FAILURE parsing JSON file %s" % (tfile)
+                print "FAILURE parsing JSON file %s" % (self.tfile)
                 # Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12
             f.close()

-        for i in self.queue:
-            print i, self.results[i[0]]
-            self.results[i[0]].append(1.0)
-
         print "** Running job ", self.runlabel
+        jobstart = time.time()
+        self.results["date"].append(jobstart)
+        self.results["runlabel"].append(self.runlabel)

         for i in self.queue:
-            #print "*- Running \"", i[0], "\""
             start = time.time()
-            i[1]()
+            i[1]() # looks ugly but invokes function passed in the second item in the tuple
             duration = time.time()-start
             print "\n*- Ended \"", i[0], "\" %.1f seconds" % duration
             self.results[i[0]].append(duration)
-            self.results["date"].append(start)
-            self.results["runlabel"].append(self.runlabel)
-        print "** Ended all jobs."
-        #print self.results

-        with open(tfile, 'w') as f:
+        with open(self.tfile, 'w') as f:
             json.dump(self.results, f)

-        for k in self.results_order:
-            percen=0
-            if k == "runlabel":
-                pass
-            if k =="date":
-                # Calculate dates as days before present to one decimal place
-                pass
-            elif len(self.results[k])>3:
-                lst = self.results[k]
-                e = len(lst)-1
-                percen = 100* (lst[e] - lst[e-1])/lst[e-1]
-                if abs(percen) >0.1:
-                    print '%15s %8.1f%%' % (k, percen)
-            else:
-                print '%15s ' % (k)
+        jobend = time.time()
+        jobduration = jobend-jobstart
+        print "** Ended all jobs. %.1f seconds" % jobduration

+        # currently uses django db whatever it was. CHANGE this to explicitly use
+        # a new sqlite3 db and then import the sql dump of that into the troggle db
+        # instead of loading directly into the troggle sqlite db.
+        # in-memory ":memory:" sqlite is ~ 7x faster and all of troggle can be
+        # loaded in 6 minutes that way
+        djconn = django.db.connection
+        from dump import _iterdump
+        with open('memdump.sql', 'w') as f:
+            for line in _iterdump(djconn):
+                f.write('%s\n' % line.encode("utf8"))
+
+        # now import the memory image sql into (to do)
+        return True
+
+    def showprofile(self):
+        """Prints out the time it took to run the jobqueue"""
+        for k in self.results_order:
+            percen=0
+            lst = self.results[k]
+            if k == "runlabel":
+                r = lst[len(lst)-1]
+                print '%15s %s' % (k,r)
+            elif k =="date":
+                # Calculate dates as days before present to one decimal place
+                r = lst[len(lst)-1]
+                if len(lst)>2:
+                    days = (lst[len(lst)-2]-r)/(24*60*60)
+                    print '%15s %8.1f days ago' % (k,days)
+            elif len(lst)>2:
+                e = len(lst)-1
+                percen = 100* (lst[e] - lst[e-1])/lst[e-1]
+                if abs(percen) >0.1:
+                    print '%15s %8.1f%%' % (k, percen)
+            else:
+                print '%15s ' % (k)
         return True

-def importtest():
-    from random import randrange
-    k = 0
-    for i in range(5+randrange(15)):
-        for j in range(i):
-            k += i
-    #print k,
-    return True
-
 def usage():
     print("""Usage is 'python databaseReset.py <command> [runlabel]'
@@ -277,9 +269,12 @@ if __name__ == "__main__":
     jq = JobQueue(runlabel)

     if "test" in sys.argv:
-        jq.enq("test",importtest)
-        jq.enq("caves",importtest)
-        jq.enq("people",importtest)
+        jq.enq("reload",reload_db)
+        jq.enq("makedirs",make_dirs)
+        jq.enq("caves",import_caves)
+        jq.enq("survex",import_survex)
+        jq.enq("surveys",import_surveys)
+
     elif "caves" in sys.argv:
         jq.enq("caves",import_caves)
     elif "logbooks" in sys.argv:
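
Putting the pieces together, the queue is now driven like this (a sketch mirroring the __main__ block above; "nightly" is an invented runlabel):

    jq = JobQueue("nightly")
    jq.enq("caves", import_caves)      # the queue holds (label, function) tuples
    jq.enq("survex", import_survex)
    jq.run()          # times each job, then writes memdump.sql and import_profile.json
    jq.showprofile()  # prints each job's change against the previous run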
@@ -335,3 +330,4 @@ if __name__ == "__main__":
         usage()

     jq.run()
+    jq.showprofile()
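
For reference, the import_profile.json written by run() is a flat JSON object: one list per key in results_order, appended to on every run. An illustrative shape with made-up values (only the keys visible in the diff are certain):

    {"date": [1546300800.0, 1546387200.0],
     "runlabel": ["n1", "n2"],
     "caves": [12.1, 11.8],
     "survex": [340.2, 335.7]}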
dump.py (new file, 69 lines)
@@ -0,0 +1,69 @@
+# Mimic the sqlite3 console shell's .dump command
+# Author: Paul Kippes <kippesp@gmail.com>
+
+# Every identifier in sql is quoted based on a comment in sqlite
+# documentation "SQLite adds new keywords from time to time when it
+# takes on new features. So to prevent your code from being broken by
+# future enhancements, you should normally quote any identifier that
+# is an English language word, even if you do not have to."
+
+def _iterdump(connection):
+    """
+    Returns an iterator to the dump of the database in an SQL text format.
+    Used to produce an SQL dump of the database. Useful to save an in-memory
+    database for later restoration. This function should not be called
+    directly but instead called from the Connection method, iterdump().
+    """
+
+    cu = connection.cursor()
+    yield('BEGIN TRANSACTION;')
+
+    # sqlite_master table contains the SQL CREATE statements for the database.
+    q = """
+        SELECT "name", "type", "sql"
+        FROM "sqlite_master"
+            WHERE "sql" NOT NULL AND
+            "type" == 'table'
+            ORDER BY "name"
+        """
+    schema_res = cu.execute(q)
+    for table_name, type, sql in schema_res.fetchall():
+        if table_name == 'sqlite_sequence':
+            yield('DELETE FROM "sqlite_sequence";')
+        elif table_name == 'sqlite_stat1':
+            yield('ANALYZE "sqlite_master";')
+        elif table_name.startswith('sqlite_'):
+            continue
+        # NOTE: Virtual table support not implemented
+        #elif sql.startswith('CREATE VIRTUAL TABLE'):
+        #    qtable = table_name.replace("'", "''")
+        #    yield("INSERT INTO sqlite_master(type,name,tbl_name,rootpage,sql)"\
+        #          "VALUES('table','{0}','{0}',0,'{1}');".format(
+        #          qtable,
+        #          sql.replace("''")))
+        else:
+            yield('{0};'.format(sql))
+
+        # Build the insert statement for each row of the current table
+        table_name_ident = table_name.replace('"', '""')
+        res = cu.execute('PRAGMA table_info("{0}")'.format(table_name_ident))
+        column_names = [str(table_info[1]) for table_info in res.fetchall()]
+        q = """SELECT 'INSERT INTO "{0}" VALUES({1})' FROM "{0}";""".format(
+            table_name_ident,
+            ",".join("""'||quote("{0}")||'""".format(col.replace('"', '""')) for col in column_names))
+        query_res = cu.execute(q)
+        for row in query_res:
+            yield(row[0]) # '{0}'.format(row[0]) had unicode errors
+
+    # Now when the type is 'index', 'trigger', or 'view'
+    q = """
+        SELECT "name", "type", "sql"
+        FROM "sqlite_master"
+            WHERE "sql" NOT NULL AND
+            "type" IN ('index', 'trigger', 'view')
+        """
+    schema_res = cu.execute(q)
+    for name, type, sql in schema_res.fetchall():
+        yield('{0};'.format(sql))
+
+    yield('COMMIT;')
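
dump.py appears to follow the standard library's sqlite3 dump helper, so the output restores with the usual executescript round trip. A sketch of the re-import step still marked "(to do)" in run(), with an assumed source path:

    import sqlite3
    from dump import _iterdump

    src = sqlite3.connect("troggle.sqlite")   # assumed path to the loaded db
    dst = sqlite3.connect(":memory:")
    # _iterdump already brackets the dump in BEGIN TRANSACTION; ... COMMIT;
    dst.executescript("\n".join(_iterdump(src)))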
@@ -401,4 +401,4 @@ def LoadPos():
             ss.z = float(z)
             ss.save()
         except:
-            print("%s not parsed in survex %s" % (name, pos))
+            print "%s not parsed in survex %s.pos" % (name, settings.SURVEX_TOPNAME)