troggle-unchained/databaseReset.py

from __future__ import (absolute_import, division,
                        print_function)
import os
import time
import timeit
import json

import settings
os.environ['PYTHONPATH'] = settings.PYTHON_PATH
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')

from django.core import management
from django.db import connection, close_old_connections
from django.contrib.auth.models import User
from django.http import HttpResponse
from django.core.urlresolvers import reverse

from troggle.core.models import Cave, Entrance
import troggle.settings
import troggle.flatpages.models
import troggle.logbooksdump

# NOTE databaseReset.py is *imported* by views_other.py as it is used in the control panel
# presented there.

expouser=settings.EXPOUSER
expouserpass=settings.EXPOUSERPASS
expouseremail=settings.EXPOUSER_EMAIL

def reinit_db():
    """Rebuild database from scratch. Deletes the file first if sqlite is used,
    otherwise it drops the database and creates it.
    """
    currentdbname = settings.DATABASES['default']['NAME']
    if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.sqlite3':
        try:
            os.remove(currentdbname)
        except OSError:
            pass
    else:
        cursor = connection.cursor()
        cursor.execute("DROP DATABASE %s" % currentdbname)
        cursor.execute("CREATE DATABASE %s" % currentdbname)
        cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % currentdbname)
        cursor.execute("USE %s" % currentdbname)
    syncuser()

def syncuser():
    """Sync user - needed after reload
    """
    print("Synchronizing user")
    management.call_command('migrate', interactive=False)
    user = User.objects.create_user(expouser, expouseremail, expouserpass)
    user.is_staff = True
    user.is_superuser = True
    user.save()

def dirsredirect():
    """Make directories that troggle requires and sets up page redirects
    """
    #should also deal with permissions here.
    #if not os.path.isdir(settings.PHOTOS_ROOT):
        #os.mkdir(settings.PHOTOS_ROOT)
    for oldURL, newURL in [("indxal.htm", reverse("caveindex"))]:
        f = troggle.flatpages.models.Redirect(originalURL = oldURL, newURL = newURL)
        f.save()

def import_caves():
    import troggle.parsers.caves
    print("Importing Caves")
    troggle.parsers.caves.readcaves()

def import_people():
    import troggle.parsers.people
    print("Importing People (folk.csv)")
    troggle.parsers.people.LoadPersonsExpos()

def import_logbooks():
    import troggle.parsers.logbooks
    print("Importing Logbooks")
    troggle.parsers.logbooks.LoadLogbooks()

def import_QMs():
    print("Importing QMs (old caves)")
    import troggle.parsers.QMs
    # import process itself runs on qm.csv in only 3 old caves, not the modern ones!

def import_survexblks():
    import troggle.parsers.survex
    print("Importing Survex Blocks")
    troggle.parsers.survex.LoadAllSurvexBlocks()

def import_survexpos():
    import troggle.parsers.survex
    print("Importing Survex x/y/z Positions")
    troggle.parsers.survex.LoadPos()

def import_surveyimgs():
    """This appears to store data in unused objects. The code is kept
    for future re-working to manage progress against notes, plans and elevs.
    """
    #import troggle.parsers.surveys
    print("NOT Importing survey images")
    #troggle.parsers.surveys.parseSurveys(logfile=settings.LOGFILE)

def import_surveyscans():
    import troggle.parsers.surveys
    print("Importing Survey Scans")
    troggle.parsers.surveys.LoadListScans()

def import_tunnelfiles():
    import troggle.parsers.surveys
    print("Importing Tunnel files")
    troggle.parsers.surveys.LoadTunnelFiles()

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# These functions moved to a different file - not used currently.
#import logbooksdump
#def import_auto_logbooks():
#def dumplogbooks():
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

class JobQueue():
    """A list of import operations to run. Always reports profile times
    in the same order.
    """
    def __init__(self,run):
        self.runlabel = run
        self.queue = [] # tuples of (jobname, jobfunction)
        self.results = {}
        self.results_order=[
            "date","runlabel","reinit", "caves", "people",
            "logbooks", "QMs", "survexblks", "survexpos",
            "tunnel", "scans", "surveyimgs", "test", "dirsredirect", "syncuser"  ]
        for k in self.results_order:
            self.results[k]=[]
        self.tfile = "import_profile.json"
        self.htmlfile = "profile.html" # for HTML results table. Not yet done.

    #Adding elements to queue - enqueue
    def enq(self,label,func):
        self.queue.append((label,func))
        return True

    #Removing the last element from the queue - dequeue
    # def deq(self):
    #     if len(self.queue)>0:
    #         return self.queue.pop()
    #     return ("Queue Empty!")

    def loadprofiles(self):
        """Load timings for previous runs from file
        """
        if os.path.isfile(self.tfile):
            try:
                f = open(self.tfile, "r")
                data = json.load(f)
                for j in data:
                    self.results[j] = data[j]
            except:
                print("FAILURE parsing JSON file %s" % (self.tfile))
                # Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12
            f.close()
        for j in self.results_order:
            self.results[j].append(None) # append a placeholder
        return True

    def saveprofiles(self):
        with open(self.tfile, 'w') as f:
            json.dump(self.results, f)
        return True

    def memdumpsql(self):
        djconn = django.db.connection
        from dump import _iterdump
        with open('memdump.sql', 'w') as f:
            for line in _iterdump(djconn):
                f.write('%s\n' % line.encode("utf8"))
        return True

    def runqonce(self):
        """Run all the jobs in the queue provided - once
        """

        print("** Running job ", self.runlabel)
        jobstart = time.time()
        self.results["date"].pop()
        self.results["date"].append(jobstart)
        self.results["runlabel"].pop()
        self.results["runlabel"].append(self.runlabel)

        for i in self.queue:
            start = time.time()
            i[1]()    #  looks ugly but invokes function passed in the second item in the tuple
            duration = time.time()-start
            print("\n*- Ended \"",  i[0], "\"  %.1f seconds" % duration)
            self.results[i[0]].pop()  # the null item
            self.results[i[0]].append(duration)


        jobend = time.time()
        jobduration = jobend-jobstart
        print("** Ended job   %s  -  %.1f seconds total." % (self.runlabel,jobduration))

        return True


    def run(self):
        """First runs all the jobs in the queue against a scratch in-memory db
        then re-runs the import against the db specified in settings.py
        Default behaviour is to skip the in-memory phase.
        When MySQL is the db the in-memory phase crashes as MySQL does not properly
        relinquish some kind of db connection (not fixed yet)
        """
        self.loadprofiles()
        # save db settings for later
        dbengine = settings.DATABASES['default']['ENGINE']
        dbname = settings.DATABASES['default']['NAME']
        dbdefault = settings.DATABASES['default']

        skipmem = False
        if self.runlabel:
            if self.runlabel == "":
                skipmem = True
            elif self.runlabel[0:2] == "F-":
                skipmem = True
        else:
            skipmem = True

        print("--  ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE'])
        #print "--  DATABASES.default", settings.DATABASES['default']

        if dbname ==":memory:":
            # just run, and save the sql file
            self.runqonce()
            self.memdumpsql() # saved contents of scratch db, could be imported later..
            self.saveprofiles()
        elif skipmem:
            self.runqonce()
            self.saveprofiles()
        else:
            django.db.close_old_connections() # needed if MySQL running?
            # run all the imports through :memory: first
            settings.DATABASES['default']['ENGINE'] = 'django.db.backends.sqlite3'
            settings.DATABASES['default']['NAME'] = ":memory:"
            settings.DATABASES['default'] = {'ENGINE': 'django.db.backends.sqlite3',
                'AUTOCOMMIT': True,
                'ATOMIC_REQUESTS': False,
                'NAME': ':memory:',
                'CONN_MAX_AGE': 0,
                'TIME_ZONE': 'UTC',
                'OPTIONS': {},
                'HOST': '',
                'USER': '',
                'TEST': {'COLLATION': None, 'CHARSET': None, 'NAME': None, 'MIRROR': None},
                'PASSWORD': '',
                'PORT': ''}


            print("--  ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE'])
            #print("--  DATABASES.default", settings.DATABASES['default'])

            # but because the user may be expecting to add this to a db with lots of tables already there,
            # the jobqueue may not start from scratch so we need to initialise the db properly first
            # because we are using an empty :memory: database
            # But initiating twice crashes it; so be sure to do it once only.


            # Damn. syncdb() is still calling MySQL somehow **conn_params not sqlite3. So crashes on expo server.
            if ("reinit",reinit_db) not in self.queue:
                reinit_db()
            if ("dirsredirect",dirsredirect) not in self.queue:
                dirsredirect()
            if ("caves",import_caves) not in self.queue:
                import_caves() # sometime extract the initialising code from this and put in reinit...
            if ("people",import_people) not in self.queue:
                import_people() # sometime extract the initialising code from this and put in reinit...

            django.db.close_old_connections() # maybe not needed here

            self.runqonce()
            self.memdumpsql()
            self.showprofile()

            # restore the original db and import again
            # if we wanted to, we could re-import the SQL generated in the first pass to be
            # blazing fast. But for the present just re-import the lot.
            settings.DATABASES['default'] = dbdefault
            settings.DATABASES['default']['ENGINE'] = dbengine
            settings.DATABASES['default']['NAME'] = dbname
            print("--  ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE'])

            django.db.close_old_connections() # maybe not needed here
            for j in self.results_order:
                self.results[j].pop() # throw away results from :memory: run
                self.results[j].append(None) # append a placeholder

            django.db.close_old_connections() # magic rune. works. found by looking in django.db__init__.py
            #django.setup()  # should this be needed?

            self.runqonce() # crashes because it thinks it has no migrations to apply, when it does.
            self.saveprofiles()

        return True

    def showprofile(self):
        """Prints out the time it took to run the jobqueue
        """
        for k in self.results_order:
            if k =="dirsredirect":
                break
            elif k =="syncuser":
                break
            elif k =="test":
                break
            elif k =="date":
                print("     days ago ", end=' ')
            else:
                print('%10s (s)' % k, end=' ')
            percen=0
            r = self.results[k]

            for i in range(len(r)):
                if k == "runlabel":
                    if r[i]:
                        rp =   r[i]
                    else:
                        rp = "      - "
                    print('%8s' % rp, end=' ')
                elif k =="date":
                    # Calculate dates as days before present
                    if r[i]:
                        if i == len(r)-1:
                            print("    this", end=' ')
                        else:
                            # prints one place to the left of where you expect
                            if r[len(r)-1]:
                                s = r[i]-r[len(r)-1]
                            else:
                                s = 0
                            days = (s)/(24*60*60)
                            print('%8.2f' % days, end=' ')
                elif r[i]:
                    print('%8.1f' % r[i], end=' ')
                    if i == len(r)-1 and r[i-1]:
                        percen = 100* (r[i] - r[i-1])/r[i-1]
                        if abs(percen) >0.1:
                            print('%8.1f%%' % percen, end=' ')
                else:
                    print("      - ", end=' ')
            print("")
        print("\n")
        return True


def usage():
    print("""Usage is 'python databaseReset.py <command> [runlabel]'
             where command is:
             test      - testing... imports people and prints profile. Deletes nothing.
             profile   - print the profile from previous runs. Import nothing.

             reset     - normal usage: clear database and reread everything from files - time-consuming
             caves     - read in the caves
             logbooks  - read in the logbooks
             people    - read in the people from folk.csv
             QMs       - read in the QM csv files (older caves only)
             scans     - the survey scans in all the wallets
             survex    - read in the survex files - all the survex blocks but not the x/y/z positions
             survexpos - just the x/y/z Pos out of the survex files
             survexall - both survex and survexpos

             tunnel    - read in the Tunnel files - which scans the survey scans too
             drawings  - Tunnel, QMs, scans

             reinit       - clear database (delete everything) and make empty tables. Import nothing.
             syncuser     - needed after reloading database from SQL backup
             autologbooks - Not used. read in autologbooks (what are these?)
             dumplogbooks - Not used. write out autologbooks (not working?)
             surveyimgs   - Not used. read in scans by-expo, must run after "people".

             and [runlabel] is an optional string identifying this run of the script
             in the stored profiling data 'import-profile.json'
             if [runlabel] is absent or begins with "F-" then it will skip the :memory: pass

             caves and logbooks must be run on an empty db before the others as they
             set up db tables used by the others.
             """)

if __name__ == "__main__":
    import troggle.core.models
    import sys
    import django
    django.setup()

    if len(sys.argv)>2:
        runlabel = sys.argv[len(sys.argv)-1]
    else:
        runlabel=None

    jq = JobQueue(runlabel)

    if len(sys.argv)==1:
        usage()
        exit()
    elif "test" in sys.argv:
        jq.enq("caves",import_caves)
        jq.enq("people",import_people)
    elif "caves" in sys.argv:
        jq.enq("caves",import_caves)
    elif "logbooks" in sys.argv:
        jq.enq("logbooks",import_logbooks)
    elif "people" in sys.argv:
        jq.enq("people",import_people)
    elif "QMs" in sys.argv:
        jq.enq("QMs",import_QMs)
    elif "reinit" in sys.argv:
        jq.enq("reinit",reinit_db)
    elif "reset" in sys.argv:
        jq.enq("reinit",reinit_db)
        jq.enq("dirsredirect",dirsredirect)
        jq.enq("caves",import_caves)
        jq.enq("people",import_people)
        jq.enq("scans",import_surveyscans)
        jq.enq("logbooks",import_logbooks)
        jq.enq("QMs",import_QMs)
        jq.enq("survexblks",import_survexblks)
        jq.enq("survexpos",import_survexpos)
        jq.enq("tunnel",import_tunnelfiles)
    elif "scans" in sys.argv:
        jq.enq("scans",import_surveyscans)
    elif "survex" in sys.argv:
        jq.enq("survexblks",import_survexblks)
    elif "survexpos" in sys.argv:
        jq.enq("survexpos",import_survexpos)
    elif "tunnel" in sys.argv:
        jq.enq("tunnel",import_tunnelfiles)
    elif "survexall" in sys.argv:
        jq.enq("survexblks",import_survexblks)
        jq.enq("survexpos",import_survexpos)
    elif "drawings" in sys.argv:
        jq.enq("QMs",import_QMs)
        jq.enq("scans",import_surveyscans)
        jq.enq("tunnel",import_tunnelfiles)
    elif "surveyimgs" in sys.argv:
        jq.enq("surveyimgs",import_surveyimgs) # imports into tables which are never read
    elif "autologbooks" in sys.argv: # untested in 2020
        import_auto_logbooks()
    elif "dumplogbooks" in sys.argv: # untested in 2020
        dumplogbooks()
    elif "profile" in sys.argv:
        jq.loadprofiles()
        jq.showprofile()
        exit()
    elif "help" in sys.argv:
        usage()
        exit()
    else:
        usage()
        print(("%s not recognised as a command." % sys.argv[1]))
        exit()

    jq.run()
    jq.showprofile()