# troggle-unchained/databaseReset.py

import sys
import os
import time
import timeit
import json
import resource

# Bootstrap Django for stand-alone use. The statements below are
# order-dependent: the project settings module is imported first, the
# environment variables are set from it, and only then is django imported
# and initialised with django.setup().
import settings
os.environ['PYTHONPATH'] = settings.PYTHON_PATH
# setdefault: an already-exported DJANGO_SETTINGS_MODULE is left untouched.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')

print(" - settings on loading databaseReset.py")

import django
# Index 2 of the getrusage() result is ru_maxrss (peak resident set size).
# The /1024.0 conversion to MB assumes the platform reports kilobytes, as
# Linux does - NOTE(review): confirm on other platforms.
# Note that this "before" figure is taken after 'import django' but before
# django.setup() has run.
print(" - Memory footprint before loading Django: {:.3f} MB".format(resource.getrusage(resource.RUSAGE_SELF)[2]/1024.0))
try:
    django.setup()
except:
    # Nothing is swallowed here: a hint is printed and the original
    # exception is re-raised unchanged.
    print(" ! Cyclic reference failure. Can occur when the initial db is empty. Fixed now (in UploadFileForm) but easy to reintroduce..")
    raise
print(" - Memory footprint after loading Django: {:.3f} MB".format(resource.getrusage(resource.RUSAGE_SELF)[2]/1024.0))

import troggle.flatpages.models
import troggle.core.models
import troggle.core.models_survex

from django.core import management
from django.db import connection, close_old_connections, connections
from django.http import HttpResponse
from django.urls import reverse
from django.contrib.auth.models import User

from troggle.core.models import get_process_memory
from troggle.core.models_caves import Cave, Entrance
from troggle.parsers.imports import import_caves, import_people, import_surveyscans, \
        import_logbooks, import_QMs, import_survex, import_loadpos, import_drawingsfiles, \
        import_subcaves
import troggle.logbooksdump

# Refuse to run with an effective uid of 0 (root): files such as logs and
# caches written by this script would otherwise end up with the wrong
# ownership/permissions on the server.
if not os.geteuid():
    print("This script should be run as expo not root - quitting")
    exit()

# Account details, taken from settings, for the admin user that
# reinit_db() creates in a freshly built database.
expouser, expouserpass, expouseremail = (
    settings.EXPOUSER,
    settings.EXPOUSERPASS,
    settings.EXPOUSER_EMAIL,
)

def reinit_db():
    """Rebuild database from scratch. Deletes the file first if sqlite is used,
    otherwise it drops the database and creates it.
    Note - initial loading of troggle.sqlite will already have populated the models
    in memory (django python models, not the database), so there is already a full load
    of stuff known. Deleting the db file does not clear memory.

    Steps performed:
      1. Empty the current database: close all connections for ':memory:',
         remove the file for sqlite3, or DROP/CREATE for any other engine.
      2. Run makemigrations and migrate for the 'core' and 'flatpages' apps.
      3. Create the admin user from the module-level expouser, expouseremail
         and expouserpass values.

    A failure while creating the admin user is reported but deliberately not
    re-raised (best effort): diagnostics are printed, the database is dumped
    to 'integrityfail.sql' and the default database NAME is switched to
    ':memory:'.

    Raises:
        OSError: if the sqlite database file exists but cannot be removed.
    """
    print("Reinitialising db ",end="")
    print(django.db.connections.databases['default']['NAME'])
    currentdbname = settings.DATABASES['default']['NAME']
    if currentdbname == ':memory:':
        # closing connections should wipe the in-memory database
        django.db.close_old_connections()
        for conn in django.db.connections.all():
            print(" ! Closing another connection to db...")
            conn.close()
    elif django.db.connections.databases['default']['ENGINE'] == 'django.db.backends.sqlite3':
        if os.path.isfile(currentdbname):
            try:
                print(" - deleting " + currentdbname)
                os.remove(currentdbname)
            except OSError:
                print(" ! OSError on removing: " + currentdbname + "\n ! Is the file open in another app? Is the server running?\n")
                raise
        else:
            print(" - No database file found: " + currentdbname + " ..continuing, will create it.\n")
    else:
        # Database identifiers cannot be passed as bound query parameters, so
        # the name is interpolated. It comes from the project settings, not
        # from user input.
        # The context manager closes the cursor even if a statement fails.
        with django.db.connection.cursor() as cursor:
            cursor.execute("DROP DATABASE %s" % currentdbname)
            cursor.execute("CREATE DATABASE %s" % currentdbname)
            cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % currentdbname)
            cursor.execute("USE %s" % currentdbname)

    print(" - Migrating: " + django.db.connections.databases['default']['NAME'])

    management.call_command('makemigrations','core', interactive=False)
    management.call_command('makemigrations','flatpages', interactive=False)
    management.call_command('migrate', interactive=False)
    management.call_command('migrate','core', interactive=False)
    management.call_command('migrate','flatpages', interactive=False)

    print(" - done migration on: " + settings.DATABASES['default']['NAME'])
    # count() asks the database for the number of rows instead of loading
    # every User object just to measure the list.
    print("users in db already: ",User.objects.count())
    try:
        print(" - Setting up admin user on: " + django.db.connections.databases['default']['NAME'])
        # {:.5} truncates the password so only its first 5 characters are shown
        print(" - user: {} ({:.5}...) <{}> ".format(expouser, expouserpass, expouseremail))
        user = User.objects.create_user(expouser, expouseremail, expouserpass)
        user.is_staff = True
        user.is_superuser = True
        user.save()
    except Exception as e:
        # 'except Exception' rather than a bare 'except' so that Ctrl-C and
        # SystemExit are not swallowed. The real error is printed as well,
        # because not every failure here is an integrity error.
        print(" ! INTEGRITY ERROR user on: " + settings.DATABASES['default']['NAME'])
        print(" ! {}: {}".format(type(e).__name__, e))
        print(django.db.connections.databases['default']['NAME'])
        print(" ! You probably have not got a clean db when you thought you had.\n")
        print(" ! Also you are probably NOT running an in-memory db now.\n")
        print("users  in db: ",User.objects.count())
        print("tables in db: ",len(connection.introspection.table_names()))
        memdumpsql(fn='integrityfail.sql')
        django.db.connections.databases['default']['NAME'] = ':memory:'
        #raise

def memdumpsql(fn):
    """Write a line-by-line dump of the default database connection to a file.

    The lines are produced by the project-local dump._iterdump() helper and
    written as UTF-8 text, one per line.

    Args:
        fn: path of the file to (over)write.

    Returns:
        True, always.
    """
    djconn = django.db.connection
    from dump import _iterdump
    # Let the file object do the encoding. The previous code interpolated
    # line.encode("utf8") into a str, which under Python 3 writes the bytes
    # repr (b'...') into the dump instead of the SQL text.
    with open(fn, 'w', encoding="utf8") as f:
        for line in _iterdump(djconn):
            f.write('%s\n' % line)
    return True

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# These functions moved to a different file - not used currently.
# import logbooksdump
# def import_auto_logbooks():
# def dumplogbooks():

# def writeCaves():
    # Writes out all cave and entrance HTML files to
    # folder specified in settings.CAVEDESCRIPTIONS
#    for cave in Cave.objects.all():
#        cave.writeDataFile()
#    for entrance in Entrance.objects.all():
#        entrance.writeDataFile()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

class JobQueue():
    """A list of import operations to run. Always reports profile times
    in the same order.

    Timings are kept in self.results, a dict mapping each name in
    self.results_order to a list with one entry per recorded run. The dict is
    loaded from and saved to the JSON file named by self.tfile.
    """

    def __init__(self,run):
        """Create an empty queue.

        Args:
            run: label identifying this run in the stored profile data
                 (may be None).
        """
        self.runlabel = run
        self.queue = [] # tuples of (jobname, jobfunction)
        self.results_order=[
            "date","runlabel","reinit", "caves", "people",
            "logbooks", "QMs", "scans", "survex",
            "drawings", "test" ]
        # one (initially empty) history list per reported column
        self.results = {k: [] for k in self.results_order}
        self.tfile = "import_profile.json"
        self.htmlfile = "profile.html" # for HTML results table. Not yet done.

    #Adding elements to queue - enqueue
    def enq(self,label,func):
        """Append a job to the queue.

        Args:
            label: key in self.results under which the job's duration is stored.
            func: callable taking no arguments.

        Returns:
            True, always.
        """
        self.queue.append((label,func))
        return True

    def loadprofiles(self):
        """Load timings for previous runs from file, then append a placeholder
        (None) for the current run to every column.

        A missing file is not an error. An unreadable file, malformed JSON or
        a JSON document that is not an object is reported on stdout and
        otherwise ignored, leaving the stored results untouched.

        Returns:
            True, always.
        """
        if os.path.isfile(self.tfile):
            try:
                # 'with' closes the file on every path. The old code called
                # f.close() after the handler, which raised NameError when
                # open() itself had failed.
                with open(self.tfile, "r") as f:
                    data = json.load(f)
                if not isinstance(data, dict):
                    # e.g. a JSON list would otherwise add stray keys
                    raise ValueError("profile data is not a JSON object")
                self.results.update(data)
            except (OSError, ValueError):
                # json.JSONDecodeError is a subclass of ValueError
                print("FAILURE parsing JSON file %s" % (self.tfile))
                # Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12
        self.append_placeholders()
        return True

    def saveprofiles(self):
        """Write all stored timings to self.tfile as JSON. Returns True."""
        with open(self.tfile, 'w') as f:
            json.dump(self.results, f)
        return True

    def runqonce(self):
        """Run all the jobs in the queue provided - once

        The trailing placeholder in the "date" and "runlabel" columns, and in
        the column of each job that is run, is replaced with the real value,
        so loadprofiles() or append_placeholders() must have been called
        first. Each job's wall-clock time and memory change are printed.

        Returns:
            True, always.
        """
        print("** Running job ", self.runlabel,end=" to ")
        print(django.db.connections.databases['default']['NAME'])
        jobstart = time.time()
        print("-- Initial memory in use {:.3f} MB".format(get_process_memory()))
        self.results["date"].pop()
        self.results["date"].append(jobstart)
        self.results["runlabel"].pop()
        self.results["runlabel"].append(self.runlabel)

        for label, func in self.queue:
            start = time.time()
            memstart = get_process_memory()
            #--------------------
            func()
            #--------------------
            memend = get_process_memory()
            duration = time.time()-start
            print("\n*- Ended \"",  label, "\"  {:.1f} seconds + {:.3f} MB ({:.3f} MB)".format(duration, memend-memstart, memend))
            self.results[label].pop()  # the null item
            self.results[label].append(duration)

        jobend = time.time()
        jobduration = jobend-jobstart
        print("** Ended job   %s  -  %.1f seconds total." % (self.runlabel,jobduration))
        return True

    def append_placeholders(self):
        """Append None to every reported column as the slot for a new run."""
        for j in self.results_order:
            self.results[j].append(None) # append a placeholder

    def run_now_django_tests(self,n):
        """Run the Django 'test' management command at verbosity n, then
        close old database connections."""
        management.call_command('test', verbosity=n)
        django.db.close_old_connections()

    def run(self):
        """Load previous timings, run the queue once and save the timings.

        When the configured database NAME is ':memory:' its contents are
        dumped to 'memdump.sql' before the timings are saved.

        Returns:
            True, always.
        """
        self.loadprofiles()
        print("-- start   ",  django.db.connections.databases['default']['ENGINE'], django.db.connections.databases['default']['NAME'])
        self.runqonce()
        if settings.DATABASES['default']['NAME'] ==":memory:":
            memdumpsql('memdump.sql') # saved contents of in-memory db, could be imported later..
        self.saveprofiles()
        return True

    def showprofile(self):
        """Prints out the time it took to run the jobqueue

        One row is printed per column in self.results_order, stopping at
        "test". Dates are shown as a difference in days from the most recent
        run. For timing rows the percentage change between the last two runs
        is appended when it exceeds 0.1%.

        Returns:
            True, always.
        """
        for k in self.results_order:
            if k == "test":
                break
            if k == "date":
                print("     days ago ", end=' ')
            else:
                print('%10s (s)' % k, end=' ')
            r = self.results[k]
            last = len(r) - 1

            for i, value in enumerate(r):
                if k == "runlabel":
                    print('%8s' % (value if value else "      - "), end=' ')
                elif k == "date":
                    # Calculate dates as days before present.
                    # Missing dates print nothing at all.
                    if not value:
                        continue
                    if i == last:
                        print("    this", end=' ')
                    else:
                        # prints one place to the left of where you expect
                        # Reference point is the latest run, or the one
                        # before it if the latest is still a placeholder.
                        if r[last]:
                            s = value - r[last]
                        elif r[last - 1]:
                            s = value - r[last - 1]
                        else:
                            s = 0
                        days = s/(24*60*60)
                        print('%8.2f' % days, end=' ')
                elif value:
                    print('%8.1f' % value, end=' ')
                    # i > 0: a single entry has no predecessor to compare with
                    if i == last and i > 0 and r[i-1]:
                        percen = 100* (value - r[i-1])/r[i-1]
                        if abs(percen) > 0.1:
                            print('%8.1f%%' % percen, end=' ')
                else:
                    print("      - ", end=' ')
            print("")
        print("\n")
        return True


def usage():
    """Print the command-line help text for this script to stdout.

    The text lists every command handled by the __main__ block of this file
    and names the JSON file in which profiling data is stored.
    """
    print("""Usage is 'python databaseReset.py <command> [runlabel]'
             where command is:
             test      - testing... imports caves and subcaves and prints profile. Deletes nothing.
             profile   - print the profile from previous runs. Import nothing.
             help      - print this usage message.

             reset     - normal usage: clear database and reread everything from files - time-consuming

             init      - initialisation. Automatic if you run reset.
             caves     - read in the caves (must run first after initialisation)
             people    - read in the people from folk.csv (must run after 'caves')
             logbooks  - read in the logbooks
             QMs       - read in the QM csv files (older caves only)
             scans     - the survey scans in all the wallets (must run before survex)
             survex    - read in the survex files - all the survex blocks and entrances x/y/z
             loadpos   - run the loadpos import (timed in the 'survex' profile column)
             drawings  - read in the Tunnel & Therion files - which scans the survey scans too

             autologbooks - Not used. read in autologbooks (what are these?)
             dumplogbooks - Not used. write out autologbooks (not working?)

             and [runlabel] is an optional string identifying this run of the script
             in the stored profiling data 'import_profile.json'

             caves and logbooks must be run on an empty db before the others as they
             set up db tables used by the others.
             """)

if __name__ == "__main__":
    # Command-line entry point: 'python databaseReset.py <command> [runlabel]'.
    # The selected import jobs are queued on a JobQueue, run once, and the
    # timing profile is printed afterwards.

    if os.geteuid() == 0:
        print("Do not run as root or using sudo - file permissions for cache files and logs will break")
        print("Aborting run.")
        sys.exit()

    if len(sys.argv) == 1:
        usage()
        sys.exit()

    # The command is always the first argument. Matching with
    # '"x" in sys.argv' let a runlabel (e.g. 'reset test') or the script path
    # itself be mistaken for a command.
    command = sys.argv[1]
    # Any extra argument: the last one labels this run in the profile data.
    runlabel = sys.argv[-1] if len(sys.argv) > 2 else None

    jq = JobQueue(runlabel)

    # Commands that queue exactly one job: command -> (profile column, function)
    single_jobs = {
        "init": ("reinit", reinit_db),
        "caves": ("caves", import_caves),
        "logbooks": ("logbooks", import_logbooks),
        "people": ("people", import_people),
        "QMs": ("QMs", import_QMs),
        "scans": ("scans", import_surveyscans),
        "survex": ("survex", import_survex),
        "loadpos": ("survex", import_loadpos),  # timed in the 'survex' column
        "drawings": ("drawings", import_drawingsfiles),
    }

    if command in single_jobs:
        jq.enq(*single_jobs[command])
    elif command == "test":
        jq.enq("caves",import_caves)
        # runs immediately, outside the timed queue
        import_subcaves()
        #jq.enq("people",import_people)
        #jq.run_now_django_tests(2)
    elif command == "reset":
        jq.enq("reinit",reinit_db)
        jq.enq("caves",import_caves)
        jq.enq("people",import_people)
        jq.enq("scans",import_surveyscans)
        jq.enq("logbooks",import_logbooks)
        jq.enq("QMs",import_QMs)
        jq.enq("drawings",import_drawingsfiles)
        jq.enq("survex",import_survex)
    elif command == "autologbooks": # untested in 2020
        # The bare name import_auto_logbooks is not defined in this file.
        # NOTE(review): assumed to live in troggle.logbooksdump, which this
        # file imports and to which these functions were moved - confirm.
        troggle.logbooksdump.import_auto_logbooks()
    elif command == "dumplogbooks": # untested in 2020
        # NOTE(review): same assumption as for autologbooks - confirm.
        troggle.logbooksdump.dumplogbooks()
#   elif command == "writecaves": # untested in 2020 - will overwrite input files!!
#       writeCaves()
    elif command == "profile":
        jq.loadprofiles()
        jq.showprofile()
        sys.exit()
    elif command == "help":
        usage()
        sys.exit()
    else:
        usage()
        print("%s not recognised as a command." % command)
        sys.exit()

    jq.run()
    jq.showprofile()