from __future__ import (absolute_import, division, print_function) import os import time import timeit import json import settings os.environ['PYTHONPATH'] = settings.PYTHON_PATH os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings') from django.core import management from django.db import connection, close_old_connections from django.contrib.auth.models import User from django.http import HttpResponse from django.core.urlresolvers import reverse from troggle.core.models_caves import Cave, Entrance import troggle.settings import troggle.flatpages.models import troggle.logbooksdump # NOTE databaseReset.py is *imported* by views_other.py as it is used in the control panel # presented there. if os.geteuid() == 0: # This protects the server from having the wrong file permissions written on logs and caches print("This script should be run as expo not root - quitting") exit() expouser=settings.EXPOUSER expouserpass=settings.EXPOUSERPASS expouseremail=settings.EXPOUSER_EMAIL def reinit_db(): """Rebuild database from scratch. Deletes the file first if sqlite is used, otherwise it drops the database and creates it. """ currentdbname = settings.DATABASES['default']['NAME'] if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.sqlite3': try: os.remove(currentdbname) except OSError: pass else: cursor = connection.cursor() cursor.execute("DROP DATABASE %s" % currentdbname) cursor.execute("CREATE DATABASE %s" % currentdbname) cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % currentdbname) cursor.execute("USE %s" % currentdbname) syncuser() def syncuser(): """Sync user - needed after reload """ print("Synchronizing user") management.call_command('migrate', interactive=False) user = User.objects.create_user(expouser, expouseremail, expouserpass) user.is_staff = True user.is_superuser = True user.save() def dirsredirect(): """Make directories that troggle requires and sets up page redirects """ #should also deal with permissions here. #if not os.path.isdir(settings.PHOTOS_ROOT): #os.mkdir(settings.PHOTOS_ROOT) for oldURL, newURL in [("indxal.htm", reverse("caveindex"))]: f = troggle.flatpages.models.Redirect(originalURL = oldURL, newURL = newURL) f.save() def import_caves(): import troggle.parsers.caves print("Importing Caves") troggle.parsers.caves.readcaves() def import_people(): import troggle.parsers.people print("Importing People (folk.csv)") troggle.parsers.people.LoadPersonsExpos() def import_logbooks(): import troggle.parsers.logbooks print("Importing Logbooks") troggle.parsers.logbooks.LoadLogbooks() def import_QMs(): print("Importing QMs (old caves)") import troggle.parsers.QMs # import process itself runs on qm.csv in only 3 old caves, not the modern ones! def import_survexblks(): import troggle.parsers.survex print("Importing Survex Blocks") troggle.parsers.survex.LoadAllSurvexBlocks() def import_survexpos(): import troggle.parsers.survex print("Importing Survex x/y/z Positions") troggle.parsers.survex.LoadPos() def import_surveyimgs(): """This appears to store data in unused objects. The code is kept for future re-working to manage progress against notes, plans and elevs. """ #import troggle.parsers.surveys print("NOT Importing survey images") #troggle.parsers.surveys.parseSurveys(logfile=settings.LOGFILE) def import_surveyscans(): import troggle.parsers.surveys print("Importing Survey Scans") troggle.parsers.surveys.LoadListScans() def import_tunnelfiles(): import troggle.parsers.surveys print("Importing Tunnel files") troggle.parsers.surveys.LoadTunnelFiles() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # These functions moved to a different file - not used currently. #import logbooksdump #def import_auto_logbooks(): #def dumplogbooks(): #def writeCaves(): # Writes out all cave and entrance HTML files to # folder specified in settings.CAVEDESCRIPTIONS # for cave in Cave.objects.all(): # cave.writeDataFile() # for entrance in Entrance.objects.all(): # entrance.writeDataFile() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - class JobQueue(): """A list of import operations to run. Always reports profile times in the same order. """ def __init__(self,run): self.runlabel = run self.queue = [] # tuples of (jobname, jobfunction) self.results = {} self.results_order=[ "date","runlabel","reinit", "caves", "people", "logbooks", "QMs", "scans", "survexblks", "survexpos", "tunnel", "surveyimgs", "test", "dirsredirect", "syncuser" ] for k in self.results_order: self.results[k]=[] self.tfile = "import_profile.json" self.htmlfile = "profile.html" # for HTML results table. Not yet done. #Adding elements to queue - enqueue def enq(self,label,func): self.queue.append((label,func)) return True #Removing the last element from the queue - dequeue # def deq(self): # if len(self.queue)>0: # return self.queue.pop() # return ("Queue Empty!") def loadprofiles(self): """Load timings for previous runs from file """ if os.path.isfile(self.tfile): try: f = open(self.tfile, "r") data = json.load(f) for j in data: self.results[j] = data[j] except: print("FAILURE parsing JSON file %s" % (self.tfile)) # Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12 f.close() for j in self.results_order: self.results[j].append(None) # append a placeholder return True def saveprofiles(self): with open(self.tfile, 'w') as f: json.dump(self.results, f) return True def memdumpsql(self): djconn = django.db.connection from dump import _iterdump with open('memdump.sql', 'w') as f: for line in _iterdump(djconn): f.write('%s\n' % line.encode("utf8")) return True def runqonce(self): """Run all the jobs in the queue provided - once """ print("** Running job ", self.runlabel) jobstart = time.time() self.results["date"].pop() self.results["date"].append(jobstart) self.results["runlabel"].pop() self.results["runlabel"].append(self.runlabel) for i in self.queue: start = time.time() i[1]() # looks ugly but invokes function passed in the second item in the tuple duration = time.time()-start print("\n*- Ended \"", i[0], "\" %.1f seconds" % duration) self.results[i[0]].pop() # the null item self.results[i[0]].append(duration) jobend = time.time() jobduration = jobend-jobstart print("** Ended job %s - %.1f seconds total." % (self.runlabel,jobduration)) return True def run(self): """First runs all the jobs in the queue against a scratch in-memory db then re-runs the import against the db specified in settings.py Default behaviour is to skip the in-memory phase. When MySQL is the db the in-memory phase crashes as MySQL does not properly relinquish some kind of db connection (not fixed yet) """ self.loadprofiles() # save db settings for later dbengine = settings.DATABASES['default']['ENGINE'] dbname = settings.DATABASES['default']['NAME'] dbdefault = settings.DATABASES['default'] skipmem = False if self.runlabel: if self.runlabel == "": skipmem = True elif self.runlabel[0:2] == "F-": skipmem = True else: skipmem = True print("-- ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE']) #print "-- DATABASES.default", settings.DATABASES['default'] if dbname ==":memory:": # just run, and save the sql file self.runqonce() self.memdumpsql() # saved contents of scratch db, could be imported later.. self.saveprofiles() elif skipmem: self.runqonce() self.saveprofiles() else: django.db.close_old_connections() # needed if MySQL running? # run all the imports through :memory: first settings.DATABASES['default']['ENGINE'] = 'django.db.backends.sqlite3' settings.DATABASES['default']['NAME'] = ":memory:" settings.DATABASES['default'] = {'ENGINE': 'django.db.backends.sqlite3', 'AUTOCOMMIT': True, 'ATOMIC_REQUESTS': False, 'NAME': ':memory:', 'CONN_MAX_AGE': 0, 'TIME_ZONE': 'UTC', 'OPTIONS': {}, 'HOST': '', 'USER': '', 'TEST': {'COLLATION': None, 'CHARSET': None, 'NAME': None, 'MIRROR': None}, 'PASSWORD': '', 'PORT': ''} print("-- ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE']) #print("-- DATABASES.default", settings.DATABASES['default']) # but because the user may be expecting to add this to a db with lots of tables already there, # the jobqueue may not start from scratch so we need to initialise the db properly first # because we are using an empty :memory: database # But initiating twice crashes it; so be sure to do it once only. # Damn. syncdb() is still calling MySQL somehow **conn_params not sqlite3. So crashes on expo server. if ("reinit",reinit_db) not in self.queue: reinit_db() if ("dirsredirect",dirsredirect) not in self.queue: dirsredirect() if ("caves",import_caves) not in self.queue: import_caves() # sometime extract the initialising code from this and put in reinit... if ("people",import_people) not in self.queue: import_people() # sometime extract the initialising code from this and put in reinit... django.db.close_old_connections() # maybe not needed here self.runqonce() self.memdumpsql() self.showprofile() # restore the original db and import again # if we wanted to, we could re-import the SQL generated in the first pass to be # blazing fast. But for the present just re-import the lot. settings.DATABASES['default'] = dbdefault settings.DATABASES['default']['ENGINE'] = dbengine settings.DATABASES['default']['NAME'] = dbname print("-- ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE']) django.db.close_old_connections() # maybe not needed here for j in self.results_order: self.results[j].pop() # throw away results from :memory: run self.results[j].append(None) # append a placeholder django.db.close_old_connections() # magic rune. works. found by looking in django.db__init__.py #django.setup() # should this be needed? self.runqonce() # crashes because it thinks it has no migrations to apply, when it does. self.saveprofiles() return True def showprofile(self): """Prints out the time it took to run the jobqueue """ for k in self.results_order: if k =="dirsredirect": break if k =="surveyimgs": break elif k =="syncuser": break elif k =="test": break elif k =="date": print(" days ago ", end=' ') else: print('%10s (s)' % k, end=' ') percen=0 r = self.results[k] for i in range(len(r)): if k == "runlabel": if r[i]: rp = r[i] else: rp = " - " print('%8s' % rp, end=' ') elif k =="date": # Calculate dates as days before present if r[i]: if i == len(r)-1: print(" this", end=' ') else: # prints one place to the left of where you expect if r[len(r)-1]: s = r[i]-r[len(r)-1] else: s = 0 days = (s)/(24*60*60) print('%8.2f' % days, end=' ') elif r[i]: print('%8.1f' % r[i], end=' ') if i == len(r)-1 and r[i-1]: percen = 100* (r[i] - r[i-1])/r[i-1] if abs(percen) >0.1: print('%8.1f%%' % percen, end=' ') else: print(" - ", end=' ') print("") print("\n") return True def usage(): print("""Usage is 'python databaseReset.py [runlabel]' where command is: test - testing... imports people and prints profile. Deletes nothing. profile - print the profile from previous runs. Import nothing. reset - normal usage: clear database and reread everything from files - time-consuming caves - read in the caves (must run first after reset) people - read in the people from folk.csv (must run before logbooks) logbooks - read in the logbooks QMs - read in the QM csv files (older caves only) scans - the survey scans in all the wallets (must run before survex) survex - read in the survex files - all the survex blocks but not the x/y/z positions survexpos - just the x/y/z Pos out of the survex files tunnel - read in the Tunnel files - which scans the survey scans too reinit - clear database (delete everything) and make empty tables. Import nothing. syncuser - needed after reloading database from SQL backup autologbooks - Not used. read in autologbooks (what are these?) dumplogbooks - Not used. write out autologbooks (not working?) surveyimgs - Not used. read in scans by-expo, must run after "people". and [runlabel] is an optional string identifying this run of the script in the stored profiling data 'import-profile.json' if [runlabel] is absent or begins with "F-" then it will skip the :memory: pass caves and logbooks must be run on an empty db before the others as they set up db tables used by the others. the in-memory phase is on an empty db, so always runs reinit, caves & people for this phase """) if __name__ == "__main__": import troggle.core.models import sys import django django.setup() if os.geteuid() == 0: print("Do not run as root or using sudo - file permissions for cache files and logs will break") print("Aborting run.") exit() if len(sys.argv)>2: runlabel = sys.argv[len(sys.argv)-1] else: runlabel=None jq = JobQueue(runlabel) if len(sys.argv)==1: usage() exit() elif "test" in sys.argv: jq.enq("caves",import_caves) jq.enq("people",import_people) elif "caves" in sys.argv: jq.enq("caves",import_caves) elif "logbooks" in sys.argv: jq.enq("logbooks",import_logbooks) elif "people" in sys.argv: jq.enq("people",import_people) elif "QMs" in sys.argv: jq.enq("QMs",import_QMs) elif "reset" in sys.argv: jq.enq("reinit",reinit_db) jq.enq("dirsredirect",dirsredirect) jq.enq("caves",import_caves) jq.enq("people",import_people) jq.enq("scans",import_surveyscans) jq.enq("logbooks",import_logbooks) jq.enq("QMs",import_QMs) jq.enq("survexblks",import_survexblks) jq.enq("survexpos",import_survexpos) jq.enq("tunnel",import_tunnelfiles) elif "scans" in sys.argv: jq.enq("scans",import_surveyscans) elif "survex" in sys.argv: jq.enq("survexblks",import_survexblks) elif "survexpos" in sys.argv: jq.enq("survexpos",import_survexpos) elif "tunnel" in sys.argv: jq.enq("tunnel",import_tunnelfiles) elif "surveyimgs" in sys.argv: jq.enq("surveyimgs",import_surveyimgs) # imports into tables which are never read elif "autologbooks" in sys.argv: # untested in 2020 import_auto_logbooks() elif "dumplogbooks" in sys.argv: # untested in 2020 dumplogbooks() # elif "writecaves" in sys.argv: # untested in 2020 - will overwrite input files!! # writeCaves() elif "profile" in sys.argv: jq.loadprofiles() jq.showprofile() exit() elif "help" in sys.argv: usage() exit() else: usage() print("%s not recognised as a command." % sys.argv[1]) exit() jq.run() jq.showprofile()