import os import time import timeit import settings os.environ['PYTHONPATH'] = settings.PYTHON_PATH os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings') from django.core import management from django.db import connection from django.contrib.auth.models import User from django.http import HttpResponse from django.core.urlresolvers import reverse from troggle.core.models import Cave, Entrance import troggle.flatpages.models import json databasename=settings.DATABASES['default']['NAME'] expouser=settings.EXPOUSER expouserpass=settings.EXPOUSERPASS expouseremail=settings.EXPOUSER_EMAIL def reinit_db(): """Rebuild database from scratch. Deletes the file first if sqlite is used, otherwise it drops the database and creates it. """ if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.sqlite3': try: os.remove(databasename) except OSError: pass else: cursor = connection.cursor() cursor.execute("DROP DATABASE %s" % databasename) cursor.execute("CREATE DATABASE %s" % databasename) cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % databasename) cursor.execute("USE %s" % databasename) syncuser() def syncuser(): """Sync user - needed after reload """ print("Synchronizing user") management.call_command('syncdb', interactive=False) user = User.objects.create_user(expouser, expouseremail, expouserpass) user.is_staff = True user.is_superuser = True user.save() def dirsredirect(): """Make directories that troggle requires and sets up page redirects """ #should also deal with permissions here. if not os.path.isdir(settings.PHOTOS_ROOT): os.mkdir(settings.PHOTOS_ROOT) for oldURL, newURL in [("indxal.htm", reverse("caveindex"))]: f = troggle.flatpages.models.Redirect(originalURL = oldURL, newURL = newURL) f.save() def import_caves(): import parsers.caves print("Importing Caves") parsers.caves.readcaves() def import_people(): import parsers.people print("Importing People (folk.csv)") parsers.people.LoadPersonsExpos() def import_logbooks(): import parsers.logbooks print("Importing Logbooks") parsers.logbooks.LoadLogbooks() def import_QMs(): print("Importing QMs (old caves)") import parsers.QMs # import process itself runs on qm.csv in only 3 caves, not 264! def import_survexblks(): import parsers.survex print("Importing Survex Blocks") parsers.survex.LoadAllSurvexBlocks() def import_survexpos(): import parsers.survex print("Importing Survex x/y/z Positions") parsers.survex.LoadPos() def import_surveyimgs(): """This appears to store data in unused objects. The code is kept for future re-working to manage progress against notes, plans and elevs. """ import parsers.surveys print("Importing survey images") parsers.surveys.parseSurveys(logfile=settings.LOGFILE) def import_surveyscans(): import parsers.surveys print("Importing Survey Scans") parsers.surveys.LoadListScans() def import_tunnelfiles(): import parsers.surveys print("Importing Tunnel files") parsers.surveys.LoadTunnelFiles() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - def import_auto_logbooks(): import parsers.logbooks import os for pt in troggle.core.models.PersonTrip.objects.all(): pt.delete() for lbe in troggle.core.models.LogbookEntry.objects.all(): lbe.delete() for expedition in troggle.core.models.Expedition.objects.all(): directory = os.path.join(settings.EXPOWEB, "years", expedition.year, "autologbook") for root, dirs, filenames in os.walk(directory): for filename in filenames: print(os.path.join(root, filename)) parsers.logbooks.parseAutoLogBookEntry(os.path.join(root, filename)) #Temporary function until definitive source of data transfered. from django.template.defaultfilters import slugify from django.template import Context, loader def dumplogbooks(): def get_name(pe): if pe.nickname: return pe.nickname else: return pe.person.first_name for lbe in troggle.core.models.LogbookEntry.objects.all(): dateStr = lbe.date.strftime("%Y-%m-%d") directory = os.path.join(settings.EXPOWEB, "years", lbe.expedition.year, "autologbook") if not os.path.isdir(directory): os.mkdir(directory) filename = os.path.join(directory, dateStr + "." + slugify(lbe.title)[:50] + ".html") if lbe.cave: print(lbe.cave.reference()) trip = {"title": lbe.title, "html":lbe.text, "cave": lbe.cave.reference(), "caveOrLocation": "cave"} else: trip = {"title": lbe.title, "html":lbe.text, "location":lbe.place, "caveOrLocation": "location"} pts = [pt for pt in lbe.persontrip_set.all() if pt.personexpedition] persons = [{"name": get_name(pt.personexpedition), "TU": pt.time_underground, "author": pt.is_logbook_entry_author} for pt in pts] f = open(filename, "wb") template = loader.get_template('dataformat/logbookentry.html') context = Context({'trip': trip, 'persons': persons, 'date': dateStr, 'expeditionyear': lbe.expedition.year}) output = template.render(context) f.write(unicode(output).encode( "utf-8" )) f.close() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - class JobQueue(): """A list of import operations to run. Always reports times in the same order. """ def __init__(self,run): self.runlabel = run self.queue = [] # tuples of (jobname, jobfunction) self.results = {} self.results_order=[ "date","runlabel","reinit", "caves", "people", "logbooks", "scans", "QMs", "survexblks", "tunnel", "surveyimgs", "test", "dirsredirect", "syncuser", "survexpos" ] for k in self.results_order: self.results[k]=[] self.tfile = "import_profile.json" self.htmlfile = "profile.html" #Adding elements to queue - enqueue def enq(self,label,func): self.queue.append((label,func)) return True #Removing the last element from the queue - dequeue # def deq(self): # if len(self.queue)>0: # return self.queue.pop() # return ("Queue Empty!") def run(self): if os.path.isfile(self.tfile): try: f = open(self.tfile, "r") data = json.load(f) for j in data: self.results[j] = data[j] except: print "FAILURE parsing JSON file %s" % (self.tfile) # Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12 f.close() print "** Running job ", self.runlabel jobstart = time.time() self.results["date"].append(jobstart) self.results["runlabel"].append(self.runlabel) for i in self.queue: start = time.time() i[1]() # looks ugly but invokes function passed in the second item in the tuple duration = time.time()-start print "\n*- Ended \"", i[0], "\" %.1f seconds" % duration self.results[i[0]].append(duration) with open(self.tfile, 'w') as f: json.dump(self.results, f) jobend = time.time() jobduration = jobend-jobstart print "** Ended all jobs. %.1f seconds" % jobduration # currently uses django db whatever it was. CHANGE this to explicitly use # a new sqlite3 db and then import the sql dump of that into the troggle db # instead of loading directly into the troggle sqlite db. # in-memory ":memory:" sqlite is ~ 7x faster and all of troggle can be # loaded in 6 minutes that way djconn = django.db.connection from dump import _iterdump with open('memdump.sql', 'w') as f: for line in _iterdump(djconn): f.write('%s\n' % line.encode("utf8")) # now import the memory image sql into (to do) return True def showprofile(self): """Prints out the time it took to run the jobqueue""" for k in self.results_order: if k =="dirsredirect": break elif k =="syncuser": break elif k =="test": break elif k =="date": print " days ago ", else: print '%9s (s)' % k, percen=0 r = self.results[k] #print "min=",min for i in range(len(r)): if k == "runlabel": if r[i]: rp = r[i] else: rp = " - " print '%8s' % rp, elif k =="date": # Calculate dates as days before present if r[i]: if i == len(r)-1: print " this", else: # prints one place to the left of where you expect days = (r[i]-r[len(r)-1])/(24*60*60) print '%8.2f' % days, elif r[i]: print '%8.1f' % r[i], if i == len(r)-1 and r[i-1]: percen = 100* (r[i] - r[i-1])/r[i-1] if abs(percen) >0.1: print '%8.1f%%' % percen, else: print " - ", print "" return True def usage(): print("""Usage is 'python databaseReset.py [runlabel]' where command is: reset - normal usage: clear database and reread everything from files - time-consuming caves - read in the caves logbooks - read in the logbooks people - read in the people from folk.csv QMs - read in the QM csv files (older caves only) reinit - clear database (delete everything) and make empty tables. Import nothing. scans - the survey scans in all the wallets survex - read in the survex files - all the survex blocks and the x/y/z positions survexpos - just the x/y/z Pos out of the survex files tunnel - read in the Tunnel files - which scans the survey scans too resetend - (archaic?) writecaves - *disabled* (archaic?) autologbooks - read in autologbooks (what are these?) dumplogbooks - write out autologbooks (not working?) syncuser - needed after reloading database from SQL backup surveyimgs - read in scans by expo, must run after "people". Not used. test - testing... and [runlabel] is an optional string identifying this run of the script in the stored profiling data 'import-profile.json' caves and logbooks must be run on an empty db before the others as they set up db tables used by the others. """) if __name__ == "__main__": import troggle.core.models import sys import django django.setup() if len(sys.argv)>2: runlabel = sys.argv[len(sys.argv)-1] else: runlabel=None jq = JobQueue(runlabel) if len(sys.argv)==1: usage() exit() elif "test" in sys.argv: jq.enq("reinit",reinit_db) jq.enq("dirsredirect",dirsredirect) jq.enq("caves",import_caves) jq.enq("people",import_people) jq.enq("survex",import_survexblks) #jq.enq("logbooks",import_logbooks) elif "caves" in sys.argv: jq.enq("caves",import_caves) elif "logbooks" in sys.argv: jq.enq("logbooks",import_logbooks) elif "people" in sys.argv: jq.enq("people",import_people) elif "QMs" in sys.argv: jq.enq("QMs",import_QMs) elif "reload_db" in sys.argv: jq.enq("reload",reload_db) elif "reset" in sys.argv: jq.enq("reinit",reinit_db) jq.enq("dirsredirect",dirsredirect) jq.enq("caves",import_caves) jq.enq("people",import_people) jq.enq("scans",import_surveyscans) jq.enq("logbooks",import_logbooks) jq.enq("QMs",import_QMs) jq.enq("survexblks",import_survexblks) jq.enq("survexpos",import_survexpos) jq.enq("tunnel",import_tunnelfiles) elif "scans" in sys.argv: jq.enq("scans",import_surveyscans) elif "survex" in sys.argv: jq.enq("survexblks",import_survexblks) jq.enq("survexpos",import_survexpos) elif "survexpos" in sys.argv: jq.enq("survexpos",import_survexpos) elif "surveys" in sys.argv: jq.enq("surveyimgs",import_surveyimgs) elif "tunnel" in sys.argv: jq.enq("tunnel",import_tunnelfiles) elif "help" in sys.argv: usage() elif "resetend" in sys.argv: jq.enq("QMs",import_QMs) jq.enq("tunnel",import_tunnelfiles) jq.enq("surveyimgs",import_surveyimgs) #import_descriptions() # no longer present #parse_descriptions() # no longer present # elif "writeCaves" in sys.argv: # writeCaves() # no longer present elif "autologbooks" in sys.argv: import_auto_logbooks() elif "dumplogbooks" in sys.argv: dumplogbooks() else: usage() print("%s not recognised as a command." % sys.argv[1]) exit() jq.run() jq.showprofile()