import os
import time
import timeit

import settings

os.environ['PYTHONPATH'] = settings.PYTHON_PATH
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')

from django.core import management
from django.db import connection
from django.contrib.auth.models import User
from django.http import HttpResponse
from django.core.urlresolvers import reverse

from troggle.core.models import Cave, Entrance
import troggle.flatpages.models

databasename = settings.DATABASES['default']['NAME']
expouser = settings.EXPOUSER
expouserpass = settings.EXPOUSERPASS
expouseremail = settings.EXPOUSER_EMAIL


def reload_db():
    if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.sqlite3':
        try:
            os.remove(databasename)
        except OSError:
            pass
    else:
        cursor = connection.cursor()
        cursor.execute("DROP DATABASE %s" % databasename)
        cursor.execute("CREATE DATABASE %s" % databasename)
        cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % databasename)
        cursor.execute("USE %s" % databasename)
    management.call_command('syncdb', interactive=False)
    user = User.objects.create_user(expouser, expouseremail, expouserpass)
    user.is_staff = True
    user.is_superuser = True
    user.save()


def make_dirs():
    """Make directories that troggle requires"""
    # should also deal with permissions here
    if not os.path.isdir(settings.PHOTOS_ROOT):
        os.mkdir(settings.PHOTOS_ROOT)


def import_caves():
    import parsers.caves
    print("Importing Caves")
    parsers.caves.readcaves()


def import_people():
    import parsers.people
    parsers.people.LoadPersonsExpos()


def import_logbooks():
    import parsers.logbooks
    parsers.logbooks.LoadLogbooks()


def import_survex():
    import parsers.survex
    parsers.survex.LoadAllSurvexBlocks()
    parsers.survex.LoadPos()


def import_QMs():
    import parsers.QMs
    # import process runs on qm.csv in only 3 caves, not 264!


def import_surveys():
    import parsers.surveys
    parsers.surveys.parseSurveys(logfile=settings.LOGFILE)


def import_surveyscans():
    import parsers.surveys
    parsers.surveys.LoadListScans()


def import_tunnelfiles():
    import parsers.surveys
    parsers.surveys.LoadTunnelFiles()


def pageredirects():
    for oldURL, newURL in [("indxal.htm", reverse("caveindex"))]:
        f = troggle.flatpages.models.Redirect(originalURL=oldURL, newURL=newURL)
        f.save()


def reset():  # unused now that we have a jobqueue
    """Wipe the troggle database and import everything from legacy data."""
    reload_db()
    make_dirs()
    pageredirects()
    import_caves()
    import_people()
    import_surveyscans()
    import_logbooks()
    import_QMs()
    import_survex()
    import_tunnelfiles()
    import_surveys()

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

def import_auto_logbooks():
    import parsers.logbooks
    import os
    for pt in troggle.core.models.PersonTrip.objects.all():
        pt.delete()
    for lbe in troggle.core.models.LogbookEntry.objects.all():
        lbe.delete()
    for expedition in troggle.core.models.Expedition.objects.all():
        directory = os.path.join(settings.EXPOWEB,
                                 "years",
                                 expedition.year,
                                 "autologbook")
        for root, dirs, filenames in os.walk(directory):
            for filename in filenames:
                print(os.path.join(root, filename))
                parsers.logbooks.parseAutoLogBookEntry(os.path.join(root, filename))

# Temporary function until the definitive source of data is transferred.
from django.template.defaultfilters import slugify
from django.template import Context, loader


def dumplogbooks():
    def get_name(pe):
        if pe.nickname:
            return pe.nickname
        else:
            return pe.person.first_name

    for lbe in troggle.core.models.LogbookEntry.objects.all():
        dateStr = lbe.date.strftime("%Y-%m-%d")
        directory = os.path.join(settings.EXPOWEB,
                                 "years",
                                 lbe.expedition.year,
                                 "autologbook")
        if not os.path.isdir(directory):
            os.mkdir(directory)
        filename = os.path.join(directory,
                                dateStr + "." + slugify(lbe.title)[:50] + ".html")
        if lbe.cave:
            print(lbe.cave.reference())
            trip = {"title": lbe.title, "html": lbe.text,
                    "cave": lbe.cave.reference(), "caveOrLocation": "cave"}
        else:
            trip = {"title": lbe.title, "html": lbe.text,
                    "location": lbe.place, "caveOrLocation": "location"}
        pts = [pt for pt in lbe.persontrip_set.all() if pt.personexpedition]
        persons = [{"name": get_name(pt.personexpedition),
                    "TU": pt.time_underground,
                    "author": pt.is_logbook_entry_author} for pt in pts]
        f = open(filename, "wb")
        template = loader.get_template('dataformat/logbookentry.html')
        context = Context({'trip': trip,
                           'persons': persons,
                           'date': dateStr,
                           'expeditionyear': lbe.expedition.year})
        output = template.render(context)
        f.write(unicode(output).encode("utf-8"))
        f.close()

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

class JobQueue():
    """A list of import operations to run. Always reports times in the same order."""

    # Constructor creates a list
    def __init__(self, run):
        self.runlabel = run
        self.queue = []  # tuples of (jobname, jobfunction)
        self.results = {}
        self.results_order = [
            "date", "runlabel", "reload", "caves", "people",
            "logbooks", "scans", "QMs", "survex",
            "tunnel", "surveys", "test", "makedirs", "redirect"]
        for k in self.results_order:
            self.results[k] = []

    # Adding elements to queue
    def enq(self, label, func):
        self.queue.append((label, func))
        return True

    # Removing the last element from the queue
    def deq(self):
        if len(self.queue) > 0:
            return self.queue.pop()
        return ("Queue Empty!")

    def size(self):
        return len(self.queue)

    def run(self):
        import json
        tfile = "import_profile.json"
        if os.path.isfile(tfile):
            try:
                f = open(tfile, "r")
                data = json.load(f)
                for j in data:
                    self.results[j] = data[j]
            except:
                print "FAILURE parsing JSON file %s" % (tfile)
                # Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12
            f.close()

        for i in self.queue:
            print i, self.results[i[0]]
            self.results[i[0]].append(1.0)

        print "** Running job ", self.runlabel
        for i in self.queue:
            #print "*- Running \"", i[0], "\""
            start = time.time()
            i[1]()
            duration = time.time() - start
            print "\n*- Ended \"", i[0], "\" %.1f seconds" % duration
            self.results[i[0]].append(duration)
        self.results["date"].append(start)
        self.results["runlabel"].append(self.runlabel)
        print "** Ended all jobs."
        #print self.results
        with open(tfile, 'w') as f:
            json.dump(self.results, f)

        for k in self.results_order:
            percen = 0
            if k == "runlabel":
                pass
            elif k == "date":
                # Calculate dates as days before present to one decimal place
                pass
            elif len(self.results[k]) > 3:
                lst = self.results[k]
                e = len(lst) - 1
                percen = 100 * (lst[e] - lst[e-1]) / lst[e-1]
                if abs(percen) > 0.1:
                    print '%15s %8.1f%%' % (k, percen)
                else:
                    print '%15s ' % (k)
        return True


def importtest():
    from random import randrange
    k = 0
    for i in range(5 + randrange(15)):
        for j in range(i):
            k += i
    #print k,
    return True


def usage():
    print("""Usage is 'python databaseReset.py <command> [runlabel]'
       where command is:
         reset        - this is normal usage: clear the database and reread everything from files - time-consuming
         caves        - read in the caves
         logbooks     - read in just the logbooks
         people       - read in the people from folk.csv
         QMs          - read in the QM csv files
         reload_db    - clear the database (delete everything) and make empty tables
         scans        - NOT the scanned surveynotes ?!
         survex       - read in the survex files - all the survex blocks
         surveys      - read in the scanned surveynotes
         tunnel       - read in the Tunnel files - which scans the surveyscans too
         survexpos    - just the Pos out of the survex files (not part of reset)
         resetend     - (archaic?)
         writecaves   - *disabled* (archaic?)
         autologbooks - read in autologbooks (what are these?)
         dumplogbooks - write out autologbooks (not working?)
         test         - testing...

       and [runlabel] is an optional string identifying this run of the script
       in the stored profiling data 'import_profile.json'
    """)


if __name__ == "__main__":
    import troggle.core.models
    import sys
    import django
    django.setup()

    runlabel = sys.argv[len(sys.argv) - 1]
    jq = JobQueue(runlabel)

    if "test" in sys.argv:
        jq.enq("test", importtest)
        jq.enq("caves", importtest)
        jq.enq("people", importtest)
    elif "caves" in sys.argv:
        jq.enq("caves", import_caves)
    elif "logbooks" in sys.argv:
        # management.call_command('syncdb', interactive=False)  # this sets the path so that import settings works in import_survex
        jq.enq("logbooks", import_logbooks)
    elif "people" in sys.argv:
        jq.enq("people", import_people)
    elif "QMs" in sys.argv:
        jq.enq("QMs", import_QMs)
    elif "reload_db" in sys.argv:
        jq.enq("reload", reload_db)
    elif "reset" in sys.argv:
        jq.enq("reload", reload_db)
        jq.enq("makedirs", make_dirs)
        jq.enq("redirect", pageredirects)
        jq.enq("caves", import_caves)
        jq.enq("people", import_people)
        jq.enq("scans", import_surveyscans)
        jq.enq("logbooks", import_logbooks)
        jq.enq("QMs", import_QMs)
        jq.enq("survex", import_survex)
        jq.enq("tunnel", import_tunnelfiles)
        jq.enq("surveys", import_surveys)
    elif "scans" in sys.argv:
        jq.enq("scans", import_surveyscans)
    elif "survex" in sys.argv:
        # management.call_command('syncdb', interactive=False)  # this sets the path so that import settings works in import_survex
        jq.enq("survex", import_survex)
    elif "survexpos" in sys.argv:
        # management.call_command('syncdb', interactive=False)  # this sets the path so that import settings works in import_survex
        import parsers.survex
        jq.enq("survexpos", parsers.survex.LoadPos)
    elif "surveys" in sys.argv:
        jq.enq("surveys", import_surveys)
    elif "tunnel" in sys.argv:
        jq.enq("tunnel", import_tunnelfiles)
    elif "help" in sys.argv:
        usage()
    elif "resetend" in sys.argv:
        jq.enq("QMs", import_QMs)
        jq.enq("tunnel", import_tunnelfiles)
        jq.enq("surveys", import_surveys)
        #import_descriptions() # no longer present
        #parse_descriptions() # no longer present
    # elif "writeCaves" in sys.argv:
    #     writeCaves() # no longer present
    elif "autologbooks" in sys.argv:
        import_auto_logbooks()
    elif "dumplogbooks" in sys.argv:
        dumplogbooks()
    else:
        print("%s not recognised" % sys.argv)
        usage()

    jq.run()
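
# ---------------------------------------------------------------------------
# Example invocations (a sketch only; the run labels below are illustrative
# placeholders, not anything the script defines). The command word selects
# which jobs are queued, and the optional trailing argument is a free-form
# label stored alongside the timings in 'import_profile.json':
#
#   python databaseReset.py reset full-import
#   python databaseReset.py caves caves-only
#   python databaseReset.py test
#
# This assumes the script is run from the troggle directory so that the
# 'settings' module imported at the top of this file can be found.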