# Forked from expo/troggle (Python source, 390 lines, 14 KiB).
import os
|
|
import time
|
|
import timeit
|
|
import settings
|
|
os.environ['PYTHONPATH'] = settings.PYTHON_PATH
|
|
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')
|
|
from django.core import management
|
|
from django.db import connection
|
|
from django.contrib.auth.models import User
|
|
from django.http import HttpResponse
|
|
from django.core.urlresolvers import reverse
|
|
from troggle.core.models import Cave, Entrance
|
|
import troggle.flatpages.models
|
|
import json
|
|
|
|
# Values read once from the Django settings module and shared by the
# functions below: the name of the default database, and the credentials
# of the expo user account that syncuser() creates.
databasename = settings.DATABASES['default']['NAME']
expouser = settings.EXPOUSER
expouserpass = settings.EXPOUSERPASS
expouseremail = settings.EXPOUSER_EMAIL
|
|
|
|
def reinit_db():
    """Wipe the default database and rebuild it empty.

    With the sqlite3 backend the database file is removed (any OSError from
    the removal is ignored).  With any other backend the database is dropped,
    re-created, switched to the utf8 character set and selected.  Afterwards
    syncuser() is called to create the tables and the expo user.
    """
    engine = settings.DATABASES['default']['ENGINE']
    if engine != 'django.db.backends.sqlite3':
        db_cursor = connection.cursor()
        # The database name comes from settings, not from user input.
        for statement in ("DROP DATABASE %s",
                          "CREATE DATABASE %s",
                          "ALTER DATABASE %s CHARACTER SET=utf8",
                          "USE %s"):
            db_cursor.execute(statement % databasename)
    else:
        try:
            os.remove(databasename)
        except OSError:
            # Best effort: failure to remove the file is deliberately ignored.
            pass
    syncuser()
|
|
|
|
def syncuser():
    """Create the database tables and the expo user - needed after reload.

    Runs Django's 'syncdb' command non-interactively, then creates the user
    described by the EXPOUSER* settings and marks it as staff and superuser.
    """
    print("Synchronizing user")
    management.call_command('syncdb', interactive=False)
    account = User.objects.create_user(expouser, expouseremail, expouserpass)
    account.is_staff = account.is_superuser = True
    account.save()
|
|
|
|
def dirsredirect():
    """Create the directories troggle needs and store the page redirects.

    Makes settings.PHOTOS_ROOT when it is not already a directory, then
    saves one flatpages Redirect per (old URL, new URL) pair.
    """
    # should also deal with permissions here.
    photos_root = settings.PHOTOS_ROOT
    if not os.path.isdir(photos_root):
        os.mkdir(photos_root)

    legacy_redirects = [("indxal.htm", reverse("caveindex"))]
    for old_url, new_url in legacy_redirects:
        redirect = troggle.flatpages.models.Redirect(originalURL=old_url,
                                                     newURL=new_url)
        redirect.save()
|
|
|
|
def import_caves():
    """Announce the cave import and run parsers.caves.readcaves()."""
    from parsers import caves as cave_parser
    print("Importing Caves")
    cave_parser.readcaves()
|
|
|
|
def import_people():
    """Announce the people import and run parsers.people.LoadPersonsExpos()."""
    from parsers import people as people_parser
    print("Importing People (folk.csv)")
    people_parser.LoadPersonsExpos()
|
|
|
|
def import_logbooks():
    """Announce the logbook import and run parsers.logbooks.LoadLogbooks()."""
    from parsers import logbooks as logbook_parser
    print("Importing Logbooks")
    logbook_parser.LoadLogbooks()
|
|
|
|
def import_QMs():
    """Announce the QM import and import the parsers.QMs module.

    NOTE(review): no parser function is called here, so the work presumably
    happens as an import-time side effect of parsers.QMs - confirm.  If so,
    a second call in the same process would do nothing.
    """
    print("Importing QMs (old caves)")
    # import process itself runs on qm.csv in only 3 caves, not 264!
    import parsers.QMs
|
|
|
|
def import_survexblks():
    """Announce the survex block import and run LoadAllSurvexBlocks()."""
    from parsers import survex as survex_parser
    print("Importing Survex Blocks")
    survex_parser.LoadAllSurvexBlocks()
|
|
|
|
def import_survexpos():
    """Announce the survex position import and run parsers.survex.LoadPos()."""
    from parsers import survex as survex_parser
    print("Importing Survex x/y/z Positions")
    survex_parser.LoadPos()
|
|
|
|
def import_surveyimgs():
    """Run parsers.surveys.parseSurveys(), logging to settings.LOGFILE.

    The data this stores appears to end up in objects that nothing uses.
    The code is retained so it can later be re-worked to track progress
    against notes, plans and elevs.
    """
    from parsers import surveys as survey_parser
    print("Importing survey images")
    survey_parser.parseSurveys(logfile=settings.LOGFILE)
|
|
|
|
def import_surveyscans():
    """Announce the survey scan import and run parsers.surveys.LoadListScans()."""
    from parsers import surveys as survey_parser
    print("Importing Survey Scans")
    survey_parser.LoadListScans()
|
|
|
|
def import_tunnelfiles():
    """Announce the Tunnel file import and run parsers.surveys.LoadTunnelFiles()."""
    from parsers import surveys as survey_parser
    print("Importing Tunnel files")
    survey_parser.LoadTunnelFiles()
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
def import_auto_logbooks():
    """Replace the stored logbook data with the autologbook files on disk.

    Deletes every PersonTrip and every LogbookEntry one by one, then walks
    <EXPOWEB>/years/<year>/autologbook for each Expedition, printing the path
    of every file found and passing it to
    parsers.logbooks.parseAutoLogBookEntry().
    """
    from parsers import logbooks as logbook_parser

    models = troggle.core.models
    for person_trip in models.PersonTrip.objects.all():
        person_trip.delete()
    for entry in models.LogbookEntry.objects.all():
        entry.delete()

    for expedition in models.Expedition.objects.all():
        autolog_dir = os.path.join(settings.EXPOWEB, "years",
                                   expedition.year, "autologbook")
        for dirpath, _subdirs, names in os.walk(autolog_dir):
            for name in names:
                path = os.path.join(dirpath, name)
                print(path)
                logbook_parser.parseAutoLogBookEntry(path)
|
|
|
|
# Temporary function until the definitive source of data is transferred.
|
|
from django.template.defaultfilters import slugify
|
|
from django.template import Context, loader
|
|
def dumplogbooks():
    """Write every LogbookEntry out as an HTML file below settings.EXPOWEB.

    Each entry is rendered with the 'dataformat/logbookentry.html' template
    and saved, UTF-8 encoded, as

        <EXPOWEB>/years/<year>/autologbook/<date>.<slug of title>.html

    where the slug is cut to 50 characters.  The 'autologbook' directory is
    created when missing (with os.mkdir, so its parent must already exist).
    For entries that have a cave, the cave reference is printed as well.
    """
    def get_name(pe):
        """Return the nickname of a person-expedition, else the first name."""
        if pe.nickname:
            return pe.nickname
        return pe.person.first_name

    # The template is the same for every entry, so it is loaded only once.
    template = loader.get_template('dataformat/logbookentry.html')

    for lbe in troggle.core.models.LogbookEntry.objects.all():
        dateStr = lbe.date.strftime("%Y-%m-%d")
        directory = os.path.join(settings.EXPOWEB,
                                 "years",
                                 lbe.expedition.year,
                                 "autologbook")
        if not os.path.isdir(directory):
            os.mkdir(directory)
        filename = os.path.join(directory,
                                dateStr + "." + slugify(lbe.title)[:50] + ".html")

        if lbe.cave:
            print(lbe.cave.reference())
            trip = {"title": lbe.title, "html": lbe.text,
                    "cave": lbe.cave.reference(), "caveOrLocation": "cave"}
        else:
            trip = {"title": lbe.title, "html": lbe.text,
                    "location": lbe.place, "caveOrLocation": "location"}

        # Only person-trips linked to a person-expedition are written out.
        persons = [{"name": get_name(pt.personexpedition),
                    "TU": pt.time_underground,
                    "author": pt.is_logbook_entry_author}
                   for pt in lbe.persontrip_set.all() if pt.personexpedition]

        context = Context({'trip': trip,
                           'persons': persons,
                           'date': dateStr,
                           'expeditionyear': lbe.expedition.year})
        # Render before opening the file so that a rendering error does not
        # leave a truncated file behind; 'with' closes the file even when
        # the write fails.
        output = template.render(context)
        with open(filename, "wb") as f:
            # unicode() is the Python 2 builtin; this script is Python 2 code.
            f.write(unicode(output).encode("utf-8"))
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
class JobQueue():
    """A list of import operations to run.  Always reports times
    in the same order.

    ``self.results`` maps a row name to a list holding one value per run of
    the script.  The rows are kept between runs in the JSON file
    ``self.tfile``; ``self.results_order`` fixes the order in which
    showprofile() prints them.
    """

    # Rows that are recorded but not printed by showprofile().
    _HIDDEN_ROWS = ("test", "dirsredirect", "syncuser")

    def __init__(self, run):
        """Create an empty queue.

        Args:
            run: label stored with the results of this run (may be None).
        """
        self.runlabel = run
        self.queue = []  # tuples of (jobname, jobfunction)
        self.results_order = [
            "date", "runlabel", "reinit", "caves", "people",
            "logbooks", "QMs", "survexblks", "survexpos",
            "tunnel", "scans", "surveyimgs", "test", "dirsredirect", "syncuser"]
        self.results = {k: [] for k in self.results_order}
        self.tfile = "import_profile.json"
        self.htmlfile = "profile.html"

    def enq(self, label, func):
        """Add a job to the end of the queue.

        Args:
            label: name of the results row that receives the job's duration.
            func: callable invoked without arguments by run().

        Returns:
            True.
        """
        self.queue.append((label, func))
        return True

    def _load_previous_results(self):
        """Merge the results of earlier runs from ``self.tfile``, if present.

        A file that cannot be read, or that does not hold a JSON object, is
        reported and otherwise ignored.
        """
        if not os.path.isfile(self.tfile):
            return
        try:
            # 'with' closes the file on every path; the original called
            # f.close() even when open() itself had failed.
            with open(self.tfile, "r") as f:
                data = json.load(f)
            if not isinstance(data, dict):
                raise ValueError("expected a JSON object")
        except (IOError, ValueError):
            print("FAILURE parsing JSON file %s" % (self.tfile))
            # Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12
        else:
            self.results.update(data)

    def run(self):
        """Run the queued jobs in order and record how long each one took.

        Earlier results are read from ``self.tfile`` first and the updated
        results are written back to it after the last job.  Finally the
        database behind the Django connection is dumped as SQL statements
        to 'memdump.sql'.

        Returns:
            True.
        """
        self._load_previous_results()

        # Open a new column for this run in every known row; rows of jobs
        # that are not run keep the None placeholder.
        for j in self.results_order:
            self.results[j].append(None)

        print("** Running job  %s" % self.runlabel)
        jobstart = time.time()
        self.results["date"][-1] = jobstart
        self.results["runlabel"][-1] = self.runlabel

        for label, func in self.queue:
            start = time.time()
            func()
            duration = time.time() - start
            print("\n*- Ended \" %s \" %.1f seconds" % (label, duration))
            if label in self.results_order:
                self.results[label][-1] = duration
            else:
                # A label without a predefined row used to raise KeyError
                # here, after the job had already run.
                self.results.setdefault(label, []).append(duration)

        with open(self.tfile, 'w') as f:
            json.dump(self.results, f)

        jobend = time.time()
        jobduration = jobend - jobstart
        print("** Ended all jobs. %.1f seconds" % jobduration)

        # currently uses django db whatever it was. CHANGE this to explicitly use
        # a new sqlite3 db and then import the sql dump of that into the troggle db
        # instead of loading directly into the troggle sqlite db.
        # in-memory ":memory:" sqlite is ~ 7x faster and all of troggle can be
        # loaded in 6 minutes that way
        from dump import _iterdump
        with open('memdump.sql', 'w') as f:
            # 'connection' is the module-level import from django.db; the
            # name 'django' is only bound when this file runs as a script.
            for line in _iterdump(connection):
                f.write('%s\n' % line.encode("utf8"))

        # now import the memory image sql into (to do)
        return True

    def _profile_cells(self, k, row, i):
        """Return the formatted cell(s) for entry ``i`` of results row ``k``.

        Every cell is 8 characters wide so that the columns line up.
        """
        value = row[i]
        last = len(row) - 1
        if k == "runlabel":
            return ['%8s' % (value if value else " - ")]
        if k == "date":
            # Dates are shown relative to the most recent run, in days.
            if not value:
                # Keep the column in place when no date was recorded.
                return ['%8s' % "- "]
            if i == last:
                return ['%8s' % "this"]
            days = (value - row[last]) / (24 * 60 * 60)
            return ['%8.2f' % days]
        if not value:
            return ['%8s' % "- "]
        cells = ['%8.1f' % value]
        # For the latest run, add the change against the run before it.
        # 'i > 0' stops a single-entry row from comparing with itself.
        if i == last and i > 0 and row[i - 1]:
            percen = 100 * (value - row[i - 1]) / row[i - 1]
            if abs(percen) > 0.1:
                cells.append('%8.1f%%' % percen)
        return cells

    def showprofile(self):
        """Prints out the time it took to run the jobqueue.

        One line is printed per row of ``self.results_order`` (except the
        hidden rows), with one column per recorded run.

        Returns:
            True.
        """
        for k in self.results_order:
            if k in self._HIDDEN_ROWS:
                continue
            if k == "date":
                cells = ["%13s " % "days ago"]
            else:
                cells = ['%10s (s)' % k]
            row = self.results[k]
            for i in range(len(row)):
                cells.extend(self._profile_cells(k, row, i))
            print(" ".join(cells))
        return True
|
|
|
|
|
|
def usage():
    """Print the command-line help: the commands and the optional run label."""
    # The file name below matches JobQueue.tfile ('import_profile.json');
    # the help text previously spelled it with a hyphen.
    print("""Usage is 'python databaseReset.py <command> [runlabel]'
    where command is:
    reset      - normal usage: clear database and reread everything from files - time-consuming
    caves      - read in the caves
    logbooks   - read in the logbooks
    people     - read in the people from folk.csv
    QMs        - read in the QM csv files (older caves only)
    reinit     - clear database (delete everything) and make empty tables. Import nothing.
    scans      - the survey scans in all the wallets
    survex     - read in the survex files - all the survex blocks but not the x/y/z positions
    survexpos  - just the x/y/z Pos out of the survex files

    tunnel     - read in the Tunnel files - which scans the survey scans too

    resetend     - (archaic?)
    writecaves   - *disabled* (archaic?)
    autologbooks - read in autologbooks (what are these?)
    dumplogbooks - write out autologbooks (not working?)
    syncuser     - needed after reloading database from SQL backup
    surveyimgs   - read in scans by expo, must run after "people". Not used.
    test         - testing...

    and [runlabel] is an optional string identifying this run of the script
    in the stored profiling data 'import_profile.json'

    caves and logbooks must be run on an empty db before the others as they
    set up db tables used by the others.
    """)
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: 'python databaseReset.py <command> [runlabel]'.
    import troggle.core.models
    import sys
    import django
    django.setup()

    if len(sys.argv) == 1:
        usage()
        sys.exit()

    # The command is the first argument.  Matching command names anywhere
    # in sys.argv let a run label such as "test" or "reset" select a
    # database-wiping command by accident.
    command = sys.argv[1]
    if len(sys.argv) > 2:
        runlabel = sys.argv[-1]
    else:
        runlabel = None

    if command == "help":
        # Only show the help; do not record a run or dump the database.
        usage()
        sys.exit()

    # NOTE(review): usage() describes 'survex' as reading the blocks "but
    # not the x/y/z positions", yet both jobs are queued - confirm intent.
    survex_jobs = [("survexblks", import_survexblks),
                   ("survexpos", import_survexpos)]
    surveyimgs_jobs = [("surveyimgs", import_surveyimgs)]

    # Jobs queued for each command, in the order they are run.
    jobs_for = {
        "test": [("reinit", reinit_db),
                 ("dirsredirect", dirsredirect),
                 ("caves", import_caves)] + survex_jobs,
        "caves": [("caves", import_caves)],
        "logbooks": [("logbooks", import_logbooks)],
        "people": [("people", import_people)],
        "QMs": [("QMs", import_QMs)],
        # 'reinit' and 'syncuser' are listed by usage() but were not handled.
        "reinit": [("reinit", reinit_db)],
        "syncuser": [("syncuser", syncuser)],
        "reset": [("reinit", reinit_db),
                  ("dirsredirect", dirsredirect),
                  ("caves", import_caves),
                  ("people", import_people),
                  ("scans", import_surveyscans),
                  ("logbooks", import_logbooks),
                  ("QMs", import_QMs)] + survex_jobs + [
                  ("tunnel", import_tunnelfiles)],
        "scans": [("scans", import_surveyscans)],
        "survex": survex_jobs,
        "survexpos": [("survexpos", import_survexpos)],
        # usage() calls this command 'surveyimgs'; 'surveys' is kept as the
        # spelling that was accepted before.
        "surveys": surveyimgs_jobs,
        "surveyimgs": surveyimgs_jobs,
        "tunnel": [("tunnel", import_tunnelfiles)],
        "resetend": [("QMs", import_QMs),
                     ("tunnel", import_tunnelfiles),
                     ("surveyimgs", import_surveyimgs)],
    }
    # import_descriptions(), parse_descriptions() and writeCaves() are no
    # longer present, so 'resetend' skips them and 'writeCaves' is gone.

    jq = JobQueue(runlabel)

    if command in jobs_for:
        for label, func in jobs_for[command]:
            jq.enq(label, func)
    elif command == "reload_db":
        # NOTE(review): reload_db is not defined in this file, so this
        # raises NameError unless it is provided elsewhere - confirm.
        jq.enq("reload", reload_db)
    elif command == "autologbooks":
        import_auto_logbooks()
    elif command == "dumplogbooks":
        dumplogbooks()
    else:
        usage()
        print("%s not recognised as a command." % command)
        # Signal the failure to the calling shell.
        sys.exit(1)

    jq.run()
    jq.showprofile()
|