troggle-unchained/databaseReset.py

459 lines
17 KiB
Python
Raw Normal View History

2011-07-11 02:10:22 +01:00
import os
import time
import timeit
2011-07-11 02:10:22 +01:00
import settings
os.environ['PYTHONPATH'] = settings.PYTHON_PATH
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')
2011-07-11 02:10:22 +01:00
from django.core import management
from django.db import connection, close_old_connections
2011-07-11 02:10:22 +01:00
from django.contrib.auth.models import User
from django.http import HttpResponse
from django.core.urlresolvers import reverse
from troggle.core.models import Cave, Entrance
import troggle.flatpages.models
2020-04-16 20:36:42 +01:00
import json
2011-07-11 02:10:22 +01:00
# NOTE databaseReset.py is *imported* by views_other.py as it is used in the control panel
# presented there.
# Name of the default database and the credentials of the expo account that
# syncuser() creates, all read once from the Django settings module.
databasename = settings.DATABASES['default']['NAME']
expouser = settings.EXPOUSER
expouserpass = settings.EXPOUSERPASS
expouseremail = settings.EXPOUSER_EMAIL
2011-07-11 02:10:22 +01:00
2020-04-27 23:51:41 +01:00
def reinit_db():
    """Throw the default database away and start again with empty tables.

    With the sqlite3 backend the database file is simply removed (it is not
    an error if the file is not there).  With any other backend the database
    is dropped, re-created with the utf8 character set and selected again.
    In both cases syncuser() then rebuilds the tables and the expo user.
    """
    db_name = settings.DATABASES['default']['NAME']
    uses_sqlite = settings.DATABASES['default']['ENGINE'] == 'django.db.backends.sqlite3'

    if not uses_sqlite:
        cursor = connection.cursor()
        statements = (
            "DROP DATABASE %s",
            "CREATE DATABASE %s",
            "ALTER DATABASE %s CHARACTER SET=utf8",
            "USE %s",
        )
        for statement in statements:
            cursor.execute(statement % db_name)
    else:
        try:
            os.remove(db_name)
        except OSError:
            # Best effort: nothing to delete (or it could not be deleted).
            pass

    syncuser()
2011-07-11 02:10:22 +01:00
def syncuser():
    """Sync user - needed after reload

    Runs Django's ``syncdb`` command non-interactively and then makes sure
    the expo account named by ``settings.EXPOUSER`` exists.  If the account
    is already in the database (for example because the database was
    reloaded from an SQL backup) it is updated in place instead of being
    created again, so calling this more than once does not fail.  Either way
    the account ends up with the configured email address and password and
    with staff and superuser rights.
    """
    print("Synchronizing user")
    management.call_command('syncdb', interactive=False)
    try:
        user = User.objects.get(username=expouser)
    except User.DoesNotExist:
        user = User.objects.create_user(expouser, expouseremail, expouserpass)
    else:
        # Existing account: bring it in line with the current settings.
        user.email = expouseremail
        user.set_password(expouserpass)
    user.is_staff = True
    user.is_superuser = True
    user.save()
2020-04-27 23:51:41 +01:00
def dirsredirect():
    """Create the photos directory if needed and store the page redirects.

    Makes settings.PHOTOS_ROOT when it is not already a directory, then
    saves one flatpages Redirect for each (old URL, new URL) pair.
    """
    # should also deal with permissions here.
    photos_missing = not os.path.isdir(settings.PHOTOS_ROOT)
    if photos_missing:
        os.mkdir(settings.PHOTOS_ROOT)

    redirects = [("indxal.htm", reverse("caveindex"))]
    for old_url, new_url in redirects:
        redirect = troggle.flatpages.models.Redirect(originalURL=old_url, newURL=new_url)
        redirect.save()
2011-07-11 02:10:22 +01:00
def import_caves():
    """Read in the caves by calling parsers.caves.readcaves()."""
    from parsers import caves as caves_parser
    print("Importing Caves")
    caves_parser.readcaves()
2011-07-11 02:10:22 +01:00
def import_people():
    """Read in the people (folk.csv) by calling parsers.people.LoadPersonsExpos()."""
    from parsers import people as people_parser
    print("Importing People (folk.csv)")
    people_parser.LoadPersonsExpos()
def import_logbooks():
    """Read in the logbooks by calling parsers.logbooks.LoadLogbooks()."""
    from parsers import logbooks as logbook_parser
    print("Importing Logbooks")
    logbook_parser.LoadLogbooks()
2020-04-27 23:51:41 +01:00
def import_QMs():
    """Import the QMs for the old caves.

    No loader function is called here: according to the note at the end of
    this function, the import work is done by the parsers.QMs module itself
    when it is imported.
    """
    print("Importing QMs (old caves)")
    # NOTE(review): Python runs a module body only the first time the module
    # is imported in a process, so a second call of this function in the same
    # run presumably imports nothing -- confirm against parsers/QMs.py.
    import parsers.QMs
    # import process itself runs on qm.csv in only 3 old caves, not the modern ones!
2020-04-27 23:51:41 +01:00
2020-04-28 01:18:57 +01:00
def import_survexblks():
    """Read in the survex blocks by calling parsers.survex.LoadAllSurvexBlocks()."""
    from parsers import survex as survex_parser
    print("Importing Survex Blocks")
    survex_parser.LoadAllSurvexBlocks()
2020-04-27 23:51:41 +01:00
def import_survexpos():
    """Read in the survex x/y/z positions by calling parsers.survex.LoadPos()."""
    from parsers import survex as survex_parser
    print("Importing Survex x/y/z Positions")
    survex_parser.LoadPos()
2020-04-28 01:18:57 +01:00
def import_surveyimgs():
    """Import the survey images via parsers.surveys.parseSurveys().

    The data this stores appears to end up in objects nothing else uses; the
    code is retained so it can later be reworked to track progress against
    notes, plans and elevs.
    """
    from parsers import surveys as surveys_parser
    print("Importing survey images")
    surveys_parser.parseSurveys(logfile=settings.LOGFILE)
def import_surveyscans():
    """Read in the survey scans by calling parsers.surveys.LoadListScans()."""
    from parsers import surveys as surveys_parser
    print("Importing Survey Scans")
    surveys_parser.LoadListScans()
def import_tunnelfiles():
    """Read in the Tunnel files by calling parsers.surveys.LoadTunnelFiles()."""
    from parsers import surveys as surveys_parser
    print("Importing Tunnel files")
    surveys_parser.LoadTunnelFiles()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2011-07-11 02:10:22 +01:00
def import_auto_logbooks():
    """Replace all logbook data with what is found in the autologbook files.

    Deletes every PersonTrip and every LogbookEntry one object at a time,
    then walks <EXPOWEB>/years/<year>/autologbook for each expedition and
    hands every file found to parsers.logbooks.parseAutoLogBookEntry(),
    printing each path as it goes.
    """
    import parsers.logbooks
    import os

    models = troggle.core.models
    for person_trip in models.PersonTrip.objects.all():
        person_trip.delete()
    for entry in models.LogbookEntry.objects.all():
        entry.delete()

    for expedition in models.Expedition.objects.all():
        autologbook_dir = os.path.join(
            settings.EXPOWEB, "years", expedition.year, "autologbook")
        for dirpath, _subdirs, names in os.walk(autologbook_dir):
            for name in names:
                print(os.path.join(dirpath, name))
                parsers.logbooks.parseAutoLogBookEntry(os.path.join(dirpath, name))
# Temporary function until the definitive source of the data has been transferred.
2011-07-11 02:10:22 +01:00
from django.template.defaultfilters import slugify
from django.template import Context, loader
def dumplogbooks():
    """Write every logbook entry out as an HTML file in an autologbook directory.

    Each LogbookEntry is rendered with the 'dataformat/logbookentry.html'
    template and written, UTF-8 encoded, to
    <EXPOWEB>/years/<expedition year>/autologbook/<date>.<slug of title>.html
    (the slug is cut to 50 characters).  The autologbook directory is created
    if it does not exist.  The reference of each entry's cave, when it has
    one, is printed as the entry is processed.
    """
    def get_name(pe):
        """Return the nickname of a person-expedition, or else the person's first name."""
        if pe.nickname:
            return pe.nickname
        return pe.person.first_name

    # The template is the same for every entry, so look it up once.
    template = loader.get_template('dataformat/logbookentry.html')

    for lbe in troggle.core.models.LogbookEntry.objects.all():
        dateStr = lbe.date.strftime("%Y-%m-%d")
        directory = os.path.join(settings.EXPOWEB,
                                 "years",
                                 lbe.expedition.year,
                                 "autologbook")
        if not os.path.isdir(directory):
            os.mkdir(directory)
        filename = os.path.join(directory,
                                dateStr + "." + slugify(lbe.title)[:50] + ".html")
        if lbe.cave:
            print(lbe.cave.reference())
            trip = {"title": lbe.title, "html": lbe.text, "cave": lbe.cave.reference(), "caveOrLocation": "cave"}
        else:
            trip = {"title": lbe.title, "html": lbe.text, "location": lbe.place, "caveOrLocation": "location"}
        # Trips without a personexpedition are left out of the people list.
        pts = [pt for pt in lbe.persontrip_set.all() if pt.personexpedition]
        persons = [{"name": get_name(pt.personexpedition),
                    "TU": pt.time_underground,
                    "author": pt.is_logbook_entry_author} for pt in pts]
        context = Context({'trip': trip,
                           'persons': persons,
                           'date': dateStr,
                           'expeditionyear': lbe.expedition.year})
        # Render before opening the file, so that a template error cannot
        # leave an empty or truncated file behind.
        output = template.render(context)
        # 'with' guarantees the file is closed even if the write fails.
        with open(filename, "wb") as f:
            f.write(unicode(output).encode("utf-8"))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2020-04-16 20:36:42 +01:00
class JobQueue():
    """A list of import operations to run. Always reports profile times
    in the same order.

    Jobs are (label, function) pairs added with enq().  run() executes them
    and records how long each one took.  The timings of this and of earlier
    runs are kept, one list per label, in the JSON file named by self.tfile
    and are printed by showprofile() in the order of self.results_order.
    """

    # Columns that are recorded but left out of the table showprofile() prints.
    _HIDDEN_COLUMNS = ("test", "dirsredirect", "syncuser")

    def __init__(self, run):
        """Create an empty queue; `run` is the label stored with this run's timings."""
        self.runlabel = run
        self.queue = []  # tuples of (jobname, jobfunction)
        self.results = {}  # label -> list with one entry per recorded run
        self.results_order = [
            "date", "runlabel", "reinit", "caves", "people",
            "logbooks", "QMs", "survexblks", "survexpos",
            "tunnel", "scans", "surveyimgs", "test", "dirsredirect", "syncuser"]
        for k in self.results_order:
            self.results[k] = []
        self.tfile = "import_profile.json"
        self.htmlfile = "profile.html"  # for HTML results table. Not yet done.

    def enq(self, label, func):
        """Add the job `func`, to be timed under `label`, to the end of the queue."""
        self.queue.append((label, func))
        return True

    def loadprofiles(self):
        """Load timings for previous runs from file

        A missing profile file is skipped silently; one that cannot be read
        or does not hold a JSON object of lists is reported and ignored, so
        the history starts afresh.  A None placeholder for the run that is
        about to start is then appended to every column in results_order.
        """
        if os.path.isfile(self.tfile):
            try:
                # 'with' closes the file whether or not parsing succeeds.
                with open(self.tfile, "r") as f:
                    data = json.load(f)
                if not isinstance(data, dict):
                    raise ValueError("profile data is not a JSON object")
                for timings in data.values():
                    if not isinstance(timings, list):
                        raise ValueError("profile column is not a list")
            except (IOError, OSError, ValueError):
                print("FAILURE parsing JSON file %s" % (self.tfile))
                # Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12
            else:
                for j in data:
                    self.results[j] = data[j]
        for j in self.results_order:
            self.results[j].append(None)  # append a placeholder
        return True

    def saveprofiles(self):
        """Write all recorded timings to the profile file as JSON."""
        with open(self.tfile, 'w') as f:
            json.dump(self.results, f)
        return True

    def memdumpsql(self):
        """Write the lines produced by dump._iterdump() for the database to 'memdump.sql'."""
        # Use the connection imported at module level: the bare name `django`
        # is only bound when this file is run as a script, not when it is
        # imported (as views_other.py does).
        djconn = connection
        from dump import _iterdump
        with open('memdump.sql', 'w') as f:
            for line in _iterdump(djconn):
                f.write('%s\n' % line.encode("utf8"))
        return True

    def _record(self, label, value):
        """Store `value` as this run's entry for `label`.

        The last entry of the column (normally the None placeholder added by
        loadprofiles) is replaced.  A label that has no column yet, such as
        a job name missing from results_order, gets a new one instead of
        raising KeyError after the job has already been run.
        """
        column = self.results.setdefault(label, [])
        if column:
            column.pop()  # the null item
        column.append(value)

    def runqonce(self):
        """Run all the jobs in the queue provided once

        Records the start time and run label, then calls each queued
        function in order and records how many seconds it took.
        """
        print("** Running job  %s" % (self.runlabel,))
        jobstart = time.time()
        self._record("date", jobstart)
        self._record("runlabel", self.runlabel)
        for label, func in self.queue:
            start = time.time()
            func()
            duration = time.time() - start
            print("\n*- Ended \" %s \" %.1f seconds" % (label, duration))
            self._record(label, duration)
        jobend = time.time()
        jobduration = jobend - jobstart
        print("** Ended job %s - %.1f seconds total." % (self.runlabel, jobduration))
        return True

    def run(self):
        """Run the queue, first against an in-memory database and then for real.

        If the configured database is already ':memory:' the queue is run
        once, the database is dumped with memdumpsql() and the timings are
        saved.  Otherwise settings are pointed at an in-memory sqlite3
        database for a first pass, restored, and the queue is run again
        against the configured database; only the timings of that second
        pass are saved.
        """
        self.loadprofiles()
        dbengine = settings.DATABASES['default']['ENGINE']
        dbname = settings.DATABASES['default']['NAME']

        if dbname == ":memory:":
            # just run, and save the sql file
            print("--  %s %s" % (settings.DATABASES['default']['NAME'],
                                 settings.DATABASES['default']['ENGINE']))
            self.runqonce()
            self.memdumpsql()
            self.saveprofiles()
            return True

        # run all the imports through :memory: first
        settings.DATABASES['default']['ENGINE'] = 'django.db.backends.sqlite3'
        settings.DATABASES['default']['NAME'] = ":memory:"
        try:
            print("--  %s %s" % (settings.DATABASES['default']['NAME'],
                                 settings.DATABASES['default']['ENGINE']))
            # but because the user may be expecting to add this to a db with lots of tables already there,
            # the jobqueue may not start from scratch so we need to initialise the db properly first
            # because we are using an empty :memory: database
            # But initiating twice crashes, so be sure to do it once only.
            if ("reinit", reinit_db) not in self.queue:
                reinit_db()
            if ("dirsredirect", dirsredirect) not in self.queue:
                dirsredirect()
            if ("caves", import_caves) not in self.queue:
                import_caves()  # sometime extract the initialising code from this and put in reinit
            if ("people", import_people) not in self.queue:
                import_people()  # sometime extract the initialising code from this and put in reinit

            close_old_connections()  # maybe not needed here

            self.runqonce()
            self.memdumpsql()
            self.showprofile()
        finally:
            # restore the original db, also when the first pass failed, so
            # the process is never left pointing at the in-memory database
            settings.DATABASES['default']['ENGINE'] = dbengine
            settings.DATABASES['default']['NAME'] = dbname

        # import again into the original db
        # if we wanted to, we could re-import the SQL generated in the first pass to be
        # blazing fast. But for the present just re-import the lot.
        print("--  %s %s" % (settings.DATABASES['default']['NAME'],
                             settings.DATABASES['default']['ENGINE']))

        close_old_connections()  # maybe not needed here
        for j in self.results_order:
            self.results[j].pop()  # throw away results from :memory: run
            self.results[j].append(None)  # append a placeholder

        close_old_connections()  # magic rune. works. found by looking in django.db__init__.py
        #django.setup() # should this be needed?

        self.runqonce()  # crashes because it thinks it has no migrations to apply, when it does.
        self.saveprofiles()
        return True

    def showprofile(self):
        """Prints out the time it took to run the jobqueue

        One line is printed per column of results_order (except the hidden
        ones), with one cell per recorded run.  Start dates are shown as
        days relative to the latest run, and the latest timing is followed
        by its percentage change from the previous run when that change is
        more than 0.1%.
        """
        for k in self.results_order:
            if k in self._HIDDEN_COLUMNS:
                continue
            # Row labels are 14 characters wide and every cell 8, so that the
            # columns of the table line up.
            if k == "date":
                cells = ['%14s' % "days ago "]
            else:
                cells = ['%10s (s)' % k]
            r = self.results[k]
            last = len(r) - 1
            for i, value in enumerate(r):
                if k == "runlabel":
                    cells.append('%8s' % (value if value else " - "))
                elif k == "date":
                    # Calculate dates as days before present
                    if value:
                        if i == last:
                            cells.append('%8s' % "this")
                        else:
                            # prints one place to the left of where you expect
                            days = (value - r[last]) / (24 * 60 * 60)
                            cells.append('%8.2f' % days)
                elif value:
                    cells.append('%8.1f' % value)
                    if i == last and i > 0 and r[i - 1]:
                        percen = 100 * (value - r[i - 1]) / r[i - 1]
                        if abs(percen) > 0.1:
                            cells.append('%8.1f%%' % percen)
                else:
                    cells.append('%8s' % " - ")
            print(" ".join(cells))
        print("\n")
        return True
2011-07-11 02:10:22 +01:00
def usage():
    """Print the command-line help: the available commands and the optional run label."""
    # The profile file is named as JobQueue writes it: import_profile.json.
    print("""Usage is 'python databaseReset.py <command> [runlabel]'
where command is:
reset - normal usage: clear database and reread everything from files - time-consuming
caves - read in the caves
logbooks - read in the logbooks
people - read in the people from folk.csv
QMs - read in the QM csv files (older caves only)
reinit - clear database (delete everything) and make empty tables. Import nothing.
scans - the survey scans in all the wallets
survex - read in the survex files - all the survex blocks but not the x/y/z positions
survexpos - just the x/y/z Pos out of the survex files
tunnel - read in the Tunnel files - which scans the survey scans too
resetend - (archaic?)
writecaves - *disabled* (archaic?)
autologbooks - read in autologbooks (what are these?)
dumplogbooks - write out autologbooks (not working?)
syncuser - needed after reloading database from SQL backup
surveyimgs - read in scans by expo, must run after "people". Not used.
test - testing...
and [runlabel] is an optional string identifying this run of the script
in the stored profiling data 'import_profile.json'
caves and logbooks must be run on an empty db before the others as they
set up db tables used by the others.
""")
2011-07-11 02:10:22 +01:00
if __name__ == "__main__":
    # Command-line entry point: pick the jobs for the first recognised
    # command found anywhere in the arguments, then run and profile them.
    import troggle.core.models
    import sys
    import django
    django.setup()

    # When more than one argument is given, the last one labels this run in
    # the stored profiling data.
    if len(sys.argv) > 2:
        runlabel = sys.argv[len(sys.argv) - 1]
    else:
        runlabel = None

    jq = JobQueue(runlabel)

    if len(sys.argv) == 1:
        usage()
        sys.exit()
    elif "test" in sys.argv:
        jq.enq("reinit", reinit_db)
        jq.enq("dirsredirect", dirsredirect)
        jq.enq("caves", import_caves)
        jq.enq("people", import_people)
        jq.enq("scans", import_surveyscans)
    elif "caves" in sys.argv:
        jq.enq("caves", import_caves)
    elif "logbooks" in sys.argv:
        jq.enq("logbooks", import_logbooks)
    elif "people" in sys.argv:
        jq.enq("people", import_people)
    elif "QMs" in sys.argv:
        jq.enq("QMs", import_QMs)
    elif "reload_db" in sys.argv:
        # This file defines no reload_db() function, so queueing it could
        # only fail with a NameError; say so instead.
        print("reload_db is not available: this script defines no reload_db() function.")
        sys.exit(1)
    elif "reset" in sys.argv:
        jq.enq("reinit", reinit_db)
        jq.enq("dirsredirect", dirsredirect)
        jq.enq("caves", import_caves)
        jq.enq("people", import_people)
        jq.enq("scans", import_surveyscans)
        jq.enq("logbooks", import_logbooks)
        jq.enq("QMs", import_QMs)
        jq.enq("survexblks", import_survexblks)
        jq.enq("survexpos", import_survexpos)
        jq.enq("tunnel", import_tunnelfiles)
    elif "scans" in sys.argv:
        jq.enq("scans", import_surveyscans)
    elif "survex" in sys.argv:
        jq.enq("survexblks", import_survexblks)
        jq.enq("survexpos", import_survexpos)
    elif "survexpos" in sys.argv:
        jq.enq("survexpos", import_survexpos)
    elif "surveys" in sys.argv or "surveyimgs" in sys.argv:
        # "surveyimgs" is the name the usage text advertises; "surveys" is
        # kept so existing invocations still work.
        jq.enq("surveyimgs", import_surveyimgs)
    elif "tunnel" in sys.argv:
        jq.enq("tunnel", import_tunnelfiles)
    elif "help" in sys.argv:
        usage()
    elif "resetend" in sys.argv:
        jq.enq("QMs", import_QMs)
        jq.enq("tunnel", import_tunnelfiles)
        jq.enq("surveyimgs", import_surveyimgs)
        #import_descriptions() # no longer present
        #parse_descriptions() # no longer present
    # elif "writeCaves" in sys.argv:
    #     writeCaves() # no longer present
    elif "autologbooks" in sys.argv:
        import_auto_logbooks()
    elif "dumplogbooks" in sys.argv:
        dumplogbooks()
    else:
        usage()
        print("%s not recognised as a command." % sys.argv[1])
        sys.exit()

    # Commands that queue nothing (help, autologbooks, dumplogbooks) have
    # already done their work; running an empty queue would only rebuild a
    # throw-away in-memory database and store a profile row with no timings.
    if jq.queue:
        jq.run()
        jq.showprofile()