forked from expo/troggle

commit 39c622d5bf (parent 76a6b501f3)

    dbReset now loads into memory first (fast err checking), then into db
.gitignore (vendored) | 1 +
@@ -33,3 +33,4 @@ ignored-files.log
 tunnel-import.log
 posnotfound
 troggle.sqlite-journal
+loadsurvexblks.log
@@ -55,8 +55,9 @@ def controlPanel(request):
 
    #importlist is mostly here so that things happen in the correct order.
    #http post data seems to come in an unpredictable order, so we do it this way.
-    importlist=['reload_db', 'import_people', 'import_cavetab', 'import_logbooks', 'import_surveys', 'import_QMs']
-    databaseReset.make_dirs()
+    importlist=['reinit_db', 'import_people', 'import_caves', 'import_logbooks',
+                'import_survexblks', 'import_QMs', 'import_survexpos', 'import_surveyscans', 'import_tunnelfiles']
+    databaseReset.dirsredirect()
    for item in importlist:
        if item in request.POST:
            print("running"+ " databaseReset."+item+"()")
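The view builds a fixed-order importlist and runs only the steps ticked in the form, because HTTP POST fields arrive in no predictable order. A minimal sketch of that dispatch pattern; the run_selected_imports wrapper and the getattr lookup are illustrative assumptions, not troggle's actual view code:

    import databaseReset  # troggle module defining reinit_db, import_caves, etc.

    def run_selected_imports(post_data):
        # Fixed order matters: caves before survex blocks, and so on.
        importlist = ['reinit_db', 'import_people', 'import_caves', 'import_logbooks',
                      'import_survexblks', 'import_QMs', 'import_survexpos',
                      'import_surveyscans', 'import_tunnelfiles']
        databaseReset.dirsredirect()
        for item in importlist:
            if item in post_data:                   # only the boxes ticked in the form
                print("running databaseReset." + item + "()")
                getattr(databaseReset, item)()      # look the function up by name and call it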
databaseReset.py | 126
@@ -5,7 +5,7 @@ import settings
 os.environ['PYTHONPATH'] = settings.PYTHON_PATH
 os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')
 from django.core import management
-from django.db import connection
+from django.db import connection, close_old_connections
 from django.contrib.auth.models import User
 from django.http import HttpResponse
 from django.core.urlresolvers import reverse
@@ -13,6 +13,9 @@ from troggle.core.models import Cave, Entrance
 import troggle.flatpages.models
 import json
 
+# NOTE databaseReset.py is *imported* by views_other.py as it is used in the control panel
+# presented there.
+
 databasename=settings.DATABASES['default']['NAME']
 expouser=settings.EXPOUSER
 expouserpass=settings.EXPOUSERPASS
@@ -22,17 +25,18 @@ def reinit_db():
     """Rebuild database from scratch. Deletes the file first if sqlite is used,
     otherwise it drops the database and creates it.
     """
+    currentdbname = settings.DATABASES['default']['NAME']
     if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.sqlite3':
         try:
-            os.remove(databasename)
+            os.remove(currentdbname)
         except OSError:
             pass
     else:
         cursor = connection.cursor()
-        cursor.execute("DROP DATABASE %s" % databasename)
-        cursor.execute("CREATE DATABASE %s" % databasename)
-        cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % databasename)
-        cursor.execute("USE %s" % databasename)
+        cursor.execute("DROP DATABASE %s" % currentdbname)
+        cursor.execute("CREATE DATABASE %s" % currentdbname)
+        cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % currentdbname)
+        cursor.execute("USE %s" % currentdbname)
     syncuser()
 
 def syncuser():
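reinit_db now reads the database name at call time rather than using the module-level databasename captured at import. That matters because JobQueue.run() (further down in this diff) temporarily rewrites settings.DATABASES['default'] to an in-memory sqlite, so a value frozen at import time would point at the wrong database. A tiny illustration of the difference, with hypothetical helper names:

    import settings  # troggle imports its settings module directly

    databasename = settings.DATABASES['default']['NAME']    # frozen when the module is imported

    def stale_name():
        return databasename                                  # wrong once run() has swapped the DB

    def current_name():
        return settings.DATABASES['default']['NAME']         # re-read on every call: always current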
@@ -73,7 +77,7 @@ def import_logbooks():
 def import_QMs():
     print("Importing QMs (old caves)")
     import parsers.QMs
-    # import process itself runs on qm.csv in only 3 caves, not 264!
+    # import process itself runs on qm.csv in only 3 old caves, not the modern ones!
 
 def import_survexblks():
     import parsers.survex
@@ -159,7 +163,7 @@ def dumplogbooks():
 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
 class JobQueue():
-    """A list of import operations to run. Always reports times
+    """A list of import operations to run. Always reports profile times
     in the same order.
     """
     def __init__(self,run):
@@ -173,7 +177,7 @@ class JobQueue():
         for k in self.results_order:
             self.results[k]=[]
         self.tfile = "import_profile.json"
-        self.htmlfile = "profile.html"
+        self.htmlfile = "profile.html" # for HTML results table. Not yet done.
 
     #Adding elements to queue - enqueue
     def enq(self,label,func):
@@ -186,7 +190,9 @@ class JobQueue():
         # return self.queue.pop()
         # return ("Queue Empty!")
 
-    def run(self):
+    def loadprofiles(self):
+        """Load timings for previous runs from file
+        """
         if os.path.isfile(self.tfile):
             try:
                 f = open(self.tfile, "r")
@@ -197,9 +203,26 @@ class JobQueue():
             print "FAILURE parsing JSON file %s" % (self.tfile)
             # Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12
             f.close()
 
         for j in self.results_order:
             self.results[j].append(None) # append a placeholder
+        return True
+
+    def saveprofiles(self):
+        with open(self.tfile, 'w') as f:
+            json.dump(self.results, f)
+        return True
+
+    def memdumpsql(self):
+        djconn = django.db.connection
+        from dump import _iterdump
+        with open('memdump.sql', 'w') as f:
+            for line in _iterdump(djconn):
+                f.write('%s\n' % line.encode("utf8"))
+        return True
+
+    def runqonce(self):
+        """Run all the jobs in the queue provided once
+        """
 
         print "** Running job ", self.runlabel
         jobstart = time.time()
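memdumpsql() serialises whatever is in the current database connection to a plain SQL text file, using a local copy of sqlite's iterdump (the `from dump import _iterdump` module) so it can work on Django's connection wrapper. With a plain sqlite3 connection the standard library offers the same facility; a minimal sketch assuming a plain sqlite3.Connection rather than Django's:

    import sqlite3

    def dump_to_sql(conn, outfile="memdump.sql"):
        # conn is an open sqlite3.Connection, e.g. sqlite3.connect(":memory:").
        # iterdump() yields the SQL statements that recreate the whole database.
        with open(outfile, "w") as f:
            for statement in conn.iterdump():
                f.write("%s\n" % statement)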
@@ -216,26 +239,68 @@ class JobQueue():
             self.results[i[0]].pop() # the null item
             self.results[i[0]].append(duration)
 
-        with open(self.tfile, 'w') as f:
-            json.dump(self.results, f)
-
         jobend = time.time()
         jobduration = jobend-jobstart
-        print "** Ended all jobs. %.1f seconds" % jobduration
+        print "** Ended job %s - %.1f seconds total." % (self.runlabel,jobduration)
 
-        # currently uses django db whatever it was. CHANGE this to explicitly use
-        # a new sqlite3 db and then import the sql dump of that into the troggle db
-        # instead of loading directly into the troggle sqlite db.
-        # in-memory ":memory:" sqlite is ~ 7x faster and all of troggle can be
-        # loaded in 6 minutes that way
-        djconn = django.db.connection
-        from dump import _iterdump
-        with open('memdump.sql', 'w') as f:
-            for line in _iterdump(djconn):
-                f.write('%s\n' % line.encode("utf8"))
-
-        # now import the memory image sql into
-        ####(to do)
+        return True
+
+
+    def run(self):
+        self.loadprofiles()
+
+        dbengine = settings.DATABASES['default']['ENGINE']
+        dbname = settings.DATABASES['default']['NAME']
+
+        if dbname ==":memory:":
+            # just run, and save the sql file
+            print "-- ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE']
+            self.runqonce()
+            self.memdumpsql()
+            self.saveprofiles()
+        else:
+            # run all the imports through :memory: first
+            settings.DATABASES['default']['ENGINE'] = 'django.db.backends.sqlite3'
+            settings.DATABASES['default']['NAME'] = ":memory:"
+            print "-- ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE']
+
+            # but because the user may be expecting to add this to a db with lots of tables already there,
+            # the jobqueue may not start from scratch so we need to initialise the db properly first.
+            # But initialising twice crashes, so be sure to do it once only.
+            if ("reinit",reinit_db) not in self.queue:
+                reinit_db()
+            if ("dirsredirect",dirsredirect) not in self.queue:
+                dirsredirect()
+            if ("caves",import_caves) not in self.queue:
+                import_caves()
+            if ("people",import_people) not in self.queue:
+                import_people()
+
+            django.db.close_old_connections() # maybe not needed here
+
+            self.runqonce()
+            self.memdumpsql()
+            self.showprofile()
+
+            # restore the original db and import again
+            # if we wanted to, we could re-import the SQL generated in the first pass to be
+            # blazing fast. But for the present just re-import the lot.
+            settings.DATABASES['default']['ENGINE'] = dbengine
+            settings.DATABASES['default']['NAME'] = dbname
+            print "-- ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE']
+
+            for j in self.results_order:
+                self.results[j].pop() # throw away results from :memory: run
+                self.results[j].append(None) # append a placeholder
+
+            django.db.close_old_connections() # magic rune. works. found by looking in django.db__init__.py
+            #django.setup() # should this be needed?
+
+            self.runqonce() # crashes because it thinks it has no migrations to apply, when it does.
+            self.saveprofiles()
+
         return True
 
     def showprofile(self):
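This hunk is the core of the commit: run() points Django's default database at an in-memory sqlite, runs the whole job queue there (roughly 7x faster per the old comment, so parser errors surface quickly), dumps the result to memdump.sql, then restores the original settings and runs the queue again against the real database. A stripped-down sketch of that settings swap, assuming a list of no-argument job callables; whether mutating settings.DATABASES alone is enough depends on the Django version, which is why the commit also calls close_old_connections():

    import django.db
    import settings  # troggle-style settings module

    def run_memory_first(jobs):
        """Sketch: run `jobs` against ':memory:' first, then against the configured DB."""
        dbengine = settings.DATABASES['default']['ENGINE']
        dbname = settings.DATABASES['default']['NAME']

        # First pass: cheap error checking entirely in RAM.
        settings.DATABASES['default']['ENGINE'] = 'django.db.backends.sqlite3'
        settings.DATABASES['default']['NAME'] = ':memory:'
        django.db.close_old_connections()   # drop the connection made under the old settings
        for job in jobs:
            job()

        # Second pass: the real database, now that the data is known to load cleanly.
        settings.DATABASES['default']['ENGINE'] = dbengine
        settings.DATABASES['default']['NAME'] = dbname
        django.db.close_old_connections()
        for job in jobs:
            job()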
@@ -277,9 +342,10 @@ class JobQueue():
                 percen = 100* (r[i] - r[i-1])/r[i-1]
                 if abs(percen) >0.1:
                     print '%8.1f%%' % percen,
                 else:
                     print " - ",
             print ""
+        print "\n"
         return True
 
 
@@ -333,8 +399,8 @@ if __name__ == "__main__":
         jq.enq("reinit",reinit_db)
         jq.enq("dirsredirect",dirsredirect)
         jq.enq("caves",import_caves)
-        jq.enq("survexblks",import_survexblks)
-        jq.enq("survexpos",import_survexpos)
+        jq.enq("people",import_people)
+        jq.enq("scans",import_surveyscans)
     elif "caves" in sys.argv:
         jq.enq("caves",import_caves)
     elif "logbooks" in sys.argv:
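For reference, driving the queue directly follows the same shape as the __main__ block above. A hedged usage sketch, to be read in the context of databaseReset.py (the constructor argument appears to be the run label printed in the profile table):

    jq = JobQueue("reset")
    jq.enq("reinit", reinit_db)
    jq.enq("dirsredirect", dirsredirect)
    jq.enq("caves", import_caves)
    jq.enq("people", import_people)
    jq.enq("scans", import_surveyscans)
    jq.run()          # :memory: pass first, then the configured database
    jq.showprofile()  # timings for this run next to previous runs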
@@ -10,7 +10,9 @@ from django.utils.timezone import make_aware
 
 import re
 import os
+import time
 from datetime import datetime, timedelta
+import sys
 
 line_leg_regex = re.compile(r"[\d\-+.]+$")
@@ -179,7 +181,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
             # print('QM res station %s' % qm_resolve_station)
             # print('QM notes %s' % qm_notes)
 
-            # If the QM isn't resolved (has a resolving station) thn load it
+            # If the QM isn't resolved (has a resolving station) then load it
             if not qm_resolve_section or qm_resolve_section is not '-' or qm_resolve_section is not 'None':
                 from_section = models.SurvexBlock.objects.filter(name=qm_from_section)
                 # If we can find a section (survex note chunck, named)
@@ -364,6 +366,11 @@ def LoadAllSurvexBlocks():
 
     print(" - Data flushed")
     print(' - Loading All Survex Blocks...')
 
+    print(' - redirecting stdout to loadsurvexblks.log ...')
+    stdout_orig = sys.stdout
+    # Redirect sys.stdout to the file
+    sys.stdout = open('loadsurvexblks.log', 'w')
+
     survexfile = models.SurvexFile(path=settings.SURVEX_TOPNAME, cave=None)
     survexfile.save()
@@ -379,6 +386,11 @@ def LoadAllSurvexBlocks():
     fin.close()
     survexblockroot.text = "".join(textlines)
     survexblockroot.save()
 
+    # Close the file
+    sys.stdout.close()
+    # Restore sys.stdout to our old saved file handler
+    sys.stdout = stdout_orig
+
     print(' - Loaded All Survex Blocks.')
 
 
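These two hunks (from the survex parser) bracket the noisy block-loading pass with a by-hand sys.stdout swap so the output lands in loadsurvexblks.log, the file added to .gitignore above. On Python 3 the same effect can be had with contextlib.redirect_stdout, which also restores stdout if the parser raises; a sketch of that alternative, not what the commit itself does since this code still uses Python 2 print statements:

    import contextlib

    def run_quietly(noisy_func, logname="loadsurvexblks.log"):
        # Everything noisy_func prints goes to the log file instead of the console.
        with open(logname, "w") as logfile:
            with contextlib.redirect_stdout(logfile):
                noisy_func()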
@@ -399,13 +411,18 @@ def LoadPos():
     # but without cave import being run before,
     # then *everything* may be in the fresh 'not found' cache file.
 
-    cachefile = settings.SURVEX_DATA + "posnotfound"
+    cachefile = settings.SURVEX_DATA + "posnotfound.cache"
     notfoundbefore = {}
     if os.path.isfile(cachefile):
         updtsvx = os.path.getmtime(topdata + ".svx")
         updtcache = os.path.getmtime(cachefile)
         age = updtcache - updtsvx
-        print(' svx: %s cache: %s cache age: %s' % (updtsvx, updtcache, str(timedelta(seconds=age) )))
+        print(' svx: %s cache: %s not-found cache is fresher by: %s' % (updtsvx, updtcache, str(timedelta(seconds=age) )))
+
+        now = time.time()
+        if now - updtcache > 30*24*60*60:
+            print " cache is more than 30 days old. Deleting."
+            os.remove(cachefile)
         if age < 0 :
             print " cache is stale."
             os.remove(cachefile)
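LoadPos keeps a "posnotfound.cache" list of stations that failed to match on a previous run, and only trusts it when it is newer than the top-level .svx file and less than 30 days old. A small helper sketching that freshness test; the helper name and arguments are illustrative, not part of the commit:

    import os
    import time

    def notfound_cache_is_usable(cachefile, svxfile, max_age_days=30):
        """True if the cache exists, is newer than the survex data,
        and was written less than max_age_days ago."""
        if not os.path.isfile(cachefile):
            return False
        if os.path.getmtime(cachefile) < os.path.getmtime(svxfile):
            return False                      # survex data edited since the cache was written
        if time.time() - os.path.getmtime(cachefile) > max_age_days * 24 * 60 * 60:
            return False                      # too old; matching may have improved since
        return True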
@@ -432,6 +449,8 @@ def LoadPos():
     # cavern defaults to using same cwd as supplied input file
     call([settings.CAVERN, "--output=%s.3d" % (topdata), "%s.svx" % (topdata)])
     call([settings.THREEDTOPOS, '%s.3d' % (topdata)], cwd = settings.SURVEX_DATA)
+    print " - This next bit takes a while. Matching ~32,000 survey positions. Be patient..."
+
     posfile = open("%s.pos" % (topdata))
     posfile.readline() #Drop header
     for line in posfile.readlines():
@@ -449,9 +468,8 @@ def LoadPos():
                 ss.save()
                 found += 1
             except:
-                #print "%s in %s.pos not found in lookup of SurvexStation.objects" % (name, settings.SURVEX_TOPNAME)
                 notfoundnow.append(name)
-    print " - %s stations NOT found in lookup of SurvexStation.objects. %s found. %s skipped." % (len(notfoundnow),found, len(skip))
+    print " - %s stations not found in lookup of SurvexStation.objects. %s found. %s skipped." % (len(notfoundnow),found, len(skip))
 
     if found > 10: # i.e. a previous cave import has been done
         try: