remove in-memory phase of data import

This commit is contained in:
Philip Sargent 2020-06-20 19:55:23 +01:00
parent f608fc186e
commit 77c80d1a69
5 changed files with 19 additions and 142 deletions

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.29 on 2020-06-20 16:03
# Generated by Django 1.11.29 on 2020-06-20 17:43
from __future__ import unicode_literals
from django.conf import settings

View File

@ -35,24 +35,15 @@ from troggle.parsers.imports import import_caves, import_people, import_surveysc
import_logbooks, import_QMs, import_survex, import_tunnelfiles
import troggle.logbooksdump
# NOTE databaseReset.py is *imported* by views_other.py as it is used in the control panel
# presented there.
# os.geteuid() only exists on Unix. Probe for it first so that, on platforms
# without it, the root check is skipped instead of raising AttributeError
# when this module is run or imported.
if hasattr(os, "geteuid") and os.geteuid() == 0:
    # This protects the server from having the wrong file permissions written on logs and caches
    print("This script should be run as expo not root - quitting")
    exit()

# Placeholders for the 'default' database configuration; store_dbsettings()
# overwrites them and restore_dbsettings() reads them back.
dbengine = ""
dbname = ""
dbdefault = ""

# Expo user name, password and e-mail address as configured in settings.
expouser = settings.EXPOUSER
expouserpass = settings.EXPOUSERPASS
expouseremail = settings.EXPOUSER_EMAIL
def reinit_db():
"""Rebuild database from scratch. Deletes the file first if sqlite is used,
otherwise it drops the database and creates it.
@ -75,7 +66,7 @@ def reinit_db():
print(" - deleting " + currentdbname)
os.remove(currentdbname)
except OSError:
print(" ! OSError on removing: " + currentdbname + " (Is the file open in another app? Is the server running?\n")
print(" ! OSError on removing: " + currentdbname + "\n ! Is the file open in another app? Is the server running?\n")
raise
else:
print(" - No database file found: " + currentdbname + " ..continuing, will create it.\n")
@ -86,7 +77,6 @@ def reinit_db():
cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % currentdbname)
cursor.execute("USE %s" % currentdbname)
#Sync user - needed after reload
print(" - Migrating: " + django.db.connections.databases['default']['NAME'])
management.call_command('makemigrations','core', interactive=False)
@ -123,44 +113,6 @@ def memdumpsql(fn):
f.write('%s\n' % line.encode("utf8"))
return True
def store_dbsettings():
    """Remember the current 'default' database configuration.

    Copies settings.DATABASES['default'] (the dict itself, not a copy) and
    its ENGINE and NAME values into the module globals dbdefault, dbengine
    and dbname so that restore_dbsettings() can put them back later.
    """
    global dbengine, dbname, dbdefault
    default_cfg = settings.DATABASES['default']
    dbengine = default_cfg['ENGINE']
    dbname = default_cfg['NAME']
    dbdefault = default_cfg
def restore_dbsettings():
    """Put back the database configuration saved by store_dbsettings().

    The saved dict, engine and name are written to both settings.DATABASES
    and django.db.connections.databases, in that order.
    """
    for registry in (settings.DATABASES, django.db.connections.databases):
        registry['default'] = dbdefault
        # ENGINE and NAME are re-applied separately from the saved strings,
        # in case the saved dict itself was modified in the meantime.
        registry['default']['ENGINE'] = dbengine
        registry['default']['NAME'] = dbname
def set_in_memory_dbsettings():
    """Point the 'default' database at an in-memory sqlite3 database.

    Closes existing connections, replaces settings.DATABASES['default'] with
    a sqlite3 ':memory:' configuration, then sets ENGINE and NAME again on
    both settings.DATABASES and django.db.connections.databases.
    """
    sqlite_engine = 'django.db.backends.sqlite3'
    memory_name = ':memory:'

    django.db.close_old_connections() # needed if MySQL running?
    for open_conn in django.db.connections.all():
        print(" ! Closing another connection to db in set_in_memory_dbsettings")
        open_conn.close()

    settings.DATABASES['default'] = dict(
        ENGINE=sqlite_engine,
        AUTOCOMMIT=True,
        ATOMIC_REQUESTS=False,
        NAME=memory_name,
        CONN_MAX_AGE=0,
        TIME_ZONE='UTC',
        OPTIONS={},
        HOST='',
        USER='',
        TEST={'COLLATION': None, 'CHARSET': None, 'NAME': None, 'MIRROR': None},
        PASSWORD='',
        PORT='',
    )
    # Same order as before: the settings entry first, then the live
    # connection handler's view of the configuration.
    for registry in (settings.DATABASES, django.db.connections.databases):
        registry['default']['ENGINE'] = sqlite_engine
        registry['default']['NAME'] = memory_name
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# These functions moved to a different file - not used currently.
#import logbooksdump
@ -259,83 +211,16 @@ class JobQueue():
self.results[j].append(None) # append a placeholder
def run_now_django_tests(self,n):
    """Run the Django 'test' management command at verbosity n.

    The database settings are saved with store_dbsettings() beforehand and
    put back with restore_dbsettings() afterwards. The restore sits in a
    finally clause so that it also happens when the test command raises
    (including SystemExit) rather than only on a clean return.
    """
    store_dbsettings()
    try:
        # this leaves the db set to :memory: whatever it was initially
        management.call_command('test', verbosity=n)
        django.db.close_old_connections()
    finally:
        restore_dbsettings()
    # and whatever I do, it stays that way !
def skip_memory_phase(self):
    """Return True when the in-memory import phase should be skipped.

    The phase is skipped when self.runlabel is empty or None, or when it
    begins with "F-"; otherwise False is returned.
    """
    label = self.runlabel
    if not label:
        # Covers both None and "", so no separate comparison with "" is needed.
        return True
    return label[0:2] == "F-"
def run(self):
"""First runs all the jobs in the queue against a scratch in-memory db
then re-runs the import against the db specified in settings.py
Default behaviour is to skip the in-memory phase.
When MySQL is the db the in-memory phase crashes as MySQL does not properly
relinquish some kind of db connection (not fixed yet)
"""
self.loadprofiles()
# store_dbsettings() fills the module globals dbengine, dbname and dbdefault
store_dbsettings()
print("-- start ", django.db.connections.databases['default']['ENGINE'], django.db.connections.databases['default']['NAME'])
# dbname is the NAME of the 'default' database as saved by store_dbsettings()
if dbname ==":memory:":
# just run, and save the sql file
self.runqonce()
memdumpsql('memdump.sql') # saved contents of scratch db, could be imported later..
self.saveprofiles()
# skip_memory_phase() is true when runlabel is empty or begins with "F-"
elif self.skip_memory_phase():
self.runqonce()
self.saveprofiles()
else:
# run all the imports through :memory: first
set_in_memory_dbsettings()
print("-- phase 1 ", settings.DATABASES['default']['ENGINE'], settings.DATABASES['default']['NAME'])
# the jobqueue may not start from scratch so we need to initialise the db properly first
# because we are using an empty :memory: database
# But initiating twice crashes it; so be sure to do it once only.
# Damn. migrate() is still calling MySQL somehow **conn_params not sqlite3. So crashes on expo server.
# queue entries are compared as (name, function) tuples; each of these
# three jobs is run here only when it is not already queued
if ("reinit",reinit_db) not in self.queue:
reinit_db()
if ("caves",import_caves) not in self.queue:
import_caves() # sometime extract the initialising code from this and put in reinit...
if ("people",import_people) not in self.queue:
import_people() # sometime extract the initialising code from this and put in reinit...
django.db.close_old_connections() # maybe not needed here
self.runqonce()
memdumpsql('memdump2.sql')
self.showprofile()
# restore the original db and import again
# if we wanted to, we could re-import the SQL generated in the first pass to be
# blazing fast. But for the present just re-import the lot.
restore_dbsettings()
print("-- phase 2 ", settings.DATABASES['default']['ENGINE'], settings.DATABASES['default']['NAME'])
print(django.db.connections.databases['default']['NAME'])
django.db.close_old_connections() # maybe not needed here
for j in self.results_order:
self.results[j].pop() # throw away results from :memory: run
self.append_placeholders()
django.db.close_old_connections()
#django.setup() # should this be needed?
self.runqonce()
self.saveprofiles()
# NOTE(review): the statements below repeat the runqonce()/saveprofiles() calls
# made in the branches above; presumably they are the replacement body that
# runs the queue once against the configured db - confirm which lines are live.
self.runqonce()
if settings.DATABASES['default']['NAME'] ==":memory:":
memdumpsql('memdump.sql') # saved contents of in-memory db, could be imported later..
self.saveprofiles()
return True
def showprofile(self):
@ -407,15 +292,8 @@ def usage():
and [runlabel] is an optional string identifying this run of the script
in the stored profiling data 'import-profile.json'
if [runlabel] is absent or begins with "F-" then it will skip the :memory: pass
caves and logbooks must be run on an empty db before the others as they
set up db tables used by the others.
the commands are first run on an in-memory empty database before being run on
the actual persistent database. This is very fast and checks for import errors.
the initial in-memory phase is on an empty db, so always runs caves & people for this phase
""")
if __name__ == "__main__":
@ -479,11 +357,5 @@ if __name__ == "__main__":
print("%s not recognised as a command." % sys.argv[1])
exit()
store_dbsettings()
#set_in_memory_dbsettings()
#set_in_memory_dbsettings() # seems to be ignored. Appears to be set, but in reality it is not.
#jq.run_now_django_tests(1) # actually does set db to :memory: - but invisibly !
jq.run()
jq.showprofile()

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.29 on 2020-06-20 16:03
# Generated by Django 1.11.29 on 2020-06-20 17:43
from __future__ import unicode_literals
from django.db import migrations, models

View File

@ -4,9 +4,11 @@
6 ./core/views.py
6 ./manage.py
8 ./core/templatetags/csrffaker.py
10 ./flatpages/models.py
10 ./helper/__init__.py
10 ./wsgi.py
26 ./core/management/commands/reset_db.py
26 ./export/toqms.py
26 ./flatpages/migrations/0001_initial.py
33 ./localsettingswindows.py
37 ./localsettingsubuntu.py
38 ./profiles/urls.py
@ -14,6 +16,7 @@
39 ./localsettings-expo-live.py
39 ./localsettingsdocker.py
39 ./localsettingsserver.py
40 ./parsers/imports.py
41 ./localsettingspotatohut.py
41 ./middleware.py
44 ./dump.py
@ -22,6 +25,7 @@
49 ./parsers/subcaves.py
63 ./logbooksdump.py
69 ./core/TESTS/tests.py
70 ./urls.py
73 ./localsettings.py
73 ./localsettingsWSL.py
81 ./settings.py
@ -33,7 +37,9 @@
103 ./core/view_surveys.py
124 ./core/templatetags/wiki_markup.py
135 ./utils.py
150 ./parsers/surveys.py
156 ./flatpages/views.py
160 ./core/models_survex.py
164 ./modelviz.py
167 ./core/models.py
175 ./core/views_other.py
@ -41,10 +47,9 @@
217 ./core/views_logbooks.py
256 ./core/views_survex.py
276 ./profiles/views.py
359 ./databaseReset.py
382 ./troggle-inspectdb.py
280 ./databaseReset.py
387 ./core/views_caves.py
431 ./parsers/survex.py
462 ./core/migrations/0001_initial.py
450 ./core/models_caves.py
515 ./parsers/logbooks.py
5999
5950

View File

@ -7,9 +7,9 @@ python manage.py inspectdb > troggle-inspectdb.py
echo ""
# count non-blank lines of python and template HTML code
# includes all variants of settings.py files
find . -name \*.html -print0 | xargs -0 egrep -vc "#|^\s*$" | grep -v "0$" | awk -F ":" '{ sum +=$2; print $2, $1; } END {print sum}'| sort -n > lines-of-templates.txt
find . -name \*.html -print0 | xargs -0 egrep -vc "#|^\s*$" | grep -v ":0$" | awk -F ":" '{ sum +=$2; print $2, $1; } END {print sum}'| sort -n > lines-of-templates.txt
find . -name \*.py -print0 | xargs -0 egrep -vc "#|^\s*$" | grep -v "0$" | awk -F ":" '{ sum +=$2; print $2, $1; } END {print sum}'| sort -n > lines-of-python.txt
find . -name \*.py -print0 | xargs -0 egrep -vc "#|^\s*$" | grep -v ":0$" | grep -v "/migrations/" |grep -v "troggle-inspectdb.py"| awk -F ":" '{ sum +=$2; print $2, $1; } END {print sum}'| sort -n > lines-of-python.txt
echo `tail -1 lines-of-python.txt` non-comment lines of python.
# This deletes the database so must run after generating troggle-inspectdb.py