forked from expo/troggle
Thorough spring clean and profiling
This commit is contained in:
parent
2b39dec560
commit
a8460065a4
170
databaseReset.py
170
databaseReset.py
@ -18,7 +18,10 @@ expouser=settings.EXPOUSER
|
||||
expouserpass=settings.EXPOUSERPASS
|
||||
expouseremail=settings.EXPOUSER_EMAIL
|
||||
|
||||
def reload_db():
|
||||
def reinit_db():
|
||||
"""Rebuild database from scratch. Deletes the file first if sqlite is used,
|
||||
otherwise it drops the database and creates it.
|
||||
"""
|
||||
if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.sqlite3':
|
||||
try:
|
||||
os.remove(databasename)
|
||||
@ -30,26 +33,27 @@ def reload_db():
|
||||
cursor.execute("CREATE DATABASE %s" % databasename)
|
||||
cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % databasename)
|
||||
cursor.execute("USE %s" % databasename)
|
||||
management.call_command('syncdb', interactive=False)
|
||||
user = User.objects.create_user(expouser, expouseremail, expouserpass)
|
||||
user.is_staff = True
|
||||
user.is_superuser = True
|
||||
user.save()
|
||||
syncuser()
|
||||
|
||||
def syncuser():
|
||||
"""Sync user - needed after reload"""
|
||||
"""Sync user - needed after reload
|
||||
"""
|
||||
print("Synchronizing user")
|
||||
management.call_command('syncdb', interactive=False)
|
||||
user = User.objects.create_user(expouser, expouseremail, expouserpass)
|
||||
user.is_staff = True
|
||||
user.is_superuser = True
|
||||
user.save()
|
||||
|
||||
|
||||
def make_dirs():
|
||||
"""Make directories that troggle requires"""
|
||||
def dirsredirect():
|
||||
"""Make directories that troggle requires and sets up page redirects
|
||||
"""
|
||||
#should also deal with permissions here.
|
||||
if not os.path.isdir(settings.PHOTOS_ROOT):
|
||||
os.mkdir(settings.PHOTOS_ROOT)
|
||||
for oldURL, newURL in [("indxal.htm", reverse("caveindex"))]:
|
||||
f = troggle.flatpages.models.Redirect(originalURL = oldURL, newURL = newURL)
|
||||
f.save()
|
||||
|
||||
def import_caves():
|
||||
import parsers.caves
|
||||
@ -58,38 +62,49 @@ def import_caves():
|
||||
|
||||
def import_people():
|
||||
import parsers.people
|
||||
print("Importing People (folk.csv)")
|
||||
parsers.people.LoadPersonsExpos()
|
||||
|
||||
def import_logbooks():
|
||||
import parsers.logbooks
|
||||
print("Importing Logbooks")
|
||||
parsers.logbooks.LoadLogbooks()
|
||||
|
||||
def import_survex():
|
||||
import parsers.survex
|
||||
parsers.survex.LoadAllSurvexBlocks()
|
||||
parsers.survex.LoadPos()
|
||||
|
||||
def import_QMs():
|
||||
print("Importing QMs (old caves)")
|
||||
import parsers.QMs
|
||||
# import process itself runs on qm.csv in only 3 caves, not 264!
|
||||
|
||||
def import_survex():
|
||||
import parsers.survex
|
||||
print("Importing Survex Blocks")
|
||||
parsers.survex.LoadAllSurvexBlocks()
|
||||
print("Importing Survex Positions")
|
||||
parsers.survex.LoadPos()
|
||||
|
||||
def import_survexpos():
|
||||
import parsers.survex
|
||||
print("Importing Survex Positions")
|
||||
parsers.survex.LoadPos()
|
||||
|
||||
def import_surveys():
|
||||
"""This appears to store data in unused objects. The code is kept
|
||||
for future re-working to manage progress against notes, plans and elevs.
|
||||
"""
|
||||
import parsers.surveys
|
||||
print("Importing surveys")
|
||||
parsers.surveys.parseSurveys(logfile=settings.LOGFILE)
|
||||
|
||||
def import_surveyscans():
|
||||
import parsers.surveys
|
||||
print("Importing Survey Scans")
|
||||
parsers.surveys.LoadListScans()
|
||||
|
||||
def import_tunnelfiles():
|
||||
import parsers.surveys
|
||||
print("Importing Tunnel files")
|
||||
parsers.surveys.LoadTunnelFiles()
|
||||
|
||||
def pageredirects():
|
||||
for oldURL, newURL in [("indxal.htm", reverse("caveindex"))]:
|
||||
f = troggle.flatpages.models.Redirect(originalURL = oldURL, newURL = newURL)
|
||||
f.save()
|
||||
|
||||
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
def import_auto_logbooks():
|
||||
import parsers.logbooks
|
||||
@ -154,9 +169,9 @@ class JobQueue():
|
||||
self.queue = [] # tuples of (jobname, jobfunction)
|
||||
self.results = {}
|
||||
self.results_order=[
|
||||
"date","runlabel","reload", "caves", "people",
|
||||
"date","runlabel","reinit", "caves", "people",
|
||||
"logbooks", "scans", "QMs", "survex",
|
||||
"tunnel", "surveys", "test", "makedirs", "redirect", "syncuser" ]
|
||||
"tunnel", "surveys", "test", "dirsredirect", "syncuser", "survexpos" ]
|
||||
for k in self.results_order:
|
||||
self.results[k]=[]
|
||||
self.tfile = "import_profile.json"
|
||||
@ -207,7 +222,7 @@ class JobQueue():
|
||||
# currently uses django db whatever it was. CHANGE this to explicitly use
|
||||
# a new sqlite3 db and then import the sql dump of that into the troggle db
|
||||
# instead of loading directly into the troggle sqlite db.
|
||||
# in-menmor ":memory:" sqlite is ~ 7x faster and all of troggle can be
|
||||
# in-memory ":memory:" sqlite is ~ 7x faster and all of troggle can be
|
||||
# loaded in 6 minutes that way
|
||||
djconn = django.db.connection
|
||||
from dump import _iterdump
|
||||
@ -221,52 +236,76 @@ class JobQueue():
|
||||
def showprofile(self):
|
||||
"""Prints out the time it took to run the jobqueue"""
|
||||
for k in self.results_order:
|
||||
percen=0
|
||||
lst = self.results[k]
|
||||
if k == "runlabel":
|
||||
r = lst[len(lst)-1]
|
||||
print '%15s %s' % (k,r)
|
||||
if k =="dirsredirect":
|
||||
break
|
||||
elif k =="syncuser":
|
||||
break
|
||||
elif k =="test":
|
||||
break
|
||||
elif k =="date":
|
||||
# Calculate dates as days before present to one decimal place
|
||||
r = lst[len(lst)-1]
|
||||
if len(lst)>2:
|
||||
days = (lst[len(lst)-2]-r)/(24*60*60)
|
||||
print '%15s %8.1f days ago' % (k,days)
|
||||
elif len(lst)>2:
|
||||
e = len(lst)-1
|
||||
percen = 100* (lst[e] - lst[e-1])/lst[e-1]
|
||||
if abs(percen) >0.1:
|
||||
print '%15s %8.1f%%' % (k, percen)
|
||||
print " days ago ",
|
||||
else:
|
||||
print '%15s ' % (k)
|
||||
print '%9s (s)' % k,
|
||||
percen=0
|
||||
r = self.results[k]
|
||||
#print "min=",min
|
||||
|
||||
for i in range(len(r)):
|
||||
if k == "runlabel":
|
||||
if r[i]:
|
||||
rp = r[i]
|
||||
else:
|
||||
rp = " - "
|
||||
print '%8s' % rp,
|
||||
elif k =="date":
|
||||
# Calculate dates as days before present
|
||||
if r[i]:
|
||||
if i == len(r)-1:
|
||||
print " this",
|
||||
else:
|
||||
# prints one place to the left of where you expect
|
||||
days = (r[i]-r[len(r)-1])/(24*60*60)
|
||||
print '%8.2f' % days,
|
||||
elif r[i]:
|
||||
print '%8.1f' % r[i],
|
||||
if i == len(r)-1 and r[i-1]:
|
||||
percen = 100* (r[i] - r[i-1])/r[i-1]
|
||||
if abs(percen) >0.1:
|
||||
print '%8.1f%%' % percen,
|
||||
else:
|
||||
print " - ",
|
||||
print ""
|
||||
return True
|
||||
|
||||
|
||||
def usage():
|
||||
print("""Usage is 'python databaseReset.py <command> [runlabel]'
|
||||
where command is:
|
||||
reset - this is normal usage, clear database and reread everything from files - time-consuming
|
||||
reset - normal usage: clear database and reread everything from files - time-consuming
|
||||
caves - read in the caves
|
||||
logbooks - read in just the logbooks
|
||||
logbooks - read in the logbooks
|
||||
people - read in the people from folk.csv
|
||||
QMs - read in the QM csv files
|
||||
reload_db - clear database (delete everything) and make empty tables
|
||||
scans - NOT the scanned surveynotes ?!
|
||||
QMs - read in the QM csv files (older caves only)
|
||||
reinit - clear database (delete everything) and make empty tables. Import nothing.
|
||||
scans - the survey scans in all the wallets
|
||||
survex - read in the survex files - all the survex blocks
|
||||
surveys - read in the scanned surveynotes
|
||||
tunnel - read in the Tunnel files - which scans the surveyscans too
|
||||
|
||||
survexpos - just the Pos out of the survex files (not part of reset)
|
||||
|
||||
tunnel - read in the Tunnel files - which scans the survey scans too
|
||||
|
||||
resetend - (archaic?)
|
||||
writecaves - *disabled* (archaic?)
|
||||
autologbooks - read in autologbooks (what are these?)
|
||||
dumplogbooks - write out autologbooks (not working?)
|
||||
syncuser - needed after reloading database rom SQL backup
|
||||
syncuser - needed after reloading database from SQL backup
|
||||
surveys - read in scans by expo, must run after "people". Not used.
|
||||
test - testing...
|
||||
|
||||
and [runlabel] is an optional string identifying this run of the script
|
||||
in the stored profiling data 'import-profile.json'
|
||||
|
||||
caves and logbooks must be run on an empty db before the others as they
|
||||
set up db tables used by the others.
|
||||
""")
|
||||
|
||||
if __name__ == "__main__":
|
||||
@ -275,31 +314,35 @@ if __name__ == "__main__":
|
||||
import django
|
||||
django.setup()
|
||||
|
||||
if len(sys.argv)>2:
|
||||
runlabel = sys.argv[len(sys.argv)-1]
|
||||
else:
|
||||
runlabel=None
|
||||
|
||||
jq = JobQueue(runlabel)
|
||||
|
||||
if "test" in sys.argv:
|
||||
jq.enq("reload",reload_db)
|
||||
jq.enq("makedirs",make_dirs)
|
||||
if len(sys.argv)==1:
|
||||
usage()
|
||||
exit()
|
||||
elif "test" in sys.argv:
|
||||
jq.enq("reinit",reinit_db)
|
||||
jq.enq("dirsredirect",dirsredirect)
|
||||
jq.enq("caves",import_caves)
|
||||
jq.enq("survex",import_survex)
|
||||
jq.enq("surveys",import_surveys)
|
||||
|
||||
#jq.enq("people",import_people)
|
||||
#jq.enq("logbooks",import_logbooks)
|
||||
elif "caves" in sys.argv:
|
||||
jq.enq("caves",import_caves)
|
||||
elif "logbooks" in sys.argv:
|
||||
# management.call_command('syncdb', interactive=False) # this sets the path so that import settings works in import_survex
|
||||
jq.enq("logbooks",import_logbooks)
|
||||
elif "people" in sys.argv:
|
||||
jq.enq("logbooks",import_people)
|
||||
jq.enq("people",import_people)
|
||||
elif "QMs" in sys.argv:
|
||||
jq.enq("QMs",import_QMs)
|
||||
elif "reload_db" in sys.argv:
|
||||
jq.enq("reload",reload_db)
|
||||
elif "reset" in sys.argv:
|
||||
jq.enq("reload",reload_db)
|
||||
jq.enq("makedirs",make_dirs)
|
||||
jq.enq("redirect",pageredirects)
|
||||
jq.enq("reinit",reinit_db)
|
||||
jq.enq("dirsredirect",dirsredirect)
|
||||
jq.enq("caves",import_caves)
|
||||
jq.enq("people",import_people)
|
||||
jq.enq("scans",import_surveyscans)
|
||||
@ -307,16 +350,12 @@ if __name__ == "__main__":
|
||||
jq.enq("QMs",import_QMs)
|
||||
jq.enq("survex",import_survex)
|
||||
jq.enq("tunnel",import_tunnelfiles)
|
||||
jq.enq("surveys",import_surveys)
|
||||
elif "scans" in sys.argv:
|
||||
jq.enq("scans",import_surveyscans)
|
||||
elif "survex" in sys.argv:
|
||||
# management.call_command('syncdb', interactive=False) # this sets the path so that import settings works in import_survex
|
||||
jq.enq("survex",import_survex)
|
||||
elif "survexpos" in sys.argv:
|
||||
# management.call_command('syncdb', interactive=False) # this sets the path so that import settings works in import_survex
|
||||
import parsers.survex
|
||||
jq.enq("survexpos",parsers.survex.LoadPos)
|
||||
jq.enq("survexpos",import_survexpos)
|
||||
elif "surveys" in sys.argv:
|
||||
jq.enq("surveys",import_surveys)
|
||||
elif "tunnel" in sys.argv:
|
||||
@ -336,8 +375,9 @@ if __name__ == "__main__":
|
||||
elif "dumplogbooks" in sys.argv:
|
||||
dumplogbooks()
|
||||
else:
|
||||
print("%s not recognised" % sys.argv)
|
||||
usage()
|
||||
print("%s not recognised as a command." % sys.argv[1])
|
||||
exit()
|
||||
|
||||
jq.run()
|
||||
jq.showprofile()
|
||||
|
@ -152,7 +152,7 @@ def readcave(filename):
|
||||
slug = slug,
|
||||
primary = primary)
|
||||
except:
|
||||
message = "Can't find text (slug): %s, skipping %s" % (slug, context)
|
||||
message = " ! Can't find text (slug): %s, skipping %s" % (slug, context)
|
||||
models.DataIssue.objects.create(parser='caves', message=message)
|
||||
print(message)
|
||||
|
||||
@ -164,7 +164,7 @@ def readcave(filename):
|
||||
entrance = models.Entrance.objects.get(entranceslug__slug = slug)
|
||||
ce = models.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)
|
||||
except:
|
||||
message = "Entrance text (slug) %s missing %s" % (slug, context)
|
||||
message = " ! Entrance text (slug) %s missing %s" % (slug, context)
|
||||
models.DataIssue.objects.create(parser='caves', message=message)
|
||||
print(message)
|
||||
|
||||
@ -172,14 +172,14 @@ def readcave(filename):
|
||||
def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):
|
||||
items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
|
||||
if len(items) < minItems and printwarnings:
|
||||
message = "%(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
|
||||
message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
|
||||
"itemname": itemname,
|
||||
"min": minItems} + context
|
||||
models.DataIssue.objects.create(parser='caves', message=message)
|
||||
print(message)
|
||||
|
||||
if maxItems is not None and len(items) > maxItems and printwarnings:
|
||||
message = "%(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items),
|
||||
message = " ! %(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items),
|
||||
"itemname": itemname,
|
||||
"max": maxItems} + context
|
||||
models.DataIssue.objects.create(parser='caves', message=message)
|
||||
|
@ -58,7 +58,7 @@ def LoadPersonsExpos():
|
||||
header = dict(zip(headers, range(len(headers))))
|
||||
|
||||
# make expeditions
|
||||
print("Loading expeditions")
|
||||
print(" - Loading expeditions")
|
||||
years = headers[5:]
|
||||
|
||||
for year in years:
|
||||
@ -68,7 +68,7 @@ def LoadPersonsExpos():
|
||||
save_carefully(models.Expedition, lookupAttribs, nonLookupAttribs)
|
||||
|
||||
# make persons
|
||||
print("Loading personexpeditions")
|
||||
print(" - Loading personexpeditions")
|
||||
|
||||
for personline in personreader:
|
||||
name = personline[header["Name"]]
|
||||
|
@ -150,17 +150,19 @@ def parseSurveyScans(expedition, logfile=None):
|
||||
scanObj.save()
|
||||
except (IOError, OSError):
|
||||
yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
|
||||
print("No folder found for " + expedition.year + " at:- " + yearPath)
|
||||
print(" ! No folder found for " + expedition.year + " at:- " + yearPath)
|
||||
|
||||
# dead
|
||||
def parseSurveys(logfile=None):
|
||||
try:
|
||||
readSurveysFromCSV()
|
||||
except (IOError, OSError):
|
||||
print("Survey CSV not found..")
|
||||
print(" ! Survey CSV not found..")
|
||||
pass
|
||||
|
||||
print " - Loading scans by expedition year"
|
||||
for expedition in Expedition.objects.filter(year__gte=2000): #expos since 2000, because paths and filenames were nonstandard before then
|
||||
print "%s" % expedition,
|
||||
parseSurveyScans(expedition)
|
||||
|
||||
# dead
|
||||
@ -190,7 +192,7 @@ def GetListDir(sdir):
|
||||
def LoadListScansFile(survexscansfolder):
|
||||
gld = [ ]
|
||||
|
||||
# flatten out any directories in these book files
|
||||
# flatten out any directories in these wallet folders - should not be any
|
||||
for (fyf, ffyf, fisdiryf) in GetListDir(survexscansfolder.fpath):
|
||||
if fisdiryf:
|
||||
gld.extend(GetListDir(ffyf))
|
||||
@ -199,7 +201,7 @@ def LoadListScansFile(survexscansfolder):
|
||||
|
||||
for (fyf, ffyf, fisdiryf) in gld:
|
||||
#assert not fisdiryf, ffyf
|
||||
if re.search(r"\.(?:png|jpg|jpeg)(?i)$", fyf):
|
||||
if re.search(r"\.(?:png|jpg|jpeg|pdf|jpeg|svg)(?i)$", fyf):
|
||||
survexscansingle = SurvexScanSingle(ffile=ffyf, name=fyf, survexscansfolder=survexscansfolder)
|
||||
survexscansingle.save()
|
||||
|
||||
@ -208,7 +210,7 @@ def LoadListScansFile(survexscansfolder):
|
||||
# and builds up the models we can access later
|
||||
def LoadListScans():
|
||||
|
||||
print('Loading Survey Scans...')
|
||||
print(' - Loading Survey Scans... (deleting all objects first)')
|
||||
|
||||
SurvexScanSingle.objects.all().delete()
|
||||
SurvexScansFolder.objects.all().delete()
|
||||
@ -221,12 +223,14 @@ def LoadListScans():
|
||||
|
||||
|
||||
# iterate into the surveyscans directory
|
||||
print ' - ',
|
||||
for f, ff, fisdir in GetListDir(os.path.join(settings.SURVEY_SCANS, "surveyscans")):
|
||||
if not fisdir:
|
||||
continue
|
||||
|
||||
# do the year folders
|
||||
if re.match(r"\d\d\d\d$", f):
|
||||
print "%s" % f,
|
||||
for fy, ffy, fisdiry in GetListDir(ff):
|
||||
if fisdiry:
|
||||
assert fisdiry, ffy
|
||||
|
Loading…
Reference in New Issue
Block a user