diff --git a/databaseReset.py b/databaseReset.py index a4687cd..dadb2dc 100644 --- a/databaseReset.py +++ b/databaseReset.py @@ -18,7 +18,10 @@ expouser=settings.EXPOUSER expouserpass=settings.EXPOUSERPASS expouseremail=settings.EXPOUSER_EMAIL -def reload_db(): +def reinit_db(): + """Rebuild database from scratch. Deletes the file first if sqlite is used, + otherwise it drops the database and creates it. + """ if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.sqlite3': try: os.remove(databasename) @@ -30,26 +33,27 @@ def reload_db(): cursor.execute("CREATE DATABASE %s" % databasename) cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % databasename) cursor.execute("USE %s" % databasename) - management.call_command('syncdb', interactive=False) - user = User.objects.create_user(expouser, expouseremail, expouserpass) - user.is_staff = True - user.is_superuser = True - user.save() + syncuser() def syncuser(): - """Sync user - needed after reload""" + """Sync user - needed after reload + """ + print("Synchronizing user") management.call_command('syncdb', interactive=False) user = User.objects.create_user(expouser, expouseremail, expouserpass) user.is_staff = True user.is_superuser = True user.save() - -def make_dirs(): - """Make directories that troggle requires""" +def dirsredirect(): + """Make directories that troggle requires and sets up page redirects + """ #should also deal with permissions here. if not os.path.isdir(settings.PHOTOS_ROOT): os.mkdir(settings.PHOTOS_ROOT) + for oldURL, newURL in [("indxal.htm", reverse("caveindex"))]: + f = troggle.flatpages.models.Redirect(originalURL = oldURL, newURL = newURL) + f.save() def import_caves(): import parsers.caves @@ -58,38 +62,49 @@ def import_caves(): def import_people(): import parsers.people + print("Importing People (folk.csv)") parsers.people.LoadPersonsExpos() def import_logbooks(): import parsers.logbooks + print("Importing Logbooks") parsers.logbooks.LoadLogbooks() -def import_survex(): - import parsers.survex - parsers.survex.LoadAllSurvexBlocks() - parsers.survex.LoadPos() - def import_QMs(): + print("Importing QMs (old caves)") import parsers.QMs # import process itself runs on qm.csv in only 3 caves, not 264! +def import_survex(): + import parsers.survex + print("Importing Survex Blocks") + parsers.survex.LoadAllSurvexBlocks() + print("Importing Survex Positions") + parsers.survex.LoadPos() + +def import_survexpos(): + import parsers.survex + print("Importing Survex Positions") + parsers.survex.LoadPos() + def import_surveys(): + """This appears to store data in unused objects. The code is kept + for future re-working to manage progress against notes, plans and elevs. + """ import parsers.surveys + print("Importing surveys") parsers.surveys.parseSurveys(logfile=settings.LOGFILE) def import_surveyscans(): import parsers.surveys + print("Importing Survey Scans") parsers.surveys.LoadListScans() def import_tunnelfiles(): import parsers.surveys + print("Importing Tunnel files") parsers.surveys.LoadTunnelFiles() -def pageredirects(): - for oldURL, newURL in [("indxal.htm", reverse("caveindex"))]: - f = troggle.flatpages.models.Redirect(originalURL = oldURL, newURL = newURL) - f.save() - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - def import_auto_logbooks(): import parsers.logbooks @@ -154,9 +169,9 @@ class JobQueue(): self.queue = [] # tuples of (jobname, jobfunction) self.results = {} self.results_order=[ - "date","runlabel","reload", "caves", "people", + "date","runlabel","reinit", "caves", "people", "logbooks", "scans", "QMs", "survex", - "tunnel", "surveys", "test", "makedirs", "redirect", "syncuser" ] + "tunnel", "surveys", "test", "dirsredirect", "syncuser", "survexpos" ] for k in self.results_order: self.results[k]=[] self.tfile = "import_profile.json" @@ -191,12 +206,12 @@ class JobQueue(): self.results["runlabel"].append(self.runlabel) for i in self.queue: - start = time.time() - i[1]() # looks ugly but invokes function passed in the second item in the tuple - duration = time.time()-start - print "\n*- Ended \"", i[0], "\" %.1f seconds" % duration - self.results[i[0]].append(duration) - + start = time.time() + i[1]() # looks ugly but invokes function passed in the second item in the tuple + duration = time.time()-start + print "\n*- Ended \"", i[0], "\" %.1f seconds" % duration + self.results[i[0]].append(duration) + with open(self.tfile, 'w') as f: json.dump(self.results, f) @@ -207,7 +222,7 @@ class JobQueue(): # currently uses django db whatever it was. CHANGE this to explicitly use # a new sqlite3 db and then import the sql dump of that into the troggle db # instead of loading directly into the troggle sqlite db. - # in-menmor ":memory:" sqlite is ~ 7x faster and all of troggle can be + # in-memory ":memory:" sqlite is ~ 7x faster and all of troggle can be # loaded in 6 minutes that way djconn = django.db.connection from dump import _iterdump @@ -221,52 +236,76 @@ class JobQueue(): def showprofile(self): """Prints out the time it took to run the jobqueue""" for k in self.results_order: - percen=0 - lst = self.results[k] - if k == "runlabel": - r = lst[len(lst)-1] - print '%15s %s' % (k,r) + if k =="dirsredirect": + break + elif k =="syncuser": + break + elif k =="test": + break elif k =="date": - # Calculate dates as days before present to one decimal place - r = lst[len(lst)-1] - if len(lst)>2: - days = (lst[len(lst)-2]-r)/(24*60*60) - print '%15s %8.1f days ago' % (k,days) - elif len(lst)>2: - e = len(lst)-1 - percen = 100* (lst[e] - lst[e-1])/lst[e-1] - if abs(percen) >0.1: - print '%15s %8.1f%%' % (k, percen) - else: - print '%15s ' % (k) + print " days ago ", + else: + print '%9s (s)' % k, + percen=0 + r = self.results[k] + #print "min=",min + + for i in range(len(r)): + if k == "runlabel": + if r[i]: + rp = r[i] + else: + rp = " - " + print '%8s' % rp, + elif k =="date": + # Calculate dates as days before present + if r[i]: + if i == len(r)-1: + print " this", + else: + # prints one place to the left of where you expect + days = (r[i]-r[len(r)-1])/(24*60*60) + print '%8.2f' % days, + elif r[i]: + print '%8.1f' % r[i], + if i == len(r)-1 and r[i-1]: + percen = 100* (r[i] - r[i-1])/r[i-1] + if abs(percen) >0.1: + print '%8.1f%%' % percen, + else: + print " - ", + print "" return True def usage(): print("""Usage is 'python databaseReset.py [runlabel]' where command is: - reset - this is normal usage, clear database and reread everything from files - time-consuming + reset - normal usage: clear database and reread everything from files - time-consuming caves - read in the caves - logbooks - read in just the logbooks + logbooks - read in the logbooks people - read in the people from folk.csv - QMs - read in the QM csv files - reload_db - clear database (delete everything) and make empty tables - scans - NOT the scanned surveynotes ?! + QMs - read in the QM csv files (older caves only) + reinit - clear database (delete everything) and make empty tables. Import nothing. + scans - the survey scans in all the wallets survex - read in the survex files - all the survex blocks - surveys - read in the scanned surveynotes - tunnel - read in the Tunnel files - which scans the surveyscans too - survexpos - just the Pos out of the survex files (not part of reset) + tunnel - read in the Tunnel files - which scans the survey scans too + resetend - (archaic?) writecaves - *disabled* (archaic?) autologbooks - read in autologbooks (what are these?) dumplogbooks - write out autologbooks (not working?) - syncuser - needed after reloading database rom SQL backup + syncuser - needed after reloading database from SQL backup + surveys - read in scans by expo, must run after "people". Not used. test - testing... and [runlabel] is an optional string identifying this run of the script in the stored profiling data 'import-profile.json' + + caves and logbooks must be run on an empty db before the others as they + set up db tables used by the others. """) if __name__ == "__main__": @@ -275,31 +314,35 @@ if __name__ == "__main__": import django django.setup() - runlabel = sys.argv[len(sys.argv)-1] + if len(sys.argv)>2: + runlabel = sys.argv[len(sys.argv)-1] + else: + runlabel=None + jq = JobQueue(runlabel) - - if "test" in sys.argv: - jq.enq("reload",reload_db) - jq.enq("makedirs",make_dirs) + + if len(sys.argv)==1: + usage() + exit() + elif "test" in sys.argv: + jq.enq("reinit",reinit_db) + jq.enq("dirsredirect",dirsredirect) jq.enq("caves",import_caves) - jq.enq("survex",import_survex) - jq.enq("surveys",import_surveys) - + #jq.enq("people",import_people) + #jq.enq("logbooks",import_logbooks) elif "caves" in sys.argv: jq.enq("caves",import_caves) elif "logbooks" in sys.argv: - # management.call_command('syncdb', interactive=False) # this sets the path so that import settings works in import_survex jq.enq("logbooks",import_logbooks) elif "people" in sys.argv: - jq.enq("logbooks",import_people) + jq.enq("people",import_people) elif "QMs" in sys.argv: jq.enq("QMs",import_QMs) elif "reload_db" in sys.argv: jq.enq("reload",reload_db) elif "reset" in sys.argv: - jq.enq("reload",reload_db) - jq.enq("makedirs",make_dirs) - jq.enq("redirect",pageredirects) + jq.enq("reinit",reinit_db) + jq.enq("dirsredirect",dirsredirect) jq.enq("caves",import_caves) jq.enq("people",import_people) jq.enq("scans",import_surveyscans) @@ -307,16 +350,12 @@ if __name__ == "__main__": jq.enq("QMs",import_QMs) jq.enq("survex",import_survex) jq.enq("tunnel",import_tunnelfiles) - jq.enq("surveys",import_surveys) elif "scans" in sys.argv: jq.enq("scans",import_surveyscans) elif "survex" in sys.argv: - # management.call_command('syncdb', interactive=False) # this sets the path so that import settings works in import_survex jq.enq("survex",import_survex) elif "survexpos" in sys.argv: - # management.call_command('syncdb', interactive=False) # this sets the path so that import settings works in import_survex - import parsers.survex - jq.enq("survexpos",parsers.survex.LoadPos) + jq.enq("survexpos",import_survexpos) elif "surveys" in sys.argv: jq.enq("surveys",import_surveys) elif "tunnel" in sys.argv: @@ -336,8 +375,9 @@ if __name__ == "__main__": elif "dumplogbooks" in sys.argv: dumplogbooks() else: - print("%s not recognised" % sys.argv) usage() + print("%s not recognised as a command." % sys.argv[1]) + exit() jq.run() jq.showprofile() diff --git a/parsers/caves.py b/parsers/caves.py index 606007f..4f65675 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -152,7 +152,7 @@ def readcave(filename): slug = slug, primary = primary) except: - message = "Can't find text (slug): %s, skipping %s" % (slug, context) + message = " ! Can't find text (slug): %s, skipping %s" % (slug, context) models.DataIssue.objects.create(parser='caves', message=message) print(message) @@ -164,7 +164,7 @@ def readcave(filename): entrance = models.Entrance.objects.get(entranceslug__slug = slug) ce = models.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance) except: - message = "Entrance text (slug) %s missing %s" % (slug, context) + message = " ! Entrance text (slug) %s missing %s" % (slug, context) models.DataIssue.objects.create(parser='caves', message=message) print(message) @@ -172,14 +172,14 @@ def readcave(filename): def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""): items = re.findall("<%(itemname)s>(.*?)" % {"itemname": itemname}, text, re.S) if len(items) < minItems and printwarnings: - message = "%(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items), + message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items), "itemname": itemname, "min": minItems} + context models.DataIssue.objects.create(parser='caves', message=message) print(message) if maxItems is not None and len(items) > maxItems and printwarnings: - message = "%(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items), + message = " ! %(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items), "itemname": itemname, "max": maxItems} + context models.DataIssue.objects.create(parser='caves', message=message) diff --git a/parsers/people.py b/parsers/people.py index f7e2f50..32ab2c5 100644 --- a/parsers/people.py +++ b/parsers/people.py @@ -58,7 +58,7 @@ def LoadPersonsExpos(): header = dict(zip(headers, range(len(headers)))) # make expeditions - print("Loading expeditions") + print(" - Loading expeditions") years = headers[5:] for year in years: @@ -68,7 +68,7 @@ def LoadPersonsExpos(): save_carefully(models.Expedition, lookupAttribs, nonLookupAttribs) # make persons - print("Loading personexpeditions") + print(" - Loading personexpeditions") for personline in personreader: name = personline[header["Name"]] diff --git a/parsers/surveys.py b/parsers/surveys.py index 9bd063d..450725c 100644 --- a/parsers/surveys.py +++ b/parsers/surveys.py @@ -150,17 +150,19 @@ def parseSurveyScans(expedition, logfile=None): scanObj.save() except (IOError, OSError): yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year) - print("No folder found for " + expedition.year + " at:- " + yearPath) + print(" ! No folder found for " + expedition.year + " at:- " + yearPath) # dead def parseSurveys(logfile=None): try: readSurveysFromCSV() except (IOError, OSError): - print("Survey CSV not found..") + print(" ! Survey CSV not found..") pass + print " - Loading scans by expedition year" for expedition in Expedition.objects.filter(year__gte=2000): #expos since 2000, because paths and filenames were nonstandard before then + print "%s" % expedition, parseSurveyScans(expedition) # dead @@ -190,7 +192,7 @@ def GetListDir(sdir): def LoadListScansFile(survexscansfolder): gld = [ ] - # flatten out any directories in these book files + # flatten out any directories in these wallet folders - should not be any for (fyf, ffyf, fisdiryf) in GetListDir(survexscansfolder.fpath): if fisdiryf: gld.extend(GetListDir(ffyf)) @@ -199,7 +201,7 @@ def LoadListScansFile(survexscansfolder): for (fyf, ffyf, fisdiryf) in gld: #assert not fisdiryf, ffyf - if re.search(r"\.(?:png|jpg|jpeg)(?i)$", fyf): + if re.search(r"\.(?:png|jpg|jpeg|pdf|jpeg|svg)(?i)$", fyf): survexscansingle = SurvexScanSingle(ffile=ffyf, name=fyf, survexscansfolder=survexscansfolder) survexscansingle.save() @@ -208,7 +210,7 @@ def LoadListScansFile(survexscansfolder): # and builds up the models we can access later def LoadListScans(): - print('Loading Survey Scans...') + print(' - Loading Survey Scans... (deleting all objects first)') SurvexScanSingle.objects.all().delete() SurvexScansFolder.objects.all().delete() @@ -221,12 +223,14 @@ def LoadListScans(): # iterate into the surveyscans directory + print ' - ', for f, ff, fisdir in GetListDir(os.path.join(settings.SURVEY_SCANS, "surveyscans")): if not fisdir: continue # do the year folders if re.match(r"\d\d\d\d$", f): + print "%s" % f, for fy, ffy, fisdiry in GetListDir(ff): if fisdiry: assert fisdiry, ffy