forked from expo/troggle
New cacheing for parsed logbooks. All logbooks load in 75 seconds now.
This commit is contained in:
parent 98fd314a62
commit ac9f3cf061
databaseReset.py
@@ -127,7 +127,7 @@ def import_auto_logbooks():
             print(os.path.join(root, filename))
             parsers.logbooks.parseAutoLogBookEntry(os.path.join(root, filename))
 
-#Temporary function until definative source of data transfered.
+#Temporary function until definitive source of data transfered.
 from django.template.defaultfilters import slugify
 from django.template import Context, loader
 def dumplogbooks():
@@ -177,16 +177,16 @@ def usage():
         caves - read in the caves
         folklog - read in the people (folk) and then the logbooks
         logbooks - read in just the logbooks
-        autologbooks - read in autologbooks
+        autologbooks - read in autologbooks (what are these?)
         dumplogbooks - write out autologbooks (not working?)
         people - read in the people from folk.csv
         QMs - read in the QM files
         resetend
         scans - NOT the scanned surveynotes ?!
-        survex - read in the survex files
-        survexpos
+        survex - read in the survex files - all the survex blocks
+        survexpos - just the Pos out of the survex files
         surveys - read in the scanned surveynotes
-        tunnel - read in the Tunnel files
+        tunnel - read in the Tunnel files - which scans the surveyscans too
         """)
 
 if __name__ == "__main__":
@@ -214,10 +214,7 @@ if __name__ == "__main__":
     elif "resetend" in sys.argv:
         #import_logbooks()
         import_QMs()
-        try:
-            import_tunnelfiles()
-        except:
-            print("Tunnel files parser broken.")
+        import_tunnelfiles()
         import_surveys()
         import_descriptions()
         parse_descriptions()
parsers/logbooks.py
@@ -12,8 +12,9 @@ from django.utils.timezone import make_aware
 
 import csv
 import re
-import datetime
+import datetime, time
 import os
+import pickle
 
 from utils import save_carefully
 
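These two new imports carry the whole caching mechanism: time for the staleness arithmetic and pickle for serialising the parsed entries to disk. A minimal sketch of the round-trip the cache relies on, using a throwaway path rather than the real .cache file next to the logbook:

    import pickle

    entries = [("2009-07-21", "Journey", "Travel day", "...", "PS", None, 0, "html")]

    # Protocol 2 keeps the file loadable from Python 2, which the new code
    # in this commit still targets (note its bare print statements below).
    with open("/tmp/logbook.cache", "wb") as f:   # hypothetical path
        pickle.dump(entries, f, 2)

    with open("/tmp/logbook.cache", "rb") as f:
        assert pickle.load(f) == entries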
@@ -78,10 +79,17 @@ def GetTripCave(place): #need to be fuzzier about matching here. Already a very
         print("No cave found for place " , place)
         return
 
+logentries = [] # the entire logbook is a single object: a list of entries
 noncaveplaces = [ "Journey", "Loser Plateau" ]
 
 def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki"):
     """ saves a logbook entry and related persontrips """
+    global logentries
+
+    entrytuple = (date, place, title, text,
+                    trippeople, expedition, logtime_underground, entry_type)
+    logentries.append(entrytuple)
+
     trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground)
     if not author:
         print(" - Skipping logentry: " + title + " - no author for entry")
@@ -100,12 +108,14 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
     lookupAttribs={'date':date, 'title':title}
     nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave':cave, 'slug':slugify(title)[:50], 'entry_type':entry_type}
     lbo, created=save_carefully(models.LogbookEntry, lookupAttribs, nonLookupAttribs)
+    #logentries.append(models.LogbookEntry)
+
 
     for tripperson, time_underground in trippersons:
         lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
         nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
-        #print nonLookupAttribs
         save_carefully(models.PersonTrip, lookupAttribs, nonLookupAttribs)
+        #logentries.append(models.PersonTrip)
 
 
 def ParseDate(tripdate, year):
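Together, the two hunks above set up an accumulate-and-replay scheme: every call to EnterLogIntoDbase first records its arguments as a plain tuple in the module-level logentries list, so a year's logbook has to be parsed from HTML only once; afterwards the tuple list can be pickled and replayed through the same function. A minimal, self-contained sketch of the pattern (names and the stub body are hypothetical):

    logentries = []   # one tuple per parsed entry, in parse order

    def enter_log_into_dbase(date, place, title):
        logentries.append((date, place, title))          # record the call first
        print("saving %s %s %s" % (date, place, title))  # stand-in for the ORM work

    # first run: the HTML parser calls the function directly
    enter_log_into_dbase("2009-07-21", "161", "Rigging trip")

    # later runs: the pickled list is replayed through the same function,
    # skipping the expensive HTML parse entirely
    cached, logentries = logentries, []
    for entrytuple in cached:
        enter_log_into_dbase(*entrytuple)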
@@ -189,7 +199,7 @@ def Parseloghtmltxt(year, expedition, txt):
                           trippeople=trippeople, expedition=expedition, logtime_underground=0,
                           entry_type="html")
     if logbook_entry_count == 0:
-        print(" - No trip entrys found in logbook, check the syntax matches htmltxt format")
+        print(" - No trip entries found in logbook, check the syntax matches htmltxt format")
 
 
 # main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
@@ -294,39 +304,86 @@ def SetDatesFromLogbookEntries(expedition):
 def LoadLogbookForExpedition(expedition):
     """ Parses all logbook entries for one expedition """
 
+    global logentries
+
     expowebbase = os.path.join(settings.EXPOWEB, "years")
     yearlinks = settings.LOGBOOK_PARSER_SETTINGS
 
     logbook_parseable = False
+    logbook_cached = False
 
     if expedition.year in yearlinks:
+        # print " - Valid logbook year: ", expedition.year
         year_settings = yearlinks[expedition.year]
-        file_in = open(os.path.join(expowebbase, year_settings[0]))
-        txt = file_in.read().decode("latin1")
-        file_in.close()
-        parsefunc = year_settings[1]
-        logbook_parseable = True
-        print(" - Parsing logbook: " + year_settings[0] + "\n - Using parser: " + year_settings[1])
-    else:
         try:
-            file_in = open(os.path.join(expowebbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE))
+            bad_cache = False
+            cache_filename = os.path.join(expowebbase, year_settings[0])+".cache"
+            now = time.time()
+            cache_t = os.path.getmtime(cache_filename)
+            file_t = os.path.getmtime(os.path.join(expowebbase, year_settings[0]))
+            if file_t - cache_t > 2: # at least 2 secs later
+                #print " - Cache is stale."
+                bad_cache= True
+            if now - cache_t > 30*24*60*60:
+                #print " - Cache is more than 30 days old."
+                bad_cache= True
+            if bad_cache:
+                print " - Cache is either stale or more than 30 days old. Deleting it."
+                os.remove(cache_filename)
+                logentries=[]
+                raise
+            print(" - Reading cache: " + cache_filename )
+            try:
+                with open(cache_filename, "rb") as f:
+                    logentries = pickle.load(f)
+                print " - Loaded ", len(logentries), " objects"
+                logbook_cached = True
+            except:
+                print " - Failed to load corrupt cache. Deleting it.\n"
+                os.remove(cache_filename)
+                logentries=[]
+        except:
+            print(" - Opening logbook: ")
+            file_in = open(os.path.join(expowebbase, year_settings[0]))
             txt = file_in.read().decode("latin1")
             file_in.close()
+            parsefunc = year_settings[1]
             logbook_parseable = True
-            print("No set parser found using default")
-            parsefunc = settings.DEFAULT_LOGBOOK_PARSER
-        except (IOError):
-            logbook_parseable = False
-            print("Couldn't open default logbook file and nothing in settings for expo " + expedition.year)
+            print(" - Parsing logbook: " + year_settings[0] + "\n - Using parser: " + year_settings[1])
+    else:
+        try:
+            file_in = open(os.path.join(expowebbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE))
+            txt = file_in.read().decode("latin1")
+            file_in.close()
+            logbook_parseable = True
+            print("No set parser found using default")
+            parsefunc = settings.DEFAULT_LOGBOOK_PARSER
+        except (IOError):
+            logbook_parseable = False
+            print("Couldn't open default logbook file and nothing in settings for expo " + expedition.year)
 
     if logbook_parseable:
         parser = globals()[parsefunc]
         parser(expedition.year, expedition, txt)
         SetDatesFromLogbookEntries(expedition)
+        # and this has also stored all the objects in logentries[]
+        print " - Storing " , len(logentries), " log entries"
+        with open(cache_filename, "wb") as f:
+            pickle.dump(logentries, f, 2)
+        logentries=[] # flush for next year
 
+    if logbook_cached:
+        i=0
+        for entrytuple in range(len(logentries)):
+            date, place, title, text, trippeople, expedition, logtime_underground, \
+                entry_type = logentries[i]
+            #print " - - obj ", i, date, title
+            EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground,\
+                entry_type)
+            i +=1
 
     #return "TOLOAD: " + year + " " + str(expedition.personexpedition_set.all()[1].logbookentry_set.count()) + " " + str(models.PersonTrip.objects.filter(personexpedition__expedition=expedition).count())
 
 
 def LoadLogbooks():
     """ This is the master function for parsing all logbooks into the Troggle database. """
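The cache-validity rules in the hunk above are: throw the cache away if the logbook file is more than 2 seconds newer than its .cache file, if the cache is over 30 days old, or if unpickling fails; otherwise load the tuples and replay them through EnterLogIntoDbase (the if logbook_cached block) instead of re-parsing the HTML. A compact restatement of that logic as one self-contained helper, with hypothetical names (load_or_parse, the parse callable, max_age), might look like:

    import os
    import pickle
    import time

    def load_or_parse(source, parse, max_age=30*24*60*60):
        # Return the parsed entries for `source`, reading `source + ".cache"`
        # when it is fresh and rewriting it after any re-parse.
        cache = source + ".cache"
        try:
            cache_t = os.path.getmtime(cache)               # raises OSError if absent
            stale = os.path.getmtime(source) - cache_t > 2  # source edited since caching
            too_old = time.time() - cache_t > max_age       # cache past its shelf life
            if stale or too_old:
                os.remove(cache)
                raise OSError("cache invalid")
            with open(cache, "rb") as f:
                return pickle.load(f)                       # fast path: no parsing
        except (OSError, pickle.UnpicklingError):
            entries = parse(source)                         # slow path: full parse
            with open(cache, "wb") as f:
                pickle.dump(entries, f, 2)                  # protocol 2, Python 2 readable
            return entries

One incidental tidy-up the sketch makes: the commit's replay loop iterates for entrytuple in range(len(logentries)) while indexing with a separate counter i; iterating the list directly (for entrytuple in logentries: EnterLogIntoDbase(*entrytuple)) does the same job.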
@@ -372,7 +429,7 @@ def parseAutoLogBookEntry(filename):
         except models.Expedition.DoesNotExist:
             errors.append("Expedition not in database")
     else:
-        errors.append("Expediton Year could not be parsed")
+        errors.append("Expedition Year could not be parsed")
 
     titleMatch = titleRegex.search(contents)
     if titleMatch: