mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2025-01-19 09:22:32 +00:00
New caching for parsed logbooks. All logbooks load in 75 seconds now.
This commit is contained in:
parent
98fd314a62
commit
ac9f3cf061
@ -127,7 +127,7 @@ def import_auto_logbooks():
|
||||
print(os.path.join(root, filename))
|
||||
parsers.logbooks.parseAutoLogBookEntry(os.path.join(root, filename))
|
||||
|
||||
#Temporary function until definative source of data transfered.
|
||||
#Temporary function until definitive source of data transferred.
|
||||
from django.template.defaultfilters import slugify
|
||||
from django.template import Context, loader
|
||||
def dumplogbooks():
|
||||
@ -177,16 +177,16 @@ def usage():
|
||||
caves - read in the caves
|
||||
folklog - read in the people (folk) and then the logbooks
|
||||
logbooks - read in just the logbooks
|
||||
autologbooks - read in autologbooks
|
||||
autologbooks - read in autologbooks (what are these?)
|
||||
dumplogbooks - write out autologbooks (not working?)
|
||||
people - read in the people from folk.csv
|
||||
QMs - read in the QM files
|
||||
resetend
|
||||
scans - NOT the scanned surveynotes ?!
|
||||
survex - read in the survex files
|
||||
survexpos
|
||||
survex - read in the survex files - all the survex blocks
|
||||
survexpos - just the Pos out of the survex files
|
||||
surveys - read in the scanned surveynotes
|
||||
tunnel - read in the Tunnel files
|
||||
tunnel - read in the Tunnel files - which scans the surveyscans too
|
||||
""")
|
||||
|
||||
if __name__ == "__main__":
|
||||
@ -214,10 +214,7 @@ if __name__ == "__main__":
|
||||
elif "resetend" in sys.argv:
|
||||
#import_logbooks()
|
||||
import_QMs()
|
||||
try:
|
||||
import_tunnelfiles()
|
||||
except:
|
||||
print("Tunnel files parser broken.")
|
||||
import_tunnelfiles()
|
||||
import_surveys()
|
||||
import_descriptions()
|
||||
parse_descriptions()
|
||||
|
@ -12,8 +12,9 @@ from django.utils.timezone import make_aware
|
||||
|
||||
import csv
|
||||
import re
|
||||
import datetime
|
||||
import datetime, time
|
||||
import os
|
||||
import pickle
|
||||
|
||||
from utils import save_carefully
|
||||
|
||||
@ -78,10 +79,17 @@ def GetTripCave(place): #need to be fuzzier about matching here. Already a very
|
||||
print("No cave found for place " , place)
|
||||
return
|
||||
|
||||
|
||||
logentries = [] # the entire logbook is a single object: a list of entries
|
||||
noncaveplaces = [ "Journey", "Loser Plateau" ]
|
||||
|
||||
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki"):
|
||||
""" saves a logbook entry and related persontrips """
|
||||
global logentries
|
||||
|
||||
entrytuple = (date, place, title, text,
|
||||
trippeople, expedition, logtime_underground, entry_type)
|
||||
logentries.append(entrytuple)
|
||||
|
||||
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground)
|
||||
if not author:
|
||||
print(" - Skipping logentry: " + title + " - no author for entry")
|
||||
@ -100,12 +108,14 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
||||
lookupAttribs={'date':date, 'title':title}
|
||||
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave':cave, 'slug':slugify(title)[:50], 'entry_type':entry_type}
|
||||
lbo, created=save_carefully(models.LogbookEntry, lookupAttribs, nonLookupAttribs)
|
||||
#logentries.append(models.LogbookEntry)
|
||||
|
||||
|
||||
for tripperson, time_underground in trippersons:
|
||||
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
|
||||
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
|
||||
#print nonLookupAttribs
|
||||
save_carefully(models.PersonTrip, lookupAttribs, nonLookupAttribs)
|
||||
#logentries.append(models.PersonTrip)
|
||||
|
||||
|
||||
def ParseDate(tripdate, year):
|
||||
@ -189,7 +199,7 @@ def Parseloghtmltxt(year, expedition, txt):
|
||||
trippeople=trippeople, expedition=expedition, logtime_underground=0,
|
||||
entry_type="html")
|
||||
if logbook_entry_count == 0:
|
||||
print(" - No trip entrys found in logbook, check the syntax matches htmltxt format")
|
||||
print(" - No trip entries found in logbook, check the syntax matches htmltxt format")
|
||||
|
||||
|
||||
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
|
||||
@ -293,40 +303,87 @@ def SetDatesFromLogbookEntries(expedition):
|
||||
|
||||
def LoadLogbookForExpedition(expedition):
|
||||
""" Parses all logbook entries for one expedition """
|
||||
|
||||
|
||||
global logentries
|
||||
|
||||
expowebbase = os.path.join(settings.EXPOWEB, "years")
|
||||
yearlinks = settings.LOGBOOK_PARSER_SETTINGS
|
||||
|
||||
logbook_parseable = False
|
||||
|
||||
logbook_cached = False
|
||||
|
||||
if expedition.year in yearlinks:
|
||||
# print " - Valid logbook year: ", expedition.year
|
||||
year_settings = yearlinks[expedition.year]
|
||||
file_in = open(os.path.join(expowebbase, year_settings[0]))
|
||||
txt = file_in.read().decode("latin1")
|
||||
file_in.close()
|
||||
parsefunc = year_settings[1]
|
||||
logbook_parseable = True
|
||||
print(" - Parsing logbook: " + year_settings[0] + "\n - Using parser: " + year_settings[1])
|
||||
else:
|
||||
try:
|
||||
file_in = open(os.path.join(expowebbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE))
|
||||
bad_cache = False
|
||||
cache_filename = os.path.join(expowebbase, year_settings[0])+".cache"
|
||||
now = time.time()
|
||||
cache_t = os.path.getmtime(cache_filename)
|
||||
file_t = os.path.getmtime(os.path.join(expowebbase, year_settings[0]))
|
||||
if file_t - cache_t > 2: # at least 2 secs later
|
||||
#print " - Cache is stale."
|
||||
bad_cache= True
|
||||
if now - cache_t > 30*24*60*60:
|
||||
#print " - Cache is more than 30 days old."
|
||||
bad_cache= True
|
||||
if bad_cache:
|
||||
print " - Cache is either stale or more than 30 days old. Deleting it."
|
||||
os.remove(cache_filename)
|
||||
logentries=[]
|
||||
raise
|
||||
print(" - Reading cache: " + cache_filename )
|
||||
try:
|
||||
with open(cache_filename, "rb") as f:
|
||||
logentries = pickle.load(f)
|
||||
print " - Loaded ", len(logentries), " objects"
|
||||
logbook_cached = True
|
||||
except:
|
||||
print " - Failed to load corrupt cache. Deleting it.\n"
|
||||
os.remove(cache_filename)
|
||||
logentries=[]
|
||||
except:
|
||||
print(" - Opening logbook: ")
|
||||
file_in = open(os.path.join(expowebbase, year_settings[0]))
|
||||
txt = file_in.read().decode("latin1")
|
||||
file_in.close()
|
||||
parsefunc = year_settings[1]
|
||||
logbook_parseable = True
|
||||
print("No set parser found using default")
|
||||
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
|
||||
except (IOError):
|
||||
logbook_parseable = False
|
||||
print("Couldn't open default logbook file and nothing in settings for expo " + expedition.year)
|
||||
print(" - Parsing logbook: " + year_settings[0] + "\n - Using parser: " + year_settings[1])
|
||||
else:
|
||||
try:
|
||||
file_in = open(os.path.join(expowebbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE))
|
||||
txt = file_in.read().decode("latin1")
|
||||
file_in.close()
|
||||
logbook_parseable = True
|
||||
print("No set parser found using default")
|
||||
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
|
||||
except (IOError):
|
||||
logbook_parseable = False
|
||||
print("Couldn't open default logbook file and nothing in settings for expo " + expedition.year)
|
||||
|
||||
if logbook_parseable:
|
||||
parser = globals()[parsefunc]
|
||||
parser(expedition.year, expedition, txt)
|
||||
SetDatesFromLogbookEntries(expedition)
|
||||
# and this has also stored all the objects in logentries[]
|
||||
print " - Storing " , len(logentries), " log entries"
|
||||
with open(cache_filename, "wb") as f:
|
||||
pickle.dump(logentries, f, 2)
|
||||
logentries=[] # flush for next year
|
||||
|
||||
if logbook_cached:
|
||||
i=0
|
||||
for entrytuple in range(len(logentries)):
|
||||
date, place, title, text, trippeople, expedition, logtime_underground, \
|
||||
entry_type = logentries[i]
|
||||
#print " - - obj ", i, date, title
|
||||
EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground,\
|
||||
entry_type)
|
||||
i +=1
|
||||
|
||||
#return "TOLOAD: " + year + " " + str(expedition.personexpedition_set.all()[1].logbookentry_set.count()) + " " + str(models.PersonTrip.objects.filter(personexpedition__expedition=expedition).count())
|
||||
|
||||
|
||||
def LoadLogbooks():
|
||||
""" This is the master function for parsing all logbooks into the Troggle database. """
|
||||
|
||||
@ -372,7 +429,7 @@ def parseAutoLogBookEntry(filename):
|
||||
except models.Expedition.DoesNotExist:
|
||||
errors.append("Expedition not in database")
|
||||
else:
|
||||
errors.append("Expediton Year could not be parsed")
|
||||
errors.append("Expedition Year could not be parsed")
|
||||
|
||||
titleMatch = titleRegex.search(contents)
|
||||
if titleMatch:
|
||||
|
Loading…
Reference in New Issue
Block a user