forked from expo/troggle
Many fixes and speedups
This commit is contained in:
parent
6daa96b69e
commit
0853bbdd19
@ -27,33 +27,36 @@ An idea which no longer seems sensible given that we rely on the database to do
|
||||
# it can be checked up later from the hard-copy if necessary; or it's not possible to determine (name, trip place, etc)
|
||||
'''
|
||||
todo='''
|
||||
- Put the object store 'trips' and the 'logdataissues' into TROG global object
|
||||
|
||||
- Use the .shelve.db cache for all logbooks, not just individually
|
||||
|
||||
- refactor everything with some urgency, esp. LoadLogbookForExpedition()
|
||||
|
||||
-- far too many uses of Django field dereferencing to get values, which is SLOW
|
||||
- profile the code to find bad repetitive things, of which there are many.
|
||||
|
||||
- Loogbooks 1987, 1988, 1989 all crash on MySql - but not sqlite - with db constraint fail. Edit logbook to fix.
|
||||
- far too many uses of Django field dereferencing to get values, which is SLOW
|
||||
|
||||
- Logbooks 1987, 1988, 1989 all crash on MySql - but not sqlite - with db constraint fail. Edit logbook to fix.
|
||||
|
||||
- import/parse/re-export-as-html the 'artisanal-format' old logbooks so that
|
||||
we keep only a modern HTML05 format. Then we can retiure the old parsers and reduce the
|
||||
we keep only a modern HTML05 format. Then we can retire the old parsers and reduce the
|
||||
volume of code here substantially.
|
||||
|
||||
- rewrite to use generators rather than storing everything intermediate in lists - to reduce memory impact.
|
||||
|
||||
- We should ensure logbook.html is utf-8 and stop this crap:
|
||||
file_in = open(logbookfile,'rb')
|
||||
txt = file_in.read().decode("latin1")
|
||||
|
||||
- this is a slow and uncertain function: cave = getCaveByReference(caveRef)
|
||||
|
||||
- the object store will need additional functions to replicate the persontrip calculation
|
||||
and storage. For the moment we leave all that to be done in the django db
|
||||
Concurrent synchronisation would be nice..
|
||||
|
||||
- DB lock currently prevents multiple threads for loading logbooks. But asyncio might work..?
|
||||
|
||||
- We should ensure logbook.html is utf-8 and stop this crap:
|
||||
file_in = open(logbookfile,'rb')
|
||||
txt = file_in.read().decode("latin1")
|
||||
- Put the object store 'trips' and the 'logdataissues' into TROG global object
|
||||
|
||||
- this is a slow and uncertain function: cave = getCaveByReference(caveRef)
|
||||
'''
|
||||
|
||||
logentries = [] # the entire logbook for one year is a single object: a list of entries
|
||||
@ -62,7 +65,7 @@ noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plate
|
||||
logdataissues = TROG['issues']['logdataissues']
|
||||
trips ={}
|
||||
|
||||
entries = { "2022": 42, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
|
||||
entries = { "2022": 62, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
|
||||
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
|
||||
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
|
||||
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
|
||||
@ -101,7 +104,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
||||
|
||||
personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
|
||||
if not personyear:
|
||||
message = f" ! - {expedition.year} No name match for: '{tripperson}' "
|
||||
message = f" ! - {expedition.year} No name match for: '{tripperson}' in entry {tid=} for this expedition year."
|
||||
print(message)
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[tid]=message
|
||||
@ -115,7 +118,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
||||
|
||||
return res, author
|
||||
|
||||
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki", tid=None):
|
||||
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, tid=None):
|
||||
""" saves a logbook entry and related persontrips
|
||||
Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday !
|
||||
|
||||
@ -132,7 +135,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
||||
return
|
||||
|
||||
if not author:
|
||||
message = f" ! - {expedition.year} Warning: logentry: {title} - no author for entry in year "
|
||||
message = f" ! - {expedition.year} Warning: logentry: {title} - no author for entry '{tid}'"
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues["title"]=message
|
||||
print(message)
|
||||
@ -163,9 +166,9 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
||||
slug = tid + "_" + slugify(title)[:10].replace('-','_')
|
||||
else:
|
||||
slug = str(randint(1000,10000)) + "_" + slugify(title)[:10].replace('-','_')
|
||||
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug, 'entry_type':entry_type}
|
||||
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug}
|
||||
|
||||
# This cretes the lbo instance of LogbookEntry
|
||||
# This creates the lbo instance of LogbookEntry
|
||||
lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
|
||||
|
||||
|
||||
@ -253,14 +256,14 @@ def Parselogwikitxt(year, expedition, txt):
|
||||
trippeople, expedition, tu, "wiki", tripid)
|
||||
logentries.append(entrytuple)
|
||||
|
||||
EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople,
|
||||
expedition=expedition, logtime_underground=0, tid=tid)
|
||||
# EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople,
|
||||
# expedition=expedition, logtime_underground=0, tid=tid)
|
||||
|
||||
EnterLogIntoObjStore(year, ldate, tripcave, tripplace, triptext, trippeople,
|
||||
tu, "wiki", tripid, logbook_entry_count, tid=tid)
|
||||
# EnterLogIntoObjStore(year, ldate, tripcave, tripplace, triptext, trippeople,
|
||||
# tu, "wiki", tripid, logbook_entry_count, tid=tid)
|
||||
|
||||
|
||||
def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu, formattype, tripid1, seq, tid=None):
|
||||
def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu, tripid1, seq, tid=None):
|
||||
'''Called once for each logbook entry as the logbook is parsed
|
||||
'''
|
||||
# This will need additional functions to replicate the persontrip calculation and storage. For the
|
||||
@ -280,7 +283,7 @@ def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu,
|
||||
|
||||
if not tid:
|
||||
tid = set_trip_id(str(date),seq)
|
||||
trips[tid] = (year, date, tripcave, triptitle, text, trippeople, tu, formattype)
|
||||
trips[tid] = (year, date, tripcave, triptitle, text, trippeople, tu)
|
||||
|
||||
## copy a lot of checking functionality here from EnterLogIntoDbase()
|
||||
# GetTripPersons is a db query, so this will need to be put in ObjStore before this will work..
|
||||
@ -353,12 +356,12 @@ def Parseloghtmltxt(year, expedition, txt):
|
||||
trippeople, expedition, tu, "html", tripid1)
|
||||
logentries.append(entrytuple)
|
||||
|
||||
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext,
|
||||
trippeople=trippeople, expedition=expedition, logtime_underground=0,
|
||||
entry_type="html", tid=tid)
|
||||
# EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext,
|
||||
# trippeople=trippeople, expedition=expedition, logtime_underground=0,
|
||||
# entry_type="html", tid=tid)
|
||||
|
||||
EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
||||
"html", tripid1, logbook_entry_count, tid=tid)
|
||||
# EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
||||
# "html", tripid1, logbook_entry_count, tid=tid)
|
||||
|
||||
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
|
||||
# trying it out for years 1982 - 1990 too. Some logbook editing required by hand.. place
|
||||
@ -455,24 +458,24 @@ def Parseloghtml01(year, expedition, txt):
|
||||
entrytuple = (ldate, tripcave, triptitle, ltriptext,
|
||||
trippeople, expedition, tu, "html01", tid)
|
||||
logentries.append(entrytuple)
|
||||
try:
|
||||
EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext,
|
||||
trippeople=trippeople, expedition=expedition, logtime_underground=0,
|
||||
entry_type="html", tid=tid)
|
||||
except:
|
||||
message = " ! - Enter log entry into database FAIL exception in: " + tid
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[tid]=message
|
||||
print(message)
|
||||
# try:
|
||||
# EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext,
|
||||
# trippeople=trippeople, expedition=expedition, logtime_underground=0,
|
||||
# entry_type="html", tid=tid)
|
||||
# except:
|
||||
# message = " ! - Enter log entry into database FAIL exception in: " + tid
|
||||
# DataIssue.objects.create(parser='logbooks', message=message)
|
||||
# logdataissues[tid]=message
|
||||
# print(message)
|
||||
|
||||
try:
|
||||
EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
||||
"html01", tid, logbook_entry_count, tid=tid)
|
||||
except:
|
||||
message = " ! - Enter log entry into ObjectStore FAIL exception in: " + tid
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[tid]=message
|
||||
print(message)
|
||||
# try:
|
||||
# EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
||||
# "html01", tid, logbook_entry_count, tid=tid)
|
||||
# except:
|
||||
# message = " ! - Enter log entry into ObjectStore FAIL exception in: " + tid
|
||||
# DataIssue.objects.create(parser='logbooks', message=message)
|
||||
# logdataissues[tid]=message
|
||||
# print(message)
|
||||
|
||||
except:
|
||||
message = f" ! - Skipping logentry {year} due to exception in: {tid}"
|
||||
@ -514,7 +517,7 @@ def Parseloghtml03(year, expedition, txt):
|
||||
if re.match("T/U|Time underwater", sheader[-1]):
|
||||
tu = sheader.pop()
|
||||
if len(sheader) != 3:
|
||||
print((" ! Header not three pieces", sheader))
|
||||
print(" ! Header not three pieces", sheader)
|
||||
tripdate, triptitle, trippeople = sheader
|
||||
ldate = ParseDate(tripdate.strip(), year)
|
||||
triptitles = triptitle.split(" , ")
|
||||
@ -532,12 +535,12 @@ def Parseloghtml03(year, expedition, txt):
|
||||
trippeople, expedition, tu, "html03", tid)
|
||||
logentries.append(entrytuple)
|
||||
|
||||
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle,
|
||||
text = ltriptext, trippeople=trippeople, expedition=expedition,
|
||||
logtime_underground=0, entry_type="html", tid=tid)
|
||||
# EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle,
|
||||
# text = ltriptext, trippeople=trippeople, expedition=expedition,
|
||||
# logtime_underground=0, entry_type="html", tid=tid)
|
||||
|
||||
EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
||||
"html03", tid, logbook_entry_count, tid=tid)
|
||||
# EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
||||
# "html03", tid, logbook_entry_count, tid=tid)
|
||||
|
||||
|
||||
def SetDatesFromLogbookEntries(expedition):
|
||||
@ -618,7 +621,7 @@ def LoadLogbookForExpedition(expedition):
|
||||
expedition.logbookfile = yearlinks[year][0]
|
||||
parsefunc = yearlinks[year][1]
|
||||
else:
|
||||
logbookpath = os.path.join(expologbase, year, settings.DEFAULT_LOGBOOK_FILE)
|
||||
logbookpath = Path(expologbase) / year / settings.DEFAULT_LOGBOOK_FILE
|
||||
expedition.logbookfile = settings.DEFAULT_LOGBOOK_FILE
|
||||
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
|
||||
cache_filename = Path(str(logbookpath) + ".cache")
|
||||
@ -639,13 +642,13 @@ def LoadLogbookForExpedition(expedition):
|
||||
print(" - ! Cache is > 30 days old")
|
||||
bad_cache= True
|
||||
if bad_cache:
|
||||
print(" - ! Cache is either stale or more than 30 days old. Deleting it.")
|
||||
print(" - so cache is either stale or more than 30 days old. Deleting it.")
|
||||
os.remove(cache_filename)
|
||||
logentries=[]
|
||||
print(" ! Removed stale or corrupt cache file")
|
||||
print(" - Deleted stale or corrupt cache file")
|
||||
raise
|
||||
# print(" - Reading cache: " + str(cache_filename), end='')
|
||||
try:
|
||||
# print(" - Reading cache: " + str(cache_filename), end='')
|
||||
with open(cache_filename, "rb") as f:
|
||||
year,n,logentries = pickle.load(f)
|
||||
if validcache(year,n):
|
||||
@ -660,21 +663,22 @@ def LoadLogbookForExpedition(expedition):
|
||||
logentries=[]
|
||||
raise
|
||||
except :
|
||||
print(" - Cache de-pickle failure \"" + str(cache_filename) +"\"")
|
||||
print(" - Cache old or de-pickle failure \"" + str(cache_filename) +"\"")
|
||||
try:
|
||||
file_in = open(logbookpath,'rb')
|
||||
txt = file_in.read().decode("latin1")
|
||||
txt = file_in.read().decode("utf-8")
|
||||
file_in.close()
|
||||
logbook_parseable = True
|
||||
except (IOError):
|
||||
logbook_parseable = False
|
||||
print((" ! Couldn't open logbook " + logbookpath))
|
||||
print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
|
||||
|
||||
if logbook_parseable:
|
||||
parser = globals()[parsefunc]
|
||||
print(f' - Using parser {parsefunc}')
|
||||
parser(year, expedition, txt) # this launches the right parser for this year
|
||||
|
||||
print(" - Setting dates from logbook entries")
|
||||
SetDatesFromLogbookEntries(expedition)
|
||||
if len(logentries) >0:
|
||||
print(" - Cacheing " , len(logentries), " log entries")
|
||||
@ -686,11 +690,14 @@ def LoadLogbookForExpedition(expedition):
|
||||
|
||||
i=0
|
||||
for entrytuple in logentries:
|
||||
try:
|
||||
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1 = entrytuple
|
||||
except ValueError: # cope with removal of entry_type but still in cache files. Remove in Sept. 2022.
|
||||
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = entrytuple
|
||||
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
|
||||
entry_type, tripid1)
|
||||
tripid1)
|
||||
EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, logtime_underground,
|
||||
entry_type, tripid1, i)
|
||||
tripid1, i)
|
||||
i +=1
|
||||
SetDatesFromLogbookEntries(expedition)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user