forked from expo/troggle
Many fixes and speedups
This commit is contained in:
parent
6daa96b69e
commit
0853bbdd19
@ -27,33 +27,36 @@ An idea which no longer seems sensible given that we rely on the database to do
|
|||||||
# it can be checked up later from the hard-copy if necessary; or it's not possible to determine (name, trip place, etc)
|
# it can be checked up later from the hard-copy if necessary; or it's not possible to determine (name, trip place, etc)
|
||||||
'''
|
'''
|
||||||
todo='''
|
todo='''
|
||||||
- Put the object store 'trips' and the 'logdataissues' into TROG global object
|
|
||||||
|
|
||||||
- Use the .shelve.db cache for all logbooks, not just individually
|
- Use the .shelve.db cache for all logbooks, not just individually
|
||||||
|
|
||||||
- refactor everything with some urgency, esp. LoadLogbookForExpedition()
|
- refactor everything with some urgency, esp. LoadLogbookForExpedition()
|
||||||
|
|
||||||
-- far too many uses of Django field dereferencing to get values, which is SLOW
|
- profile the code to find bad repetitive things, of which there are many.
|
||||||
|
|
||||||
- Loogbooks 1987, 1988, 1989 all crash on MySql - but not sqlite - with db constraint fail. Edit logbook to fix.
|
- far too many uses of Django field dereferencing to get values, which is SLOW
|
||||||
|
|
||||||
|
- Logbooks 1987, 1988, 1989 all crash on MySql - but not sqlite - with db constraint fail. Edit logbook to fix.
|
||||||
|
|
||||||
- import/parse/re-export-as-html the 'artisanal-format' old logbooks so that
|
- import/parse/re-export-as-html the 'artisanal-format' old logbooks so that
|
||||||
we keep only a modern HTML05 format. Then we can retiure the old parsers and reduce the
|
we keep only a modern HTML05 format. Then we can retire the old parsers and reduce the
|
||||||
volume of code here substantially.
|
volume of code here substantially.
|
||||||
|
|
||||||
- rewrite to use generators rather than storing everything intermediate in lists - to reduce memory impact.
|
- rewrite to use generators rather than storing everything intermediate in lists - to reduce memory impact.
|
||||||
|
|
||||||
- the object store will need additional functions to replicate the persontrip calculation
|
|
||||||
and storage. For the moment we leave all that to be done in the django db
|
|
||||||
Concurrent synchronisation would be nice..
|
|
||||||
|
|
||||||
- DB lock currently prevents multiple threads for loading logbooks. But asyncio might work..?
|
|
||||||
|
|
||||||
- We should ensure logbook.html is utf-8 and stop this crap:
|
- We should ensure logbook.html is utf-8 and stop this crap:
|
||||||
file_in = open(logbookfile,'rb')
|
file_in = open(logbookfile,'rb')
|
||||||
txt = file_in.read().decode("latin1")
|
txt = file_in.read().decode("latin1")
|
||||||
|
|
||||||
- this is a slow and uncertain function: cave = getCaveByReference(caveRef)
|
- this is a slow and uncertain function: cave = getCaveByReference(caveRef)
|
||||||
|
|
||||||
|
- the object store will need additional functions to replicate the persontrip calculation
|
||||||
|
and storage. For the moment we leave all that to be done in the django db
|
||||||
|
Concurrent synchronisation would be nice..
|
||||||
|
|
||||||
|
- DB lock currently prevents multiple threads for loading logbooks. But asyncio might work..?
|
||||||
|
|
||||||
|
- Put the object store 'trips' and the 'logdataissues' into TROG global object
|
||||||
|
|
||||||
'''
|
'''
|
||||||
|
|
||||||
logentries = [] # the entire logbook for one year is a single object: a list of entries
|
logentries = [] # the entire logbook for one year is a single object: a list of entries
|
||||||
@ -62,7 +65,7 @@ noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plate
|
|||||||
logdataissues = TROG['issues']['logdataissues']
|
logdataissues = TROG['issues']['logdataissues']
|
||||||
trips ={}
|
trips ={}
|
||||||
|
|
||||||
entries = { "2022": 42, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
|
entries = { "2022": 62, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
|
||||||
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
|
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
|
||||||
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
|
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
|
||||||
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
|
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
|
||||||
@ -101,7 +104,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
|||||||
|
|
||||||
personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
|
personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
|
||||||
if not personyear:
|
if not personyear:
|
||||||
message = f" ! - {expedition.year} No name match for: '{tripperson}' "
|
message = f" ! - {expedition.year} No name match for: '{tripperson}' in entry {tid=} for this expedition year."
|
||||||
print(message)
|
print(message)
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser='logbooks', message=message)
|
||||||
logdataissues[tid]=message
|
logdataissues[tid]=message
|
||||||
@ -115,7 +118,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
|||||||
|
|
||||||
return res, author
|
return res, author
|
||||||
|
|
||||||
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki", tid=None):
|
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, tid=None):
|
||||||
""" saves a logbook entry and related persontrips
|
""" saves a logbook entry and related persontrips
|
||||||
Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday !
|
Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday !
|
||||||
|
|
||||||
@ -132,7 +135,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
|||||||
return
|
return
|
||||||
|
|
||||||
if not author:
|
if not author:
|
||||||
message = f" ! - {expedition.year} Warning: logentry: {title} - no author for entry in year "
|
message = f" ! - {expedition.year} Warning: logentry: {title} - no author for entry '{tid}'"
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser='logbooks', message=message)
|
||||||
logdataissues["title"]=message
|
logdataissues["title"]=message
|
||||||
print(message)
|
print(message)
|
||||||
@ -163,9 +166,9 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
|||||||
slug = tid + "_" + slugify(title)[:10].replace('-','_')
|
slug = tid + "_" + slugify(title)[:10].replace('-','_')
|
||||||
else:
|
else:
|
||||||
slug = str(randint(1000,10000)) + "_" + slugify(title)[:10].replace('-','_')
|
slug = str(randint(1000,10000)) + "_" + slugify(title)[:10].replace('-','_')
|
||||||
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug, 'entry_type':entry_type}
|
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug}
|
||||||
|
|
||||||
# This cretes the lbo instance of LogbookEntry
|
# This creates the lbo instance of LogbookEntry
|
||||||
lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
|
lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
|
||||||
|
|
||||||
|
|
||||||
@ -253,14 +256,14 @@ def Parselogwikitxt(year, expedition, txt):
|
|||||||
trippeople, expedition, tu, "wiki", tripid)
|
trippeople, expedition, tu, "wiki", tripid)
|
||||||
logentries.append(entrytuple)
|
logentries.append(entrytuple)
|
||||||
|
|
||||||
EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople,
|
# EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople,
|
||||||
expedition=expedition, logtime_underground=0, tid=tid)
|
# expedition=expedition, logtime_underground=0, tid=tid)
|
||||||
|
|
||||||
EnterLogIntoObjStore(year, ldate, tripcave, tripplace, triptext, trippeople,
|
# EnterLogIntoObjStore(year, ldate, tripcave, tripplace, triptext, trippeople,
|
||||||
tu, "wiki", tripid, logbook_entry_count, tid=tid)
|
# tu, "wiki", tripid, logbook_entry_count, tid=tid)
|
||||||
|
|
||||||
|
|
||||||
def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu, formattype, tripid1, seq, tid=None):
|
def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu, tripid1, seq, tid=None):
|
||||||
'''Called once for each logbook entry as the logbook is parsed
|
'''Called once for each logbook entry as the logbook is parsed
|
||||||
'''
|
'''
|
||||||
# This will need additional functions to replicate the persontrip calculation and storage. For the
|
# This will need additional functions to replicate the persontrip calculation and storage. For the
|
||||||
@ -280,7 +283,7 @@ def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu,
|
|||||||
|
|
||||||
if not tid:
|
if not tid:
|
||||||
tid = set_trip_id(str(date),seq)
|
tid = set_trip_id(str(date),seq)
|
||||||
trips[tid] = (year, date, tripcave, triptitle, text, trippeople, tu, formattype)
|
trips[tid] = (year, date, tripcave, triptitle, text, trippeople, tu)
|
||||||
|
|
||||||
## copy a lot of checking functionality here from EnterLogIntoDbase()
|
## copy a lot of checking functionality here from EnterLogIntoDbase()
|
||||||
# GetTripPersons is a db query, so this will need to be put in ObjStore before this will work..
|
# GetTripPersons is a db query, so this will need to be put in ObjStore before this will work..
|
||||||
@ -353,12 +356,12 @@ def Parseloghtmltxt(year, expedition, txt):
|
|||||||
trippeople, expedition, tu, "html", tripid1)
|
trippeople, expedition, tu, "html", tripid1)
|
||||||
logentries.append(entrytuple)
|
logentries.append(entrytuple)
|
||||||
|
|
||||||
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext,
|
# EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext,
|
||||||
trippeople=trippeople, expedition=expedition, logtime_underground=0,
|
# trippeople=trippeople, expedition=expedition, logtime_underground=0,
|
||||||
entry_type="html", tid=tid)
|
# entry_type="html", tid=tid)
|
||||||
|
|
||||||
EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
# EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
||||||
"html", tripid1, logbook_entry_count, tid=tid)
|
# "html", tripid1, logbook_entry_count, tid=tid)
|
||||||
|
|
||||||
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
|
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
|
||||||
# trying it out for years 1982 - 1990 too. Some logbook editing required by hand.. place
|
# trying it out for years 1982 - 1990 too. Some logbook editing required by hand.. place
|
||||||
@ -455,24 +458,24 @@ def Parseloghtml01(year, expedition, txt):
|
|||||||
entrytuple = (ldate, tripcave, triptitle, ltriptext,
|
entrytuple = (ldate, tripcave, triptitle, ltriptext,
|
||||||
trippeople, expedition, tu, "html01", tid)
|
trippeople, expedition, tu, "html01", tid)
|
||||||
logentries.append(entrytuple)
|
logentries.append(entrytuple)
|
||||||
try:
|
# try:
|
||||||
EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext,
|
# EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext,
|
||||||
trippeople=trippeople, expedition=expedition, logtime_underground=0,
|
# trippeople=trippeople, expedition=expedition, logtime_underground=0,
|
||||||
entry_type="html", tid=tid)
|
# entry_type="html", tid=tid)
|
||||||
except:
|
# except:
|
||||||
message = " ! - Enter log entry into database FAIL exception in: " + tid
|
# message = " ! - Enter log entry into database FAIL exception in: " + tid
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
# DataIssue.objects.create(parser='logbooks', message=message)
|
||||||
logdataissues[tid]=message
|
# logdataissues[tid]=message
|
||||||
print(message)
|
# print(message)
|
||||||
|
|
||||||
try:
|
# try:
|
||||||
EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
# EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
||||||
"html01", tid, logbook_entry_count, tid=tid)
|
# "html01", tid, logbook_entry_count, tid=tid)
|
||||||
except:
|
# except:
|
||||||
message = " ! - Enter log entry into ObjectStore FAIL exception in: " + tid
|
# message = " ! - Enter log entry into ObjectStore FAIL exception in: " + tid
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
# DataIssue.objects.create(parser='logbooks', message=message)
|
||||||
logdataissues[tid]=message
|
# logdataissues[tid]=message
|
||||||
print(message)
|
# print(message)
|
||||||
|
|
||||||
except:
|
except:
|
||||||
message = f" ! - Skipping logentry {year} due to exception in: {tid}"
|
message = f" ! - Skipping logentry {year} due to exception in: {tid}"
|
||||||
@ -514,7 +517,7 @@ def Parseloghtml03(year, expedition, txt):
|
|||||||
if re.match("T/U|Time underwater", sheader[-1]):
|
if re.match("T/U|Time underwater", sheader[-1]):
|
||||||
tu = sheader.pop()
|
tu = sheader.pop()
|
||||||
if len(sheader) != 3:
|
if len(sheader) != 3:
|
||||||
print((" ! Header not three pieces", sheader))
|
print(" ! Header not three pieces", sheader)
|
||||||
tripdate, triptitle, trippeople = sheader
|
tripdate, triptitle, trippeople = sheader
|
||||||
ldate = ParseDate(tripdate.strip(), year)
|
ldate = ParseDate(tripdate.strip(), year)
|
||||||
triptitles = triptitle.split(" , ")
|
triptitles = triptitle.split(" , ")
|
||||||
@ -532,12 +535,12 @@ def Parseloghtml03(year, expedition, txt):
|
|||||||
trippeople, expedition, tu, "html03", tid)
|
trippeople, expedition, tu, "html03", tid)
|
||||||
logentries.append(entrytuple)
|
logentries.append(entrytuple)
|
||||||
|
|
||||||
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle,
|
# EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle,
|
||||||
text = ltriptext, trippeople=trippeople, expedition=expedition,
|
# text = ltriptext, trippeople=trippeople, expedition=expedition,
|
||||||
logtime_underground=0, entry_type="html", tid=tid)
|
# logtime_underground=0, entry_type="html", tid=tid)
|
||||||
|
|
||||||
EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
# EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
||||||
"html03", tid, logbook_entry_count, tid=tid)
|
# "html03", tid, logbook_entry_count, tid=tid)
|
||||||
|
|
||||||
|
|
||||||
def SetDatesFromLogbookEntries(expedition):
|
def SetDatesFromLogbookEntries(expedition):
|
||||||
@ -618,7 +621,7 @@ def LoadLogbookForExpedition(expedition):
|
|||||||
expedition.logbookfile = yearlinks[year][0]
|
expedition.logbookfile = yearlinks[year][0]
|
||||||
parsefunc = yearlinks[year][1]
|
parsefunc = yearlinks[year][1]
|
||||||
else:
|
else:
|
||||||
logbookpath = os.path.join(expologbase, year, settings.DEFAULT_LOGBOOK_FILE)
|
logbookpath = Path(expologbase) / year / settings.DEFAULT_LOGBOOK_FILE
|
||||||
expedition.logbookfile = settings.DEFAULT_LOGBOOK_FILE
|
expedition.logbookfile = settings.DEFAULT_LOGBOOK_FILE
|
||||||
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
|
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
|
||||||
cache_filename = Path(str(logbookpath) + ".cache")
|
cache_filename = Path(str(logbookpath) + ".cache")
|
||||||
@ -639,13 +642,13 @@ def LoadLogbookForExpedition(expedition):
|
|||||||
print(" - ! Cache is > 30 days old")
|
print(" - ! Cache is > 30 days old")
|
||||||
bad_cache= True
|
bad_cache= True
|
||||||
if bad_cache:
|
if bad_cache:
|
||||||
print(" - ! Cache is either stale or more than 30 days old. Deleting it.")
|
print(" - so cache is either stale or more than 30 days old. Deleting it.")
|
||||||
os.remove(cache_filename)
|
os.remove(cache_filename)
|
||||||
logentries=[]
|
logentries=[]
|
||||||
print(" ! Removed stale or corrupt cache file")
|
print(" - Deleted stale or corrupt cache file")
|
||||||
raise
|
raise
|
||||||
# print(" - Reading cache: " + str(cache_filename), end='')
|
|
||||||
try:
|
try:
|
||||||
|
# print(" - Reading cache: " + str(cache_filename), end='')
|
||||||
with open(cache_filename, "rb") as f:
|
with open(cache_filename, "rb") as f:
|
||||||
year,n,logentries = pickle.load(f)
|
year,n,logentries = pickle.load(f)
|
||||||
if validcache(year,n):
|
if validcache(year,n):
|
||||||
@ -660,21 +663,22 @@ def LoadLogbookForExpedition(expedition):
|
|||||||
logentries=[]
|
logentries=[]
|
||||||
raise
|
raise
|
||||||
except :
|
except :
|
||||||
print(" - Cache de-pickle failure \"" + str(cache_filename) +"\"")
|
print(" - Cache old or de-pickle failure \"" + str(cache_filename) +"\"")
|
||||||
try:
|
try:
|
||||||
file_in = open(logbookpath,'rb')
|
file_in = open(logbookpath,'rb')
|
||||||
txt = file_in.read().decode("latin1")
|
txt = file_in.read().decode("utf-8")
|
||||||
file_in.close()
|
file_in.close()
|
||||||
logbook_parseable = True
|
logbook_parseable = True
|
||||||
except (IOError):
|
except (IOError):
|
||||||
logbook_parseable = False
|
logbook_parseable = False
|
||||||
print((" ! Couldn't open logbook " + logbookpath))
|
print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
|
||||||
|
|
||||||
if logbook_parseable:
|
if logbook_parseable:
|
||||||
parser = globals()[parsefunc]
|
parser = globals()[parsefunc]
|
||||||
print(f' - Using parser {parsefunc}')
|
print(f' - Using parser {parsefunc}')
|
||||||
parser(year, expedition, txt) # this launches the right parser for this year
|
parser(year, expedition, txt) # this launches the right parser for this year
|
||||||
|
|
||||||
|
print(" - Setting dates from logbook entries")
|
||||||
SetDatesFromLogbookEntries(expedition)
|
SetDatesFromLogbookEntries(expedition)
|
||||||
if len(logentries) >0:
|
if len(logentries) >0:
|
||||||
print(" - Cacheing " , len(logentries), " log entries")
|
print(" - Cacheing " , len(logentries), " log entries")
|
||||||
@ -686,11 +690,14 @@ def LoadLogbookForExpedition(expedition):
|
|||||||
|
|
||||||
i=0
|
i=0
|
||||||
for entrytuple in logentries:
|
for entrytuple in logentries:
|
||||||
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = entrytuple
|
try:
|
||||||
|
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1 = entrytuple
|
||||||
|
except ValueError: # cope with removal of entry_type but still in cache files. Remove in Sept. 2022.
|
||||||
|
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = entrytuple
|
||||||
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
|
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
|
||||||
entry_type, tripid1)
|
tripid1)
|
||||||
EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, logtime_underground,
|
EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, logtime_underground,
|
||||||
entry_type, tripid1, i)
|
tripid1, i)
|
||||||
i +=1
|
i +=1
|
||||||
SetDatesFromLogbookEntries(expedition)
|
SetDatesFromLogbookEntries(expedition)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user