forked from expo/troggle
minor refactoring
This commit is contained in:
parent
13ffe1fcc6
commit
be410d4d9d
@ -31,6 +31,8 @@ todo='''
|
||||
|
||||
- refactor everything with some urgency, esp. LoadLogbookForExpedition()
|
||||
|
||||
-- far too many uses of Django field dereferencing to get values, which is SLOW
|
||||
|
||||
- Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser,
|
||||
or it is broken/incomplete and need hand-editing.
|
||||
|
||||
@ -61,6 +63,14 @@ noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plate
|
||||
logdataissues = TROG['issues']['logdataissues']
|
||||
trips ={}
|
||||
|
||||
entries = { "2019": 20, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
|
||||
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
|
||||
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
|
||||
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
|
||||
"1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
|
||||
"1985": 24,"1984": 32,"1983": 52,"1982": 42,}
|
||||
# Logbooks (log.htm) exist for 1983, 84, 85, 87, 88 and 89, but they either have no fully working parser or need hand-editing.
|
||||
|
||||
|
||||
#
|
||||
# the logbook loading section
|
||||
@ -534,19 +544,25 @@ def SetDatesFromLogbookEntries(expedition):
|
||||
#persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
|
||||
|
||||
|
||||
def LoadLogbookForExpedition(expedition, expect):
|
||||
def LoadLogbookForExpedition(expedition):
|
||||
""" Parses all logbook entries for one expedition
|
||||
If a cache is found it uses it. If not found, or fails sanity checks, parses source file.
|
||||
"""
|
||||
# absolutely horrid. REFACTOR THIS (all my fault..)
|
||||
global logentries
|
||||
global logdataissues
|
||||
global entries
|
||||
|
||||
logbook_parseable = False
|
||||
logbook_cached = False
|
||||
yearlinks = settings.LOGBOOK_PARSER_SETTINGS
|
||||
expologbase = os.path.join(settings.EXPOWEB, "years")
|
||||
logentries=[]
|
||||
|
||||
year = expedition.year
|
||||
expect = entries[year]
|
||||
# print(" - Logbook for: " + year)
|
||||
|
||||
def validcache(year,n):
|
||||
if year != expedition:
|
||||
print(" ! year != expedition ",year, expedition )
|
||||
@ -578,14 +594,14 @@ def LoadLogbookForExpedition(expedition, expect):
|
||||
for i in dellist:
|
||||
del logdataissues[i]
|
||||
|
||||
cleanerrors(expedition.year)
|
||||
cleanerrors(year)
|
||||
|
||||
if expedition.year in yearlinks:
|
||||
logbookpath = Path(expologbase) / expedition.year / yearlinks[expedition.year][0]
|
||||
expedition.logbookfile = yearlinks[expedition.year][0]
|
||||
parsefunc = yearlinks[expedition.year][1]
|
||||
if year in yearlinks:
|
||||
logbookpath = Path(expologbase) / year / yearlinks[year][0]
|
||||
expedition.logbookfile = yearlinks[year][0]
|
||||
parsefunc = yearlinks[year][1]
|
||||
else:
|
||||
logbookpath = os.path.join(expologbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE)
|
||||
logbookpath = os.path.join(expologbase, year, settings.DEFAULT_LOGBOOK_FILE)
|
||||
expedition.logbookfile = settings.DEFAULT_LOGBOOK_FILE
|
||||
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
|
||||
cache_filename = Path(str(logbookpath) + ".cache")
|
||||
@ -616,7 +632,7 @@ def LoadLogbookForExpedition(expedition, expect):
|
||||
with open(cache_filename, "rb") as f:
|
||||
year,n,logentries = pickle.load(f)
|
||||
if validcache(year,n):
|
||||
print(" -- Loaded ", len(logentries), " log entries")
|
||||
print(f" -- {year} : Loaded {len(logentries)} log entries")
|
||||
logbook_cached = True
|
||||
else:
|
||||
print(" !- Told to expect ", expect, " but ", len(logentries), " found in cache")
|
||||
@ -640,7 +656,7 @@ def LoadLogbookForExpedition(expedition, expect):
|
||||
if logbook_parseable:
|
||||
parser = globals()[parsefunc]
|
||||
print(f' - Using parser {parsefunc}')
|
||||
parser(expedition.year, expedition, txt) # this launches the right parser for this year
|
||||
parser(year, expedition, txt) # this launches the right parser for this year
|
||||
|
||||
SetDatesFromLogbookEntries(expedition)
|
||||
if len(logentries) >0:
|
||||
@ -656,10 +672,17 @@ def LoadLogbookForExpedition(expedition, expect):
|
||||
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = entrytuple
|
||||
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
|
||||
entry_type, tripid1)
|
||||
EnterLogIntoObjStore(expedition.year, date, tripcave, triptitle, text, trippeople, logtime_underground,
|
||||
EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, logtime_underground,
|
||||
entry_type, tripid1, i)
|
||||
i +=1
|
||||
SetDatesFromLogbookEntries(expedition)
|
||||
|
||||
if len(logentries) == expect:
|
||||
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
|
||||
pass
|
||||
else:
|
||||
print(f"BAD {year} {len(logentries):5d} is not {expect}\n")
|
||||
|
||||
return len(logentries)
|
||||
|
||||
def LoadLogbooks():
|
||||
@ -668,6 +691,7 @@ def LoadLogbooks():
|
||||
This should be rewritten to use coroutines to load all logbooks from disc in parallel.
|
||||
"""
|
||||
global logdataissues
|
||||
global entries
|
||||
|
||||
logdataissues = {}
|
||||
DataIssue.objects.filter(parser='logbooks').delete()
|
||||
@ -682,51 +706,42 @@ def LoadLogbooks():
|
||||
lostlogbook = ["1976", "1977", "1978", "1979", "1980", "1981"]
|
||||
sqlfail = ["1987", "1988", "1989"] # breaks mysql with a db constraint failure - debug locally first
|
||||
nologbook = noexpo + lostlogbook + sqlfail
|
||||
entries = { "2019": 20, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
|
||||
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
|
||||
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
|
||||
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
|
||||
"1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
|
||||
"1985": 24,"1984": 32,"1983": 52,"1982": 42,}
|
||||
# Logbooks (log.htm) exist for 1983, 84, 85, 87, 88 and 89, but they either have no fully working parser or need hand-editing.
|
||||
try:
|
||||
os.remove("loadlogbk.log")
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
nlbe={}
|
||||
expd ={}
|
||||
with open("loadlogbk.log", "a") as log:
|
||||
for expo in expos:
|
||||
TROG['pagecache']['expedition'][expo.year] = None # clear cache
|
||||
if expo.year in sqlfail:
|
||||
print(" - Logbook for: " + expo.year + " NO parsing attempted - known sql failures")
|
||||
message = f" ! - Not even attempting to parse logbook for {expo.year} until code fixed"
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[f"sqlfail {expo.year}"]=message
|
||||
print(message)
|
||||
|
||||
if expo.year not in nologbook:
|
||||
print((" - Logbook for: " + expo.year))
|
||||
if expo.year in entries:
|
||||
numentries = LoadLogbookForExpedition(expo, entries[expo.year]) # this actually loads the logbook for one year
|
||||
log.write("{} {:5d} should be {}\n".format(expo.year, numentries, entries[expo.year]))
|
||||
nlbe[expo.year]=numentries
|
||||
expd[expo.year]= 0
|
||||
else:
|
||||
print(" - No Logbook yet for: " + expo.year) # catch case when preparing for next expo
|
||||
print("** total trips in ObjStore:", len(trips))
|
||||
#for i in logdataissues:
|
||||
# print("{:15s}: {}".format(i, logdataissues[i]))
|
||||
|
||||
for lbe in trips:
|
||||
year, date, tripcave, triptitle, text, trippeople, tu, formattype = trips[lbe]
|
||||
expd[year] += 1
|
||||
yt=0
|
||||
for y in expd:
|
||||
# print("{} {}".format(y, expd[y]), nlbe[y])
|
||||
yt += expd[y]
|
||||
print("total {} log entries in all expeditions".format(yt))
|
||||
actuals = []
|
||||
|
||||
for expo in expos:
|
||||
year = expo.year
|
||||
TROG['pagecache']['expedition'][year] = None # clear cache
|
||||
if year in sqlfail:
|
||||
print(" - Logbook for: " + year + " NO parsing attempted - known sql failures")
|
||||
message = f" ! - Not even attempting to parse logbook for {year} until code fixed"
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[f"sqlfail {year}"]=message
|
||||
print(message)
|
||||
|
||||
if year not in nologbook:
|
||||
if year in entries:
|
||||
actuals.append(expo)
|
||||
else:
|
||||
print(" - No Logbook yet for: " + year) # catch case when preparing for next expo
|
||||
|
||||
for ex in actuals:
|
||||
nlbe[ex] = LoadLogbookForExpedition(ex) # this actually loads the logbook for one expo
|
||||
|
||||
# Tried to use map() with concurrent threads, but the sqlite database does not support concurrent access, so it failed with a database lock.
|
||||
# yt = 0
|
||||
# for r in map(LoadLogbookForExpedition, actuals):
|
||||
# yt = r
|
||||
|
||||
yt = 0
|
||||
for e in nlbe:
|
||||
yt += nlbe[e]
|
||||
print(f"total {yt:,} log entries parsed in all expeditions")
|
||||
if yt != len(trips):
|
||||
print(f"** total trips in ObjStore:{len(trips):,}")
|
||||
|
||||
try:
|
||||
shelvfilenm = 'logbktrips.shelve' # ".db" automatically appended after python 3.8
|
||||
with shelve.open(shelvfilenm, writeback=True) as odb:
|
||||
|
Loading…
x
Reference in New Issue
Block a user