2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-03-13 13:51:48 +00:00

minor refactoring

This commit is contained in:
Philip Sargent 2022-03-24 01:05:50 +00:00
parent 13ffe1fcc6
commit be410d4d9d

@ -31,6 +31,8 @@ todo='''
- refactor everything with some urgency, esp. LoadLogbookForExpedition()
-- far too many uses of Django field dereferencing to get values, which is SLOW
- Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser,
or it is broken/incomplete and need hand-editing.
@ -61,6 +63,14 @@ noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plate
logdataissues = TROG['issues']['logdataissues']
trips ={}
entries = { "2019": 20, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
"1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
"1985": 24,"1984": 32,"1983": 52,"1982": 42,}
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no fully working parser, or need hand-editing.
#
# the logbook loading section
@ -534,19 +544,25 @@ def SetDatesFromLogbookEntries(expedition):
#persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
def LoadLogbookForExpedition(expedition, expect):
def LoadLogbookForExpedition(expedition):
""" Parses all logbook entries for one expedition
If a cache is found it uses it. If not found, or fails sanity checks, parses source file.
"""
# absolutely horrid. REFACTOR THIS (all my fault..)
global logentries
global logdataissues
global entries
logbook_parseable = False
logbook_cached = False
yearlinks = settings.LOGBOOK_PARSER_SETTINGS
expologbase = os.path.join(settings.EXPOWEB, "years")
logentries=[]
year = expedition.year
expect = entries[year]
# print(" - Logbook for: " + year)
def validcache(year,n):
if year != expedition:
print(" ! year != expedition ",year, expedition )
@ -578,14 +594,14 @@ def LoadLogbookForExpedition(expedition, expect):
for i in dellist:
del logdataissues[i]
cleanerrors(expedition.year)
cleanerrors(year)
if expedition.year in yearlinks:
logbookpath = Path(expologbase) / expedition.year / yearlinks[expedition.year][0]
expedition.logbookfile = yearlinks[expedition.year][0]
parsefunc = yearlinks[expedition.year][1]
if year in yearlinks:
logbookpath = Path(expologbase) / year / yearlinks[year][0]
expedition.logbookfile = yearlinks[year][0]
parsefunc = yearlinks[year][1]
else:
logbookpath = os.path.join(expologbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE)
logbookpath = os.path.join(expologbase, year, settings.DEFAULT_LOGBOOK_FILE)
expedition.logbookfile = settings.DEFAULT_LOGBOOK_FILE
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
cache_filename = Path(str(logbookpath) + ".cache")
@ -616,7 +632,7 @@ def LoadLogbookForExpedition(expedition, expect):
with open(cache_filename, "rb") as f:
year,n,logentries = pickle.load(f)
if validcache(year,n):
print(" -- Loaded ", len(logentries), " log entries")
print(f" -- {year} : Loaded {len(logentries)} log entries")
logbook_cached = True
else:
print(" !- Told to expect ", expect, " but ", len(logentries), " found in cache")
@ -640,7 +656,7 @@ def LoadLogbookForExpedition(expedition, expect):
if logbook_parseable:
parser = globals()[parsefunc]
print(f' - Using parser {parsefunc}')
parser(expedition.year, expedition, txt) # this launches the right parser for this year
parser(year, expedition, txt) # this launches the right parser for this year
SetDatesFromLogbookEntries(expedition)
if len(logentries) >0:
@ -656,10 +672,17 @@ def LoadLogbookForExpedition(expedition, expect):
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = entrytuple
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
entry_type, tripid1)
EnterLogIntoObjStore(expedition.year, date, tripcave, triptitle, text, trippeople, logtime_underground,
EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, logtime_underground,
entry_type, tripid1, i)
i +=1
SetDatesFromLogbookEntries(expedition)
if len(logentries) == expect:
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
pass
else:
print(f"BAD {year} {len(logentries):5d} is not {expect}\n")
return len(logentries)
def LoadLogbooks():
@ -668,6 +691,7 @@ def LoadLogbooks():
This should be rewritten to use coroutines to load all logbooks from disc in parallel.
"""
global logdataissues
global entries
logdataissues = {}
DataIssue.objects.filter(parser='logbooks').delete()
@ -682,50 +706,41 @@ def LoadLogbooks():
lostlogbook = ["1976", "1977", "1978", "1979", "1980", "1981"]
sqlfail = ["1987", "1988", "1989"] # breaks mysql with db constraint failure - debug locally first
nologbook = noexpo + lostlogbook + sqlfail
entries = { "2019": 20, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
"1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
"1985": 24,"1984": 32,"1983": 52,"1982": 42,}
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing.
try:
os.remove("loadlogbk.log")
except OSError:
pass
nlbe={}
expd ={}
with open("loadlogbk.log", "a") as log:
actuals = []
for expo in expos:
TROG['pagecache']['expedition'][expo.year] = None # clear cache
if expo.year in sqlfail:
print(" - Logbook for: " + expo.year + " NO parsing attempted - known sql failures")
message = f" ! - Not even attempting to parse logbook for {expo.year} until code fixed"
year = expo.year
TROG['pagecache']['expedition'][year] = None # clear cache
if year in sqlfail:
print(" - Logbook for: " + year + " NO parsing attempted - known sql failures")
message = f" ! - Not even attempting to parse logbook for {year} until code fixed"
DataIssue.objects.create(parser='logbooks', message=message)
logdataissues[f"sqlfail {expo.year}"]=message
logdataissues[f"sqlfail {year}"]=message
print(message)
if expo.year not in nologbook:
print((" - Logbook for: " + expo.year))
if expo.year in entries:
numentries = LoadLogbookForExpedition(expo, entries[expo.year]) # this actually loads the logbook for one year
log.write("{} {:5d} should be {}\n".format(expo.year, numentries, entries[expo.year]))
nlbe[expo.year]=numentries
expd[expo.year]= 0
if year not in nologbook:
if year in entries:
actuals.append(expo)
else:
print(" - No Logbook yet for: " + expo.year) # catch case when preparing for next expo
print("** total trips in ObjStore:", len(trips))
#for i in logdataissues:
# print("{:15s}: {}".format(i, logdataissues[i]))
print(" - No Logbook yet for: " + year) # catch case when preparing for next expo
for ex in actuals:
nlbe[ex] = LoadLogbookForExpedition(ex) # this actually loads the logbook for one expo
# tried to use map with concurrent threads - but sqlite database is not concurrent, so failed with database lock
# yt = 0
# for r in map(LoadLogbookForExpedition, actuals):
# yt = r
for lbe in trips:
year, date, tripcave, triptitle, text, trippeople, tu, formattype = trips[lbe]
expd[year] += 1
yt = 0
for y in expd:
# print("{} {}".format(y, expd[y]), nlbe[y])
yt += expd[y]
print("total {} log entries in all expeditions".format(yt))
for e in nlbe:
yt += nlbe[e]
print(f"total {yt:,} log entries parsed in all expeditions")
if yt != len(trips):
print(f"** total trips in ObjStore:{len(trips):,}")
try:
shelvfilenm = 'logbktrips.shelve' # ".db" automatically appended after Python 3.8