
minor refactoring

Philip Sargent
2022-03-24 01:05:50 +00:00
parent 13ffe1fcc6
commit be410d4d9d


@@ -31,6 +31,8 @@ todo='''
 - refactor everything with some urgency, esp. LoadLogbookForExpedition()
+-- far too many uses of Django field dereferencing to get values, which is SLOW
 - Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser,
     or it is broken/incomplete and need hand-editing.
@@ -61,6 +63,14 @@ noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plate
 logdataissues = TROG['issues']['logdataissues']
 trips ={}
 
+entries = { "2019": 20, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
+            "2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
+            "2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
+            "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
+            "1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
+            "1985": 24,"1984": 32,"1983": 52,"1982": 42,}
+# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing.
 #
 # the logbook loading section
@@ -534,19 +544,25 @@ def SetDatesFromLogbookEntries(expedition):
     #persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
 
-def LoadLogbookForExpedition(expedition, expect):
+def LoadLogbookForExpedition(expedition):
     """ Parses all logbook entries for one expedition
     If a cache is found it uses it. If not found, or fails sanity checks, parses source file.
     """
     # absolutely horrid. REFACTOR THIS (all my fault..)
     global logentries
     global logdataissues
+    global entries
 
     logbook_parseable = False
     logbook_cached = False
     yearlinks = settings.LOGBOOK_PARSER_SETTINGS
     expologbase = os.path.join(settings.EXPOWEB, "years")
     logentries=[]
 
+    year = expedition.year
+    expect = entries[year]
+    # print(" - Logbook for: " + year)
 
     def validcache(year,n):
         if year != expedition:
             print(" ! year != expedition ",year, expedition )
@@ -578,14 +594,14 @@ def LoadLogbookForExpedition(expedition, expect):
         for i in dellist:
             del logdataissues[i]
 
-    cleanerrors(expedition.year)
+    cleanerrors(year)
 
-    if expedition.year in yearlinks:
-        logbookpath = Path(expologbase) / expedition.year / yearlinks[expedition.year][0]
-        expedition.logbookfile = yearlinks[expedition.year][0]
-        parsefunc = yearlinks[expedition.year][1]
+    if year in yearlinks:
+        logbookpath = Path(expologbase) / year / yearlinks[year][0]
+        expedition.logbookfile = yearlinks[year][0]
+        parsefunc = yearlinks[year][1]
     else:
-        logbookpath = os.path.join(expologbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE)
+        logbookpath = os.path.join(expologbase, year, settings.DEFAULT_LOGBOOK_FILE)
         expedition.logbookfile = settings.DEFAULT_LOGBOOK_FILE
         parsefunc = settings.DEFAULT_LOGBOOK_PARSER
 
     cache_filename = Path(str(logbookpath) + ".cache")
@@ -616,7 +632,7 @@ def LoadLogbookForExpedition(expedition, expect):
             with open(cache_filename, "rb") as f:
                 year,n,logentries = pickle.load(f)
             if validcache(year,n):
-                print(" -- Loaded ", len(logentries), " log entries")
+                print(f" -- {year} : Loaded {len(logentries)} log entries")
                 logbook_cached = True
             else:
                 print(" !- Told to expect ", expect, " but ", len(logentries), " found in cache")
@@ -640,7 +656,7 @@ def LoadLogbookForExpedition(expedition, expect):
     if logbook_parseable:
         parser = globals()[parsefunc]
         print(f' - Using parser {parsefunc}')
-        parser(expedition.year, expedition, txt) # this launches the right parser for this year
+        parser(year, expedition, txt) # this launches the right parser for this year
 
     SetDatesFromLogbookEntries(expedition)
     if len(logentries) >0:
@@ -656,10 +672,17 @@ def LoadLogbookForExpedition(expedition, expect):
             date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = entrytuple
             EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
                               entry_type, tripid1)
-            EnterLogIntoObjStore(expedition.year, date, tripcave, triptitle, text, trippeople, logtime_underground,
+            EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, logtime_underground,
                                  entry_type, tripid1, i)
             i +=1
 
     SetDatesFromLogbookEntries(expedition)
+    if len(logentries) == expect:
+        # print(f"OK {year} {len(logentries):5d} is {expect}\n")
+        pass
+    else:
+        print(f"BAD {year} {len(logentries):5d} is not {expect}\n")
 
     return len(logentries)
 
 def LoadLogbooks():
@@ -668,6 +691,7 @@ def LoadLogbooks():
     This should be rewritten to use coroutines to load all logbooks from disc in parallel.
     """
     global logdataissues
+    global entries
 
     logdataissues = {}
     DataIssue.objects.filter(parser='logbooks').delete()
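The docstring's coroutine idea is echoed further down, where a commented-out map() over concurrent threads is recorded as failing with a database lock under sqlite. A hypothetical sketch of that thread-pool variant, assuming LoadLogbookForExpedition and the actuals list introduced in the next hunk:

    from concurrent.futures import ThreadPoolExecutor

    # Hypothetical sketch only: the shape of the parallel attempt described
    # below. sqlite serialises writers, so this fails with a database lock.
    def load_all_logbooks(actuals):
        with ThreadPoolExecutor(max_workers=4) as pool:
            return dict(zip(actuals, pool.map(LoadLogbookForExpedition, actuals)))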
@@ -682,50 +706,41 @@ def LoadLogbooks():
     lostlogbook = ["1976", "1977", "1978", "1979", "1980", "1981"]
     sqlfail = ["1987", "1988", "1989"] # breaks mysql with db constraint fail - debug locally first]
     nologbook = noexpo + lostlogbook + sqlfail
-    entries = { "2019": 20, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
-                "2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
-                "2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
-                "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
-                "1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
-                "1985": 24,"1984": 32,"1983": 52,"1982": 42,}
-    # Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing.
-    try:
-        os.remove("loadlogbk.log")
-    except OSError:
-        pass
 
     nlbe={}
     expd ={}
-    with open("loadlogbk.log", "a") as log:
+    actuals = []
     for expo in expos:
+        year = expo.year
-        TROG['pagecache']['expedition'][expo.year] = None # clear cache
-        if expo.year in sqlfail:
-            print(" - Logbook for: " + expo.year + " NO parsing attempted - known sql failures")
-            message = f" ! - Not even attempting to parse logbook for {expo.year} until code fixed"
+        TROG['pagecache']['expedition'][year] = None # clear cache
+        if year in sqlfail:
+            print(" - Logbook for: " + year + " NO parsing attempted - known sql failures")
+            message = f" ! - Not even attempting to parse logbook for {year} until code fixed"
             DataIssue.objects.create(parser='logbooks', message=message)
-            logdataissues[f"sqlfail {expo.year}"]=message
+            logdataissues[f"sqlfail {year}"]=message
             print(message)
-        if expo.year not in nologbook:
-            print((" - Logbook for: " + expo.year))
-            if expo.year in entries:
-                numentries = LoadLogbookForExpedition(expo, entries[expo.year]) # this actually loads the logbook for one year
-                log.write("{} {:5d} should be {}\n".format(expo.year, numentries, entries[expo.year]))
-                nlbe[expo.year]=numentries
-                expd[expo.year]= 0
+        if year not in nologbook:
+            if year in entries:
+                actuals.append(expo)
             else:
-                print(" - No Logbook yet for: " + expo.year) # catch case when preparing for next expo
+                print(" - No Logbook yet for: " + year) # catch case when preparing for next expo
 
-    print("** total trips in ObjStore:", len(trips))
-    #for i in logdataissues:
-    #    print("{:15s}: {}".format(i, logdataissues[i]))
-    for lbe in trips:
-        year, date, tripcave, triptitle, text, trippeople, tu, formattype = trips[lbe]
-        expd[year] += 1
-    yt=0
-    for y in expd:
-        # print("{} {}".format(y, expd[y]), nlbe[y])
-        yt += expd[y]
-    print("total {} log entries in all expeditions".format(yt))
+    for ex in actuals:
+        nlbe[ex] = LoadLogbookForExpedition(ex) # this actually loads the logbook for one expo
+
+    # tried to use map with concurrent threads - but sqlite database is not concurrent, so failed with database lock
+    # yt = 0
+    # for r in map(LoadLogbookForExpedition, actuals):
+    #     yt = r
+
+    yt = 0
+    for e in nlbe:
+        yt += nlbe[e]
+    print(f"total {yt:,} log entries parsed in all expeditions")
+    if yt != len(trips):
+        print(f"** total trips in ObjStore:{len(trips):,}")
 
     try:
         shelvfilenm = 'logbktrips.shelve' # ".db" automatically apended after python 3.8
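A small aside on the new tally: since nlbe maps each expedition to its parsed entry count, the explicit loop is equivalent to a one-line sum (same semantics, sketch only):

    yt = sum(nlbe.values())   # same total as the "for e in nlbe" loop above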