2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-13 20:27:05 +00:00

make ?reload private and clean old error msgs

This commit is contained in:
Philip Sargent
2021-04-23 03:07:21 +01:00
parent 1a4be0f02e
commit dbd186e299
10 changed files with 172 additions and 85 deletions

View File

@@ -21,6 +21,39 @@ Parses and imports logbooks in all their wonderful confusion
# When we edit logbook entries, allow a "?" after any piece of data to say we've frigged it and
# it can be checked up later from the hard-copy if necessary; or it's not possible to determine (name, trip place, etc)
'''
todo='''
- Put the object store 'trips' and the 'logdataissues' into TROG global object
- refactor everything with some urgency, esp. LoadLogbookForExpedition()
- delete all the autoLogbooKEntry stuff when we are absolutely certain what it does
- Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser,
or it is broken/incomplete and need hand-editing.
- import/parse/re-export-as-html the 'artisanal-format' old logbooks so that
we keep only a modern HTML5 format. Then we can retire the old parsers and reduce the
volume of code here substantially.
- edit LoadLogbooks() to use coroutines to speed up import substantially,
but perhaps we had better profile it first?
- rewrite to use generators rather than storing everything intermediate in lists - to reduce memory impact.
- the object store will need additional functions to replicate the persontrip calculation
and storage. For the moment we leave all that to be done in the django db
- We should ensure logbook.html is utf-8 and stop this crap:
file_in = open(logbookfile,'rb')
txt = file_in.read().decode("latin1")
'''
# The entire logbook for one year is held as a single object: a list of entries.
logentries = []

# Trip "place" values that are known not to be caves, so no cave lookup applies.
noncaveplaces = [
    "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN",
    "plateau", "base camp", "basecamp", "top camp", "topcamp",
]

# Accumulated parsing problems, keyed by trip/entry id.
logdataissues = {}

# Object store of trips, keyed by trip id.
trips = {}
#
# the logbook loading section
@@ -77,12 +110,6 @@ def GetTripCave(place):
return None
logentries = [] # the entire logbook for one year is a single object: a list of entries
noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau',
'base camp', 'basecamp', 'top camp', 'topcamp' ]
logdataissues = {}
trips ={}
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki"):
""" saves a logbook entry and related persontrips
Does NOT save the expeditionday_id - all NULLs. why?
@@ -205,8 +232,10 @@ def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu,
#print(" - New id ",tid)
else:
tid= tripid1
if tid in trips:
msg = " ! DUPLICATE id .{}. {} ~{}~".format(tid, trips[tid][0], trips[tid][1])
tyear, tdate, *trest = trips[tid]
msg = f" ! DUPLICATE on {tdate} id: '{tid}'"
print(msg)
DataIssue.objects.create(parser='logbooks', message=msg)
tid= "d{}-s{:02d}".format(str(date),seq)
@@ -427,6 +456,7 @@ def LoadLogbookForExpedition(expedition, expect):
"""
# absolutely horrid. REFACTOR THIS (all my fault..)
global logentries
global logdataissues
logbook_parseable = False
logbook_cached = False
yearlinks = settings.LOGBOOK_PARSER_SETTINGS
@@ -445,6 +475,26 @@ def LoadLogbookForExpedition(expedition, expect):
return False
return True
def cleanerrors(year):
    """Delete stale logbook error records for one year before re-parsing it.

    Removes persisted DataIssue rows (parser='logbooks') whose message
    mentions a trip id for this year, and drops the matching entries from
    the in-memory logdataissues store.

    year -- expedition year as a string, e.g. "1996"
    """
    global logdataissues
    print(f' - CLEAN {year} {len(logdataissues)} data issues in total')

    # NOTE(review): persisted ids are matched as "t<year>-" while the
    # in-memory keys below are matched as "<year>." -- confirm both
    # prefixes are intended; they look inconsistent.
    ph = "t" + year + "-"  # loop-invariant: compute once, not per row
    dataissues = DataIssue.objects.filter(parser='logbooks')
    for di in dataissues:
        if re.search(ph, di.message) is not None:
            print(f' - CLEANING dataissue {di.message}')
            di.delete()

    # The original looped "for te, content in logdataissues:", which unpacks
    # each dict KEY (a string) into two names -- a ValueError for any normal
    # key -- and also popped entries while iterating the dict, which raises
    # RuntimeError in Python 3. Iterate a snapshot of the keys instead.
    for te in list(logdataissues):
        print(f' - CLEAN {te}')
        if te.startswith(year + "."):
            print(f' - CLEANING logdataissue {te}')
            logdataissues.pop(te)
cleanerrors(expedition.year)
if expedition.year in yearlinks:
logbookfile = os.path.join(expologbase, yearlinks[expedition.year][0])
expedition.logbookfile = yearlinks[expedition.year][0]
@@ -478,10 +528,10 @@ def LoadLogbookForExpedition(expedition, expect):
print(" -- Loaded ", len(logentries), " log entries")
logbook_cached = True
else:
print(" !- Should be ", expect, " but ", len(logentries), " found in cache")
print(" !- Told to expect ", expect, " but ", len(logentries), " found in cache")
raise
except:
print(" ! Failed to load corrupt cache. Deleting it.")
print(" ! Failed to load corrupt cache. (Or I was told to ignore it). Deleting it.")
os.remove(cache_filename)
logentries=[]
raise
@@ -554,7 +604,7 @@ def LoadLogbooks():
TROG['pagecache']['expedition'][expo.year] = None # clear cache
if expo.year not in nologbook:
print((" - Logbook for: " + expo.year))
numentries = LoadLogbookForExpedition(expo, entries[expo.year])
numentries = LoadLogbookForExpedition(expo, entries[expo.year]) # this actually loads the logbook for one year
log.write("{} {:5d} should be {}\n".format(expo.year, numentries, entries[expo.year]))
nlbe[expo.year]=numentries
expd[expo.year]= 0
@@ -588,6 +638,12 @@ locationRegex = re.compile(r'<span\s+class="location">(.*?)</span>', re.S)
caveRegex = re.compile(r'<span\s+class="cave">(.*?)</span>', re.S)
def parseAutoLogBookEntry(filename):
'''An AutoLogBookEntry appears to be one that was created online using a form, for a single trip,
which is then stored in a separate location to the usual logbook.html
But when importing logbook.html all these individual entries also need to be parsed.
This is all redundant as we are getting rid of the whole individual trip entry system
'''
errors = []
f = open(filename, "r")
contents = f.read()