mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2024-11-25 08:41:51 +00:00
more robust logbooks parsing
This commit is contained in:
parent
b979bdb560
commit
8128870d57
@ -176,13 +176,17 @@ def personexpedition(request, first_name='', last_name='', year=''):
|
||||
|
||||
def logbookentry(request, date, slug):
|
||||
this_logbookentry = LogbookEntry.objects.filter(date=date, slug=slug)
|
||||
|
||||
if len(this_logbookentry)>1:
|
||||
return render(request, 'object_list.html',{'object_list':this_logbookentry})
|
||||
|
||||
if this_logbookentry:
|
||||
if len(this_logbookentry)>1:
|
||||
return render(request, 'object_list.html',{'object_list':this_logbookentry})
|
||||
else:
|
||||
this_logbookentry=this_logbookentry[0]
|
||||
return render(request, 'logbookentry.html', {'logbookentry': this_logbookentry})
|
||||
else:
|
||||
this_logbookentry=this_logbookentry[0]
|
||||
return render(request, 'logbookentry.html', {'logbookentry': this_logbookentry})
|
||||
|
||||
msg =(f' Logbook entry slug:"{slug}" not found in database on date:"{date}" ')
|
||||
print(msg)
|
||||
return render(request, 'errors/generic.html',{'message':msg})
|
||||
|
||||
def logbookSearch(request, extra):
|
||||
query_string = ''
|
||||
|
@ -71,13 +71,18 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
||||
|
||||
for tripperson in re.split(r",|\+|&|&(?!\w+;)| and ", trippeople):
|
||||
tripperson = tripperson.strip()
|
||||
if not tid:
|
||||
tid = expedition.year + "." + tripperson + datetime.now().strftime("%S%f") # no good. Should be getting the tid
|
||||
mul = re.match(r"<u>(.*?)</u>$(?i)", tripperson)
|
||||
if mul:
|
||||
tripperson = mul.group(1).strip()
|
||||
if tripperson and tripperson[0] != '*':
|
||||
tripperson = re.sub(round_bracket_regex, "", tripperson).strip()
|
||||
|
||||
if tripperson =="Wiggy":
|
||||
tripperson = "Phil Wigglesworth"
|
||||
if tripperson =="Animal":
|
||||
tripperson = "Mike Richardson"
|
||||
|
||||
|
||||
personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
|
||||
if not personyear:
|
||||
message = f" ! - {expedition.year} No name match for: '{tripperson}' "
|
||||
@ -91,6 +96,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
||||
if not res:
|
||||
return None, None
|
||||
author = res[-1][0]
|
||||
|
||||
return res, author
|
||||
|
||||
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki", tid=None):
|
||||
@ -107,11 +113,11 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
||||
return
|
||||
|
||||
if not author:
|
||||
message = f" ! - {expedition.year} Skipping logentry: {title} - - no author for entry in year "
|
||||
message = f" ! - {expedition.year} Warning: logentry: {title} - no author for entry in year "
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues["title"]=message
|
||||
print(message)
|
||||
return
|
||||
#return
|
||||
|
||||
# This needs attention. The slug field is derived from 'title'
|
||||
# both GetCaveLookup() and GetTripCave() need to work together better. None of this data is *used* though?
|
||||
@ -298,7 +304,7 @@ def Parseloghtmltxt(year, expedition, txt):
|
||||
"html", tripid1, logbook_entry_count, tid=tid)
|
||||
|
||||
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
|
||||
# trying it out for years 1982 - 1990 too. Some logbook editing required by hand..
|
||||
# trying it out for years 1982 - 1990 too. Some logbook editing required by hand.. place
|
||||
def Parseloghtml01(year, expedition, txt):
|
||||
global logentries
|
||||
global logdataissues
|
||||
@ -309,7 +315,9 @@ def Parseloghtml01(year, expedition, txt):
|
||||
for trippara in tripparas:
|
||||
logbook_entry_count += 1
|
||||
tid = set_trip_id(year,logbook_entry_count)
|
||||
# print(f" #0 - tid: {tid}")
|
||||
try:
|
||||
#print(f" #1 - tid: {tid}")
|
||||
s = re.match(r"(?s)\s*(?:<p>)?(.*?)</?p>(.*)$(?i)", trippara)
|
||||
if not s:
|
||||
message = " ! - Skipping logentry {year} failure to parse header: " + tid + trippara[:300] + "..."
|
||||
@ -317,22 +325,40 @@ def Parseloghtml01(year, expedition, txt):
|
||||
logdataissues[tid]=message
|
||||
print(message)
|
||||
break
|
||||
tripheader, triptext = s.group(1), s.group(2)
|
||||
mtripid = re.search(r'<a id="(.*?)"', tripheader)
|
||||
try:
|
||||
tripheader, triptext = s.group(1), s.group(2)
|
||||
except:
|
||||
message = f" ! - Fail to set tripheader, triptext. trip:<{tid}> s:'{s}'"
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[tid]=message
|
||||
print(message)
|
||||
|
||||
|
||||
# mtripid = re.search(r'<a id="(.*?)"', tripheader)
|
||||
# if not mtripid:
|
||||
# # not an error, this is probably just a different year
|
||||
# message = f" ! - Fail id trip:{tid} header:'{tripheader}'"
|
||||
# message = f" ! - A tag id not found. Never mind. Not needed. trip:<{tid}> header:'{tripheader}'"
|
||||
# DataIssue.objects.create(parser='logbooks', message=message)
|
||||
# logdataissues[tid]=message
|
||||
# print(message)
|
||||
|
||||
tripid = mtripid and mtripid.group(1) or ""
|
||||
#print(f" # - mtripid: {mtripid}")
|
||||
# tripid = mtripid and mtripid.group(1) or ""
|
||||
# print(f" # - mtripid: {mtripid}")
|
||||
tripheader = re.sub(r"</?(?:[ab]|span)[^>]*>", "", tripheader)
|
||||
|
||||
tripdate, triptitle, trippeople = tripheader.split("|")
|
||||
#print(f" #2 - tid: {tid}")
|
||||
try:
|
||||
tripdate, triptitle, trippeople = tripheader.split("|")
|
||||
except:
|
||||
message = f" ! - Fail to split out date|title|people. trip:<{tid}> '{tripheader.split('|')}'"
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[tid]=message
|
||||
print(message)
|
||||
tripdate, triptitle = tripheader.split("|")
|
||||
trippeople = "anon"
|
||||
#print(f" #3 - tid: {tid}")
|
||||
ldate = ParseDate(tripdate.strip(), year)
|
||||
|
||||
#print(f" # - tid: {tid} <{tripdate}> <{triptitle}> <{trippeople}>")
|
||||
#print(f" #4 - tid: {tid}")
|
||||
|
||||
mtu = re.search(r'<p[^>]*>(T/?U.*)', triptext)
|
||||
if mtu:
|
||||
tu = mtu.group(1)
|
||||
@ -363,7 +389,7 @@ def Parseloghtml01(year, expedition, txt):
|
||||
|
||||
|
||||
entrytuple = (ldate, tripcave, triptitle, ltriptext,
|
||||
trippeople, expedition, tu, "html01", tripid)
|
||||
trippeople, expedition, tu, "html01", tid)
|
||||
logentries.append(entrytuple)
|
||||
try:
|
||||
EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext,
|
||||
@ -377,7 +403,7 @@ def Parseloghtml01(year, expedition, txt):
|
||||
|
||||
try:
|
||||
EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
||||
"html01", tripid, logbook_entry_count, tid=tid)
|
||||
"html01", tid, logbook_entry_count, tid=tid)
|
||||
except:
|
||||
message = " ! - Enter log entry into ObjectStore FAIL exception in: " + tid
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
@ -577,7 +603,7 @@ def LoadLogbookForExpedition(expedition, expect):
|
||||
SetDatesFromLogbookEntries(expedition)
|
||||
if len(logentries) >0:
|
||||
print(" - Cacheing " , len(logentries), " log entries")
|
||||
with open(cache_filename, "wb") as fc:
|
||||
with open(cache_filename, "wb") as fc: # we must check that permissions are g+w ! or expo can't delete the cache
|
||||
logbk=(expedition,len(logentries),logentries)
|
||||
pickle.dump(logbk, fc, protocol=4)
|
||||
else:
|
||||
@ -608,14 +634,14 @@ def LoadLogbooks():
|
||||
if len(expos) <= 1:
|
||||
print(" ! No expeditions found. Load 'people' first.\n")
|
||||
nologbook = ["1976", "1977", "1978", "1979", "1980", "1981",
|
||||
"1987", "1988", "1989",
|
||||
"1987", "1988", "1989", # needs more hand-editing of log.htm
|
||||
"1986", "2020",]
|
||||
entries = {"2021": 0, "2019": 20, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
|
||||
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
|
||||
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
|
||||
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
|
||||
"1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
|
||||
"1985": 21,"1984": 19,"1983": 22,"1982": 42,}
|
||||
"1985": 22,"1984": 32,"1983": 52,"1982": 42,}
|
||||
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing.
|
||||
try:
|
||||
os.remove("loadlogbk.log")
|
||||
|
Loading…
Reference in New Issue
Block a user