forked from expo/troggle
parse several UK caving blogs per year - working
This commit is contained in:
parent
5e9fd7fd77
commit
f80e4efed8
@ -27,11 +27,9 @@ Also has code to download a logbook in a choice of formats (why?!)
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
todo = '''
|
todo = '''
|
||||||
- Check that the logbookdownloader works by testing with a round trip.
|
|
||||||
|
|
||||||
- Use it to convert all older logbooks into the 2005-variant of HTML then we can
|
- Use logbookdownloader to convert all older logbooks into the 2005-variant of HTML then we can
|
||||||
get rid of the parsers for older formats. There are no images stored in the database,
|
get rid of the parsers for older formats.
|
||||||
so this is only a tool for a first pass, to be followed by extensive hand-editing!
|
|
||||||
When we have done all the old logbooks, delete this function and the two templates.
|
When we have done all the old logbooks, delete this function and the two templates.
|
||||||
|
|
||||||
|
|
||||||
@ -152,7 +150,7 @@ def exportlogbook(request,year=None,extension=None):
|
|||||||
for the current year. Formats available are HTML2005 (others old & broken or not written yet)
|
for the current year. Formats available are HTML2005 (others old & broken or not written yet)
|
||||||
|
|
||||||
There are no images stored in the database, so this is only a tool for a first pass, to be followed by
|
There are no images stored in the database, so this is only a tool for a first pass, to be followed by
|
||||||
hand-editing. However links to images work int he HTML text of a logbook entry
|
hand-editing. However links to images work in the HTML text of a logbook entry
|
||||||
|
|
||||||
NEED TO ADD IN THE MATERIAL WHICH IS NOT IN ANY LBE ! e.g. front matter.
|
NEED TO ADD IN THE MATERIAL WHICH IS NOT IN ANY LBE ! e.g. front matter.
|
||||||
|
|
||||||
@ -184,6 +182,18 @@ def exportlogbook(request,year=None,extension=None):
|
|||||||
t=loader.get_template(template)
|
t=loader.get_template(template)
|
||||||
logbookfile = (t.render({'logbook_entries':logbook_entries}))
|
logbookfile = (t.render({'logbook_entries':logbook_entries}))
|
||||||
|
|
||||||
|
frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
|
||||||
|
if frontpath.is_file():
|
||||||
|
try:
|
||||||
|
with open(frontpath,"r") as front:
|
||||||
|
frontmatter = front.read()
|
||||||
|
except:
|
||||||
|
print(" ! Very Bad Error opening " + frontpath)
|
||||||
|
logbookfile = re.sub(r"<body>", "<body>\n"+frontmatter , logbookfile)
|
||||||
|
else:
|
||||||
|
logbookfile = re.sub(r"<body>", f"<body>\n<h1>Expo {year}</h1>\n", logbookfile)
|
||||||
|
|
||||||
|
|
||||||
dir = Path(settings.EXPOWEB) / "years" / year
|
dir = Path(settings.EXPOWEB) / "years" / year
|
||||||
filepath = Path(dir, filename)
|
filepath = Path(dir, filename)
|
||||||
with(open(filepath, 'w')) as lb:
|
with(open(filepath, 'w')) as lb:
|
||||||
|
@ -43,8 +43,9 @@ def import_logbooks():
|
|||||||
|
|
||||||
def import_logbook(year=2019):
|
def import_logbook(year=2019):
|
||||||
print(f"-- Importing Logbook {year}")
|
print(f"-- Importing Logbook {year}")
|
||||||
with transaction.atomic():
|
print(f"-- - commented out")
|
||||||
troggle.parsers.logbooks.LoadLogbook(year, format="blog")
|
# with transaction.atomic():
|
||||||
|
# troggle.parsers.logbooks.LoadLogbook(year, format="cucc")
|
||||||
|
|
||||||
def import_QMs():
|
def import_QMs():
|
||||||
print("-- Importing old QMs for 161, 204, 234 from CSV files")
|
print("-- Importing old QMs for 161, 204, 234 from CSV files")
|
||||||
|
@ -51,8 +51,13 @@ data for old logbooks. New design needed, with a mechanism for flagging fixtures
|
|||||||
|
|
||||||
'''
|
'''
|
||||||
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
|
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
|
||||||
DEFAULT_LOGBOOK_PARSER = "parser_html"
|
BLOG_PARSER_SETTINGS = {
|
||||||
|
"2017": ("ukcavingblog.html", "parser_blog"),
|
||||||
|
"2019": ("ukcavingblog.html", "parser_blog"),
|
||||||
|
"2022": ("ukcavingblog.html", "parser_blog"),
|
||||||
|
}
|
||||||
DEFAULT_LOGBOOK_FILE = "logbook.html"
|
DEFAULT_LOGBOOK_FILE = "logbook.html"
|
||||||
|
DEFAULT_LOGBOOK_PARSER = "parser_html"
|
||||||
# All years since 2010 use the default value for Logbook parser
|
# All years since 2010 use the default value for Logbook parser
|
||||||
# but several don't work, and are skipped by the parsing code, e.g. 1983
|
# but several don't work, and are skipped by the parsing code, e.g. 1983
|
||||||
LOGBOOK_PARSER_SETTINGS = {
|
LOGBOOK_PARSER_SETTINGS = {
|
||||||
@ -89,11 +94,11 @@ LOGBOOK_PARSER_SETTINGS = {
|
|||||||
"1982": ("log.htm", "parser_html_01"),
|
"1982": ("log.htm", "parser_html_01"),
|
||||||
}
|
}
|
||||||
|
|
||||||
entries = { "2022": 64, "2019": 56, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
|
entries = { "2022": 64, "2019": 56, "2018": 75, "2017": 61, "2016": 81, "2015": 79,
|
||||||
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
|
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
|
||||||
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 42, "2002": 31,
|
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 42, "2002": 31,
|
||||||
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
|
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 95, "1995": 42,
|
||||||
"1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
|
"1994": 32, "1993": 41, "1992": 62, "1991": 39, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
|
||||||
"1985": 24, "1984": 32, "1983": 52, "1982": 42,}
|
"1985": 24, "1984": 32, "1983": 52, "1982": 42,}
|
||||||
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no fully working parser, or need hand-editing.
|
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no fully working parser, or need hand-editing.
|
||||||
|
|
||||||
@ -258,7 +263,7 @@ def ParseDate(tripdate, year):
|
|||||||
return datetime.date(1970, 1, 1)
|
return datetime.date(1970, 1, 1)
|
||||||
|
|
||||||
# (2006 - not any more), 2008 - 2009
|
# (2006 - not any more), 2008 - 2009
|
||||||
def wiki_parser(year, expedition, txt):
|
def wiki_parser(year, expedition, txt, seq=""):
|
||||||
global logentries
|
global logentries
|
||||||
global logdataissues
|
global logdataissues
|
||||||
|
|
||||||
@ -300,10 +305,20 @@ def wiki_parser(year, expedition, txt):
|
|||||||
|
|
||||||
# 2002, 2004, 2005, 2007, 2010 - now
|
# 2002, 2004, 2005, 2007, 2010 - now
|
||||||
# 2006 wiki text is incomplete, but the html is all there. So using this parser now.
|
# 2006 wiki text is incomplete, but the html is all there. So using this parser now.
|
||||||
def parser_html(year, expedition, txt):
|
def parser_html(year, expedition, txt, seq=""):
|
||||||
global logentries
|
global logentries
|
||||||
global logdataissues
|
global logdataissues
|
||||||
|
|
||||||
|
# extract front material and stash for later use when rebuilding from list of entries
|
||||||
|
headmatch = re.match(r"(?i)(?s).*<body[^>]*>(.*?)<hr.*", txt)
|
||||||
|
headpara = headmatch.groups()[0].strip()
|
||||||
|
|
||||||
|
# print(f" - headpara:\n'{headpara}'")
|
||||||
|
if(len(headpara)>0):
|
||||||
|
frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
|
||||||
|
with open(frontpath,"w") as front:
|
||||||
|
front.write(headpara+"\n")
|
||||||
|
|
||||||
tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
|
tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
|
||||||
logbook_entry_count = 0
|
logbook_entry_count = 0
|
||||||
for trippara in tripparas:
|
for trippara in tripparas:
|
||||||
@ -323,7 +338,7 @@ def parser_html(year, expedition, txt):
|
|||||||
if s:
|
if s:
|
||||||
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
|
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
|
||||||
else: # allow title and people to be swapped in order
|
else: # allow title and people to be swapped in order
|
||||||
msg = f" !- {year} Can't parse:{logbook_entry_count} '{trippara[:40]}'..."
|
msg = f" !- {year} Can't parse:{logbook_entry_count} '{trippara[:50]}'..."
|
||||||
print(msg)
|
print(msg)
|
||||||
DataIssue.objects.create(parser='logbooks', message=msg)
|
DataIssue.objects.create(parser='logbooks', message=msg)
|
||||||
logdataissues[tid]=msg
|
logdataissues[tid]=msg
|
||||||
@ -340,11 +355,11 @@ def parser_html(year, expedition, txt):
|
|||||||
if s2:
|
if s2:
|
||||||
tripid, tripid1, tripdate, triptitle, trippeople, triptext, tu = s2.groups()
|
tripid, tripid1, tripdate, triptitle, trippeople, triptext, tu = s2.groups()
|
||||||
else:
|
else:
|
||||||
if not re.search(r"Rigging Guide", trippara):
|
# if not re.search(r"Rigging Guide", trippara):
|
||||||
msg = f" !- Logbook. Can't parse entry on 2nd pass:{logbook_entry_count} '{trippara[:40]}'..."
|
msg = f" !- Logbook. Can't parse entry on 2nd pass:{logbook_entry_count} '{trippara[:50]}'..."
|
||||||
print(msg)
|
print(msg)
|
||||||
DataIssue.objects.create(parser='logbooks', message=msg)
|
DataIssue.objects.create(parser='logbooks', message=msg)
|
||||||
logdataissues[tid]=msg
|
logdataissues[tid]=msg
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ldate = ParseDate(tripdate.strip(), year)
|
ldate = ParseDate(tripdate.strip(), year)
|
||||||
@ -364,11 +379,21 @@ def parser_html(year, expedition, txt):
|
|||||||
|
|
||||||
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
|
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
|
||||||
# trying it out for years 1982 - 1990 too. Some logbook editing required by hand.. place
|
# trying it out for years 1982 - 1990 too. Some logbook editing required by hand.. place
|
||||||
def parser_html_01(year, expedition, txt):
|
def parser_html_01(year, expedition, txt, seq=""):
|
||||||
global logentries
|
global logentries
|
||||||
global logdataissues
|
global logdataissues
|
||||||
errorcount = 0
|
errorcount = 0
|
||||||
|
|
||||||
|
# extract front material and stash for later use when rebuilding from list of entries
|
||||||
|
headmatch = re.match(r"(?i)(?s).*<body[^>]*>(.*?)<hr.*", txt)
|
||||||
|
headpara = headmatch.groups()[0].strip()
|
||||||
|
|
||||||
|
# print(f" - headpara:\n'{headpara}'")
|
||||||
|
if(len(headpara)>0):
|
||||||
|
frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
|
||||||
|
with open(frontpath,"w") as front:
|
||||||
|
front.write(headpara+"\n")
|
||||||
|
|
||||||
tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
|
tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
|
||||||
logbook_entry_count = 0
|
logbook_entry_count = 0
|
||||||
for trippara in tripparas:
|
for trippara in tripparas:
|
||||||
@ -472,8 +497,8 @@ def parser_html_01(year, expedition, txt):
|
|||||||
print(message)
|
print(message)
|
||||||
return
|
return
|
||||||
|
|
||||||
def parser_blog(year, expedition, txt):
|
def parser_blog(year, expedition, txt, sq=""):
|
||||||
'''Parses the format of web pages collected as 'Save As HTML" fromt eh UK Caving blog website.
|
'''Parses the format of web pages collected as 'Save As HTML" from the UK Caving blog website.
|
||||||
Note that the entries have dates and authors, but no titles.
|
Note that the entries have dates and authors, but no titles.
|
||||||
'''
|
'''
|
||||||
global logentries
|
global logentries
|
||||||
@ -494,14 +519,13 @@ def parser_blog(year, expedition, txt):
|
|||||||
print(f"{len(tripheads)} != {len(tripparas)}")
|
print(f"{len(tripheads)} != {len(tripparas)}")
|
||||||
|
|
||||||
location = "Plateau"
|
location = "Plateau"
|
||||||
tripname = "UK Caving Blog post"
|
|
||||||
tu = 0
|
tu = 0
|
||||||
logbook_entry_count = 0
|
logbook_entry_count = 0
|
||||||
for i in range(0, len(tripparas)):
|
for i in range(0, len(tripparas)):
|
||||||
trippara = tripparas[i]
|
trippara = tripparas[i]
|
||||||
triphead = tripheads[i]
|
triphead = tripheads[i]
|
||||||
logbook_entry_count += 1
|
logbook_entry_count += 1
|
||||||
tid = set_trip_id(year,logbook_entry_count) +"_blog"
|
tid = set_trip_id(year,logbook_entry_count) +"_blog" + sq
|
||||||
# print(f" - tid: {tid}")
|
# print(f" - tid: {tid}")
|
||||||
|
|
||||||
# data-author="tcacrossley"
|
# data-author="tcacrossley"
|
||||||
@ -514,7 +538,7 @@ def parser_blog(year, expedition, txt):
|
|||||||
break
|
break
|
||||||
trippeople = match_author.group(1)
|
trippeople = match_author.group(1)
|
||||||
# print(f" - tid: {tid} {trippeople}")
|
# print(f" - tid: {tid} {trippeople}")
|
||||||
# datetime="2019-07-11T13:16:18+0100"
|
# datetime="2019-07-11T13:16:18+0100"
|
||||||
match_datetime = re.search(r".*datetime=\"([^\"]*)\" data-time=.*", triphead)
|
match_datetime = re.search(r".*datetime=\"([^\"]*)\" data-time=.*", triphead)
|
||||||
if not ( match_datetime ) :
|
if not ( match_datetime ) :
|
||||||
message = f" ! - Skipping logentry {year}:{logbook_entry_count} on failure to parse datetime {tid} {triphead[:400]}..."
|
message = f" ! - Skipping logentry {year}:{logbook_entry_count} on failure to parse datetime {tid} {triphead[:400]}..."
|
||||||
@ -527,19 +551,25 @@ def parser_blog(year, expedition, txt):
|
|||||||
try:
|
try:
|
||||||
tripdate = datetime.fromisoformat(datestamp)
|
tripdate = datetime.fromisoformat(datestamp)
|
||||||
except:
|
except:
|
||||||
print(datestamp[0:9])
|
message = f" ! - FROMISOFORMAT fail logentry {year}:{logbook_entry_count} {tid} '{datestamp}'"
|
||||||
|
DataIssue.objects.create(parser='logbooks', message=message)
|
||||||
|
logdataissues[tid]=message
|
||||||
|
print(message)
|
||||||
|
# fallback, ignore the timestamp bits:
|
||||||
tripdate = datetime.fromisoformat(datestamp[0:10])
|
tripdate = datetime.fromisoformat(datestamp[0:10])
|
||||||
# print(f" - tid: {tid} '{trippeople}' '{tripdate}'")
|
print(f" - tid: {tid} '{trippeople}' '{tripdate}'")
|
||||||
|
|
||||||
tripname = f"UK Caving Blog post {logbook_entry_count}" # must be unique for a given date
|
tripname = f"UK Caving Blog{sq} post {logbook_entry_count}" # must be unique for a given date
|
||||||
|
tripcontent = trippara + f"\n\nBlog Author: {trippeople}"
|
||||||
|
|
||||||
entrytuple = (tripdate, location, tripname, trippara,
|
entrytuple = (tripdate, location, tripname, tripcontent,
|
||||||
trippeople, expedition, tu, tid)
|
trippeople, expedition, tu, tid)
|
||||||
logentries.append(entrytuple)
|
logentries.append(entrytuple)
|
||||||
|
|
||||||
|
|
||||||
def LoadLogbookForExpedition(expedition, clean=True):
|
def LoadLogbookForExpedition(expedition, clean=True):
|
||||||
""" Parses all logbook entries for one expedition
|
""" Parses all logbook entries for one expedition
|
||||||
|
if clean==True then it deletes all entries for this year first.
|
||||||
"""
|
"""
|
||||||
global logentries
|
global logentries
|
||||||
# absolutely horrid. REFACTOR THIS (all my fault..)
|
# absolutely horrid. REFACTOR THIS (all my fault..)
|
||||||
@ -580,13 +610,13 @@ def LoadLogbookForExpedition(expedition, clean=True):
|
|||||||
|
|
||||||
if year in yearlinks:
|
if year in yearlinks:
|
||||||
yearfile, yearparser = yearlinks[year]
|
yearfile, yearparser = yearlinks[year]
|
||||||
logbookpath = Path(expologbase) / year / yearfile
|
logbookpath = Path(yearfile)
|
||||||
expedition.logbookfile = yearfile
|
expedition.logbookfile = yearfile
|
||||||
parsefunc = yearparser
|
parsefunc = yearparser
|
||||||
# print(f" - Logbook file {yearfile} using parser {yearparser}")
|
# print(f" - Logbook file {yearfile} using parser {yearparser}")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
logbookpath = Path(expologbase) / year / DEFAULT_LOGBOOK_FILE
|
logbookpath = Path(DEFAULT_LOGBOOK_FILE)
|
||||||
expedition.logbookfile = DEFAULT_LOGBOOK_FILE
|
expedition.logbookfile = DEFAULT_LOGBOOK_FILE
|
||||||
parsefunc = DEFAULT_LOGBOOK_PARSER
|
parsefunc = DEFAULT_LOGBOOK_PARSER
|
||||||
|
|
||||||
@ -597,34 +627,39 @@ def LoadLogbookForExpedition(expedition, clean=True):
|
|||||||
for lbe in lbes:
|
for lbe in lbes:
|
||||||
lbe.delete()
|
lbe.delete()
|
||||||
|
|
||||||
try:
|
for sq in ["", "2", "3", "4"]: # cope with blog saved as many separate files
|
||||||
file_in = open(logbookpath,'rb')
|
lb = Path(expologbase, year, logbookpath.stem + sq + logbookpath.suffix)
|
||||||
txt = file_in.read().decode("utf-8")
|
if not (lb.is_file()):
|
||||||
file_in.close()
|
# print(f" ! End of blog. Next blog file in sequence not there:{lb}")
|
||||||
logbook_parseable = True
|
break
|
||||||
except (IOError):
|
|
||||||
logbook_parseable = False
|
|
||||||
print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
|
|
||||||
except:
|
|
||||||
logbook_parseable = False
|
|
||||||
print(" ! Very Bad Error opening " + logbookpath)
|
|
||||||
|
|
||||||
if logbook_parseable:
|
|
||||||
parser = globals()[parsefunc]
|
|
||||||
print(f' - {year} parsing with {parsefunc}')
|
|
||||||
parser(year, expedition, txt) # this launches the right parser for this year
|
|
||||||
|
|
||||||
i=0
|
|
||||||
for entrytuple in logentries:
|
|
||||||
# date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1 = entrytuple
|
|
||||||
try:
|
try:
|
||||||
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1 = entrytuple
|
with open(lb,'rb') as file_in:
|
||||||
except ValueError: # cope with removal of entry_type but still in cache files. Remove in Dec. 2022.
|
txt = file_in.read().decode("utf-8")
|
||||||
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = entrytuple
|
logbook_parseable = True
|
||||||
print(f' - Exception entry_type "{entry_type}" {tripid1}')
|
except (IOError):
|
||||||
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
|
logbook_parseable = False
|
||||||
tripid1)
|
print(f" ! Couldn't open logbook as UTF-8 {lb}")
|
||||||
i +=1
|
except:
|
||||||
|
logbook_parseable = False
|
||||||
|
print(f" ! Very Bad Error opening {lb}")
|
||||||
|
|
||||||
|
if logbook_parseable:
|
||||||
|
|
||||||
|
# --------------------
|
||||||
|
parser = globals()[parsefunc]
|
||||||
|
print(f' - {year} parsing with {parsefunc} - {lb}')
|
||||||
|
parser(year, expedition, txt, sq) # this launches the right parser for this year
|
||||||
|
# --------------------
|
||||||
|
|
||||||
|
for entrytuple in logentries:
|
||||||
|
# date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1 = entrytuple
|
||||||
|
try:
|
||||||
|
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1 = entrytuple
|
||||||
|
except ValueError: # cope with removal of entry_type but still in cache files. Remove in Dec. 2022.
|
||||||
|
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = entrytuple
|
||||||
|
print(f' - Exception entry_type "{entry_type}" {tripid1}')
|
||||||
|
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
|
||||||
|
tripid1)
|
||||||
|
|
||||||
if len(logentries) == expect:
|
if len(logentries) == expect:
|
||||||
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
|
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
|
||||||
@ -634,19 +669,19 @@ def LoadLogbookForExpedition(expedition, clean=True):
|
|||||||
|
|
||||||
return len(logentries)
|
return len(logentries)
|
||||||
|
|
||||||
def LoadLogbook(year, format="cucc"):
|
# def LoadLogbook(year, format="cucc"):
|
||||||
global LOGBOOK_PARSER_SETTINGS
|
# global LOGBOOK_PARSER_SETTINGS
|
||||||
|
|
||||||
nlbe={}
|
# nlbe={}
|
||||||
TROG['pagecache']['expedition'][year] = None # clear cache
|
# TROG['pagecache']['expedition'][year] = None # clear cache
|
||||||
|
|
||||||
expo = Expedition.objects.get(year=year)
|
# expo = Expedition.objects.get(year=year)
|
||||||
|
|
||||||
if (format=="blog"):
|
# if (format=="blog"):
|
||||||
LOGBOOK_PARSER_SETTINGS[str(year)] = ("ukcavingblog.html", "parser_blog")
|
# LOGBOOK_PARSER_SETTINGS[str(year)] = BLOG_PARSER_SETTINGS[str(year)]
|
||||||
# print(f" - Logbook file {LOGBOOK_PARSER_SETTINGS[str(year)][0]} using parser {LOGBOOK_PARSER_SETTINGS[str(year)][1]}")
|
# # print(f" - Logbook file {LOGBOOK_PARSER_SETTINGS[str(year)][0]} using parser {LOGBOOK_PARSER_SETTINGS[str(year)][1]}")
|
||||||
|
|
||||||
nlbe[expo] = LoadLogbookForExpedition(expo) # this actually loads the logbook for one expo
|
# nlbe[expo] = LoadLogbookForExpedition(expo) # this actually loads the logbook for one expo
|
||||||
|
|
||||||
def LoadLogbooks():
|
def LoadLogbooks():
|
||||||
""" This is the master function for parsing all logbooks into the Troggle database.
|
""" This is the master function for parsing all logbooks into the Troggle database.
|
||||||
@ -671,7 +706,7 @@ def LoadLogbooks():
|
|||||||
sqlfail = ["1987", "1988", "1989"] # breaks mysql with db constraint fail - debug locally first]
|
sqlfail = ["1987", "1988", "1989"] # breaks mysql with db constraint fail - debug locally first]
|
||||||
nologbook = noexpo + lostlogbook + sqlfail
|
nologbook = noexpo + lostlogbook + sqlfail
|
||||||
|
|
||||||
blogs = ["2019"]
|
# blogs = ["2019"]
|
||||||
|
|
||||||
nlbe={}
|
nlbe={}
|
||||||
expd ={}
|
expd ={}
|
||||||
@ -694,17 +729,21 @@ def LoadLogbooks():
|
|||||||
else:
|
else:
|
||||||
print(" - No Logbook yet for: " + year) # catch case when preparing for next expo
|
print(" - No Logbook yet for: " + year) # catch case when preparing for next expo
|
||||||
|
|
||||||
if year in blogs:
|
if year in BLOG_PARSER_SETTINGS:
|
||||||
bloglist.append(expo)
|
bloglist.append(expo)
|
||||||
|
|
||||||
|
|
||||||
for ex in loglist:
|
for ex in loglist:
|
||||||
nlbe[ex] = LoadLogbookForExpedition(ex) # this actually loads the logbook for one expo
|
nlbe[ex] = LoadLogbookForExpedition(ex) # this loads the logbook for one expo
|
||||||
|
|
||||||
for b in bloglist:
|
for b in bloglist:
|
||||||
orig = LOGBOOK_PARSER_SETTINGS[str(b)]
|
if str(b) in LOGBOOK_PARSER_SETTINGS:
|
||||||
LOGBOOK_PARSER_SETTINGS[str(b)] = ("ukcavingblog.html", "parser_blog")
|
orig = LOGBOOK_PARSER_SETTINGS[str(b)]
|
||||||
nlbe[b] = LoadLogbookForExpedition(b, clean=False) # this actually loads the logbook for one expo
|
else:
|
||||||
|
orig = (DEFAULT_LOGBOOK_FILE, DEFAULT_LOGBOOK_PARSER)
|
||||||
|
LOGBOOK_PARSER_SETTINGS[str(b)] = BLOG_PARSER_SETTINGS[str(b)]
|
||||||
|
print(f" - BLOG: {b}")
|
||||||
|
nlbe[b] = LoadLogbookForExpedition(b, clean=False) # this loads the blog logbook for one expo
|
||||||
LOGBOOK_PARSER_SETTINGS[str(b)] = orig
|
LOGBOOK_PARSER_SETTINGS[str(b)] = orig
|
||||||
|
|
||||||
# tried to use map with concurrent threads - but sqlite database is not concurrent, so failed with database lock
|
# tried to use map with concurrent threads - but sqlite database is not concurrent, so failed with database lock
|
||||||
|
@ -4,14 +4,15 @@
|
|||||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||||
<title>{{logbook_entries.0.expedition}} Expo Logbook</title>
|
<title>{{logbook_entries.0.expedition}} Expo Logbook</title>
|
||||||
<link rel="stylesheet" href="../../css/main2.css" />
|
<link rel="stylesheet" href="../../css/main2.css" />
|
||||||
|
<style>figure {font-weight: bold; font-size: small; font-family: sans-serif;font-variant-caps: small-caps;}</style>
|
||||||
</head>
|
</head>
|
||||||
<!-- Exported by troggle in this format after having been imported using a different format and a different parser.
|
<!-- Exported by troggle in this format after having been imported using a different format and a different
|
||||||
This is because we are steadily converting old formats to a new common format so that we do not need to maintain half
|
parser. This is because we are steadily converting old formats to a new common format so that we do not need to
|
||||||
a dozen parser functions.
|
maintain half a dozen parser functions.
|
||||||
|
|
||||||
Exported on {% now 'Y-m-d D' %} using control panel webpage and exportlogbook() in troggle/code/views/other.py
|
Exported on {% now 'Y-m-d D' %} using control panel webpage and exportlogbook() in troggle/code/views/other.py
|
||||||
-->
|
-->
|
||||||
<body>
|
<body>
|
||||||
<h1>Expo {{logbook_entries.0.expedition}}</h1>
|
|
||||||
{%for logbook_entry in logbook_entries%}
|
{%for logbook_entry in logbook_entries%}
|
||||||
<hr />
|
<hr />
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user