blog parsing working

This commit is contained in:
Philip Sargent 2022-12-15 00:35:48 +00:00
parent cb50528e2d
commit 5cc6c26606

View File

@ -525,7 +525,7 @@ def parser_blog(year, expedition, txt):
datestamp = match_datetime.group(1)
tripdate = datetime.fromisoformat(datestamp)
print(f" - tid: {tid} '{trippeople}' '{tripdate}'")
# print(f" - tid: {tid} '{trippeople}' '{tripdate}'")
tripname = f"UK Caving Blog post {logbook_entry_count}" # must be unique for a given date
@ -534,7 +534,7 @@ def parser_blog(year, expedition, txt):
logentries.append(entrytuple)
def LoadLogbookForExpedition(expedition):
def LoadLogbookForExpedition(expedition, clean=True):
""" Parses all logbook entries for one expedition
"""
global logentries
@ -571,15 +571,15 @@ def LoadLogbookForExpedition(expedition):
dellist.append(key)
for i in dellist:
del logdataissues[i]
cleanerrors(year)
if (clean):
cleanerrors(year)
if year in yearlinks:
yearfile, yearparser = yearlinks[year]
logbookpath = Path(expologbase) / year / yearfile
expedition.logbookfile = yearfile
parsefunc = yearparser
print(f" - Logbook file {yearfile} using parser {yearparser}")
# print(f" - Logbook file {yearfile} using parser {yearparser}")
else:
logbookpath = Path(expologbase) / year / DEFAULT_LOGBOOK_FILE
@ -589,8 +589,9 @@ def LoadLogbookForExpedition(expedition):
expedition.save()
lbes = LogbookEntry.objects.filter(expedition=expedition)
for lbe in lbes:
lbe.delete()
if (clean):
for lbe in lbes:
lbe.delete()
try:
file_in = open(logbookpath,'rb')
@ -659,15 +660,19 @@ def LoadLogbooks():
DataIssue.objects.create(parser='logbooks', message=message)
logdataissues[f"sqlfail 0000"]=message
print(message)
return
noexpo = ["1986", "2020", "2021",] #no expo
lostlogbook = ["1976", "1977", "1978", "1979", "1980", "1981"]
sqlfail = ["1987", "1988", "1989"] # breaks mysql with db constraint fail - debug locally first
nologbook = noexpo + lostlogbook + sqlfail
blogs = ["2019"]
nlbe={}
expd ={}
actuals = []
loglist = []
bloglist = []
for expo in expos: # pointless as we explicitly know the years in this code.
year = expo.year
@ -681,16 +686,26 @@ def LoadLogbooks():
if year not in nologbook:
if year in entries:
actuals.append(expo)
loglist.append(expo)
else:
print(" - No Logbook yet for: " + year) # catch case when preparing for next expo
if year in blogs:
bloglist.append(expo)
for ex in actuals:
for ex in loglist:
nlbe[ex] = LoadLogbookForExpedition(ex) # this actually loads the logbook for one expo
for b in bloglist:
orig = LOGBOOK_PARSER_SETTINGS[str(b)]
LOGBOOK_PARSER_SETTINGS[str(b)] = ("ukcavingblog.html", "parser_blog")
nlbe[b] = LoadLogbookForExpedition(b, clean=False) # this actually loads the logbook for one expo
LOGBOOK_PARSER_SETTINGS[str(b)] = orig
# Tried to use map with concurrent threads, but the sqlite database is not concurrent, so it failed with a database lock.
# yt = 0
# for r in map(LoadLogbookForExpedition, actuals):
# for r in map(LoadLogbookForExpedition, loglist):
# yt = r
yt = 0