forked from expo/troggle
blog parsing working
This commit is contained in:
parent
cb50528e2d
commit
5cc6c26606
@ -525,7 +525,7 @@ def parser_blog(year, expedition, txt):
|
||||
datestamp = match_datetime.group(1)
|
||||
|
||||
tripdate = datetime.fromisoformat(datestamp)
|
||||
print(f" - tid: {tid} '{trippeople}' '{tripdate}'")
|
||||
# print(f" - tid: {tid} '{trippeople}' '{tripdate}'")
|
||||
|
||||
tripname = f"UK Caving Blog post {logbook_entry_count}" # must be unique for a given date
|
||||
|
||||
@ -534,7 +534,7 @@ def parser_blog(year, expedition, txt):
|
||||
logentries.append(entrytuple)
|
||||
|
||||
|
||||
def LoadLogbookForExpedition(expedition):
|
||||
def LoadLogbookForExpedition(expedition, clean=True):
|
||||
""" Parses all logbook entries for one expedition
|
||||
"""
|
||||
global logentries
|
||||
@ -571,15 +571,15 @@ def LoadLogbookForExpedition(expedition):
|
||||
dellist.append(key)
|
||||
for i in dellist:
|
||||
del logdataissues[i]
|
||||
|
||||
cleanerrors(year)
|
||||
if (clean):
|
||||
cleanerrors(year)
|
||||
|
||||
if year in yearlinks:
|
||||
yearfile, yearparser = yearlinks[year]
|
||||
logbookpath = Path(expologbase) / year / yearfile
|
||||
expedition.logbookfile = yearfile
|
||||
parsefunc = yearparser
|
||||
print(f" - Logbook file {yearfile} using parser {yearparser}")
|
||||
# print(f" - Logbook file {yearfile} using parser {yearparser}")
|
||||
|
||||
else:
|
||||
logbookpath = Path(expologbase) / year / DEFAULT_LOGBOOK_FILE
|
||||
@ -589,8 +589,9 @@ def LoadLogbookForExpedition(expedition):
|
||||
expedition.save()
|
||||
|
||||
lbes = LogbookEntry.objects.filter(expedition=expedition)
|
||||
for lbe in lbes:
|
||||
lbe.delete()
|
||||
if (clean):
|
||||
for lbe in lbes:
|
||||
lbe.delete()
|
||||
|
||||
try:
|
||||
file_in = open(logbookpath,'rb')
|
||||
@ -659,15 +660,19 @@ def LoadLogbooks():
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[f"sqlfail 0000"]=message
|
||||
print(message)
|
||||
return
|
||||
|
||||
noexpo = ["1986", "2020", "2021",] #no expo
|
||||
lostlogbook = ["1976", "1977", "1978", "1979", "1980", "1981"]
|
||||
sqlfail = ["1987", "1988", "1989"] # breaks mysql with db constraint fail - debug locally first]
|
||||
nologbook = noexpo + lostlogbook + sqlfail
|
||||
|
||||
blogs = ["2019"]
|
||||
|
||||
nlbe={}
|
||||
expd ={}
|
||||
actuals = []
|
||||
loglist = []
|
||||
bloglist = []
|
||||
|
||||
for expo in expos: # pointless as we explicitly know the years in this code.
|
||||
year = expo.year
|
||||
@ -681,16 +686,26 @@ def LoadLogbooks():
|
||||
|
||||
if year not in nologbook:
|
||||
if year in entries:
|
||||
actuals.append(expo)
|
||||
loglist.append(expo)
|
||||
else:
|
||||
print(" - No Logbook yet for: " + year) # catch case when preparing for next expo
|
||||
|
||||
if year in blogs:
|
||||
bloglist.append(expo)
|
||||
|
||||
for ex in actuals:
|
||||
|
||||
for ex in loglist:
|
||||
nlbe[ex] = LoadLogbookForExpedition(ex) # this actually loads the logbook for one expo
|
||||
|
||||
|
||||
for b in bloglist:
|
||||
orig = LOGBOOK_PARSER_SETTINGS[str(b)]
|
||||
LOGBOOK_PARSER_SETTINGS[str(b)] = ("ukcavingblog.html", "parser_blog")
|
||||
nlbe[b] = LoadLogbookForExpedition(b, clean=False) # this actually loads the logbook for one expo
|
||||
LOGBOOK_PARSER_SETTINGS[str(b)] = orig
|
||||
|
||||
# Tried using map with concurrent threads, but the sqlite database is not concurrent, so it failed with a database lock.
|
||||
# yt = 0
|
||||
# for r in map(LoadLogbookForExpedition, actuals):
|
||||
# for r in map(LoadLogbookForExpedition, loglist):
|
||||
# yt = r
|
||||
|
||||
yt = 0
|
||||
|
Loading…
Reference in New Issue
Block a user