forked from expo/troggle
blog parsing working
@@ -525,7 +525,7 @@ def parser_blog(year, expedition, txt):
         datestamp = match_datetime.group(1)
 
         tripdate = datetime.fromisoformat(datestamp)
-        print(f" - tid: {tid} '{trippeople}' '{tripdate}'")
+        # print(f" - tid: {tid} '{trippeople}' '{tripdate}'")
 
         tripname = f"UK Caving Blog post {logbook_entry_count}" # must be unique for a given date
 
@@ -534,7 +534,7 @@ def parser_blog(year, expedition, txt):
         logentries.append(entrytuple)
 
 
-def LoadLogbookForExpedition(expedition):
+def LoadLogbookForExpedition(expedition, clean=True):
     """ Parses all logbook entries for one expedition
     """
     global logentries
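The new clean keyword defaults to True, so existing callers keep the old wipe-and-reload behaviour; passing clean=False lets a second parse add entries on top of what is already loaded. A minimal usage sketch (the Expedition lookup here is illustrative, not part of this commit):

# Default clean=True: existing LogbookEntry rows for the year are deleted first.
expedition = Expedition.objects.get(year="2019")
nlbe = LoadLogbookForExpedition(expedition)

# clean=False: keep what was just parsed and append further entries
# (used below for the UK Caving blog pass over the same year).
nlbe = LoadLogbookForExpedition(expedition, clean=False)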
@@ -571,15 +571,15 @@ def LoadLogbookForExpedition(expedition):
                 dellist.append(key)
         for i in dellist:
             del logdataissues[i]
-
-    cleanerrors(year)
+    if (clean):
+        cleanerrors(year)
 
     if year in yearlinks:
         yearfile, yearparser = yearlinks[year]
         logbookpath = Path(expologbase) / year / yearfile
         expedition.logbookfile = yearfile
         parsefunc = yearparser
-        print(f" - Logbook file {yearfile} using parser {yearparser}")
+        # print(f" - Logbook file {yearfile} using parser {yearparser}")
 
     else:
         logbookpath = Path(expologbase) / year / DEFAULT_LOGBOOK_FILE
@@ -589,8 +589,9 @@ def LoadLogbookForExpedition(expedition):
     expedition.save()
 
     lbes = LogbookEntry.objects.filter(expedition=expedition)
-    for lbe in lbes:
-        lbe.delete()
+    if (clean):
+        for lbe in lbes:
+            lbe.delete()
 
     try:
         file_in = open(logbookpath,'rb')
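With this guard, prior entries for the expedition are only removed when clean is true, so a blog pass over the same year appends rather than replaces. The guarded cleanup could equally be written as a single queryset delete; a sketch, assuming the same Django models:

if clean:
    # One bulk DELETE instead of one query per entry; note that this
    # bypasses any per-instance delete() override on LogbookEntry.
    LogbookEntry.objects.filter(expedition=expedition).delete()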
@@ -659,15 +660,19 @@ def LoadLogbooks():
         DataIssue.objects.create(parser='logbooks', message=message)
         logdataissues[f"sqlfail 0000"]=message
         print(message)
+        return
 
     noexpo = ["1986", "2020", "2021",] #no expo
     lostlogbook = ["1976", "1977", "1978", "1979", "1980", "1981"]
     sqlfail = ["1987", "1988", "1989"] # breaks mysql with db constraint fail - debug locally first]
     nologbook = noexpo + lostlogbook + sqlfail
 
+    blogs = ["2019"]
+
     nlbe={}
     expd ={}
-    actuals = []
+    loglist = []
+    bloglist = []
 
     for expo in expos: # pointless as we explicitly know the years in this code.
         year = expo.year
@@ -681,16 +686,26 @@ def LoadLogbooks():
 
         if year not in nologbook:
             if year in entries:
-                actuals.append(expo)
+                loglist.append(expo)
             else:
                 print(" - No Logbook yet for: " + year) # catch case when preparing for next expo
 
-    for ex in actuals:
+        if year in blogs:
+            bloglist.append(expo)
+
+
+    for ex in loglist:
         nlbe[ex] = LoadLogbookForExpedition(ex) # this actually loads the logbook for one expo
 
+    for b in bloglist:
+        orig = LOGBOOK_PARSER_SETTINGS[str(b)]
+        LOGBOOK_PARSER_SETTINGS[str(b)] = ("ukcavingblog.html", "parser_blog")
+        nlbe[b] = LoadLogbookForExpedition(b, clean=False) # this actually loads the logbook for one expo
+        LOGBOOK_PARSER_SETTINGS[str(b)] = orig
+
     # tried to use map with concurrent threads - but sqlite database is not concurrent, so failed with database lock
     # yt = 0
-    # for r in map(LoadLogbookForExpedition, actuals):
+    # for r in map(LoadLogbookForExpedition, loglist):
     #     yt = r
 
     yt = 0
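The bloglist loop uses a save, override, restore pattern on LOGBOOK_PARSER_SETTINGS so that one year can be parsed twice with different parsers against the same settings table. If an exception escaped LoadLogbookForExpedition, the override would stay in place; a sketch of an exception-safe variant using the names from this commit (not part of the commit itself):

from contextlib import contextmanager

@contextmanager
def parser_override(year, setting):
    """Temporarily point one year at a different (file, parser) pair."""
    orig = LOGBOOK_PARSER_SETTINGS[year]
    LOGBOOK_PARSER_SETTINGS[year] = setting
    try:
        yield
    finally:
        LOGBOOK_PARSER_SETTINGS[year] = orig  # restored even if parsing raises

for b in bloglist:
    with parser_override(str(b), ("ukcavingblog.html", "parser_blog")):
        nlbe[b] = LoadLogbookForExpedition(b, clean=False)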