blog parsing working

This commit is contained in:
2022-12-15 00:35:48 +00:00
parent cb50528e2d
commit 5cc6c26606

View File

@@ -525,7 +525,7 @@ def parser_blog(year, expedition, txt):
datestamp = match_datetime.group(1) datestamp = match_datetime.group(1)
tripdate = datetime.fromisoformat(datestamp) tripdate = datetime.fromisoformat(datestamp)
print(f" - tid: {tid} '{trippeople}' '{tripdate}'") # print(f" - tid: {tid} '{trippeople}' '{tripdate}'")
tripname = f"UK Caving Blog post {logbook_entry_count}" # must be unique for a given date tripname = f"UK Caving Blog post {logbook_entry_count}" # must be unique for a given date
@@ -534,7 +534,7 @@ def parser_blog(year, expedition, txt):
logentries.append(entrytuple) logentries.append(entrytuple)
def LoadLogbookForExpedition(expedition): def LoadLogbookForExpedition(expedition, clean=True):
""" Parses all logbook entries for one expedition """ Parses all logbook entries for one expedition
""" """
global logentries global logentries
@@ -571,15 +571,15 @@ def LoadLogbookForExpedition(expedition):
dellist.append(key) dellist.append(key)
for i in dellist: for i in dellist:
del logdataissues[i] del logdataissues[i]
if (clean):
cleanerrors(year) cleanerrors(year)
if year in yearlinks: if year in yearlinks:
yearfile, yearparser = yearlinks[year] yearfile, yearparser = yearlinks[year]
logbookpath = Path(expologbase) / year / yearfile logbookpath = Path(expologbase) / year / yearfile
expedition.logbookfile = yearfile expedition.logbookfile = yearfile
parsefunc = yearparser parsefunc = yearparser
print(f" - Logbook file {yearfile} using parser {yearparser}") # print(f" - Logbook file {yearfile} using parser {yearparser}")
else: else:
logbookpath = Path(expologbase) / year / DEFAULT_LOGBOOK_FILE logbookpath = Path(expologbase) / year / DEFAULT_LOGBOOK_FILE
@@ -589,8 +589,9 @@ def LoadLogbookForExpedition(expedition):
expedition.save() expedition.save()
lbes = LogbookEntry.objects.filter(expedition=expedition) lbes = LogbookEntry.objects.filter(expedition=expedition)
for lbe in lbes: if (clean):
lbe.delete() for lbe in lbes:
lbe.delete()
try: try:
file_in = open(logbookpath,'rb') file_in = open(logbookpath,'rb')
@@ -659,15 +660,19 @@ def LoadLogbooks():
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser='logbooks', message=message)
logdataissues[f"sqlfail 0000"]=message logdataissues[f"sqlfail 0000"]=message
print(message) print(message)
return
noexpo = ["1986", "2020", "2021",] #no expo noexpo = ["1986", "2020", "2021",] #no expo
lostlogbook = ["1976", "1977", "1978", "1979", "1980", "1981"] lostlogbook = ["1976", "1977", "1978", "1979", "1980", "1981"]
sqlfail = ["1987", "1988", "1989"] # breaks mysql with db constraint fail - debug locally first] sqlfail = ["1987", "1988", "1989"] # breaks mysql with db constraint fail - debug locally first]
nologbook = noexpo + lostlogbook + sqlfail nologbook = noexpo + lostlogbook + sqlfail
blogs = ["2019"]
nlbe={} nlbe={}
expd ={} expd ={}
actuals = [] loglist = []
bloglist = []
for expo in expos: # pointless as we explicitly know the years in this code. for expo in expos: # pointless as we explicitly know the years in this code.
year = expo.year year = expo.year
@@ -681,16 +686,26 @@ def LoadLogbooks():
if year not in nologbook: if year not in nologbook:
if year in entries: if year in entries:
actuals.append(expo) loglist.append(expo)
else: else:
print(" - No Logbook yet for: " + year) # catch case when preparing for next expo print(" - No Logbook yet for: " + year) # catch case when preparing for next expo
if year in blogs:
bloglist.append(expo)
for ex in actuals:
for ex in loglist:
nlbe[ex] = LoadLogbookForExpedition(ex) # this actually loads the logbook for one expo nlbe[ex] = LoadLogbookForExpedition(ex) # this actually loads the logbook for one expo
for b in bloglist:
orig = LOGBOOK_PARSER_SETTINGS[str(b)]
LOGBOOK_PARSER_SETTINGS[str(b)] = ("ukcavingblog.html", "parser_blog")
nlbe[b] = LoadLogbookForExpedition(b, clean=False) # this actually loads the logbook for one expo
LOGBOOK_PARSER_SETTINGS[str(b)] = orig
# tried to use map with concurrent threads - but sqlite database is not concurrent, so failed with database lock # tried to use map with concurrent threads - but sqlite database is not concurrent, so failed with database lock
# yt = 0 # yt = 0
# for r in map(LoadLogbookForExpedition, actuals): # for r in map(LoadLogbookForExpedition, loglist):
# yt = r # yt = r
yt = 0 yt = 0