forked from expo/troggle
no search before db object creation: faster
This commit is contained in:
parent
e4c804b305
commit
db0504057b
@ -40,7 +40,7 @@ def import_logbooks():
|
|||||||
troggle.parsers.logbooks.LoadLogbooks()
|
troggle.parsers.logbooks.LoadLogbooks()
|
||||||
|
|
||||||
|
|
||||||
def import_logbook(year=2016):
|
def import_logbook(year=2022):
|
||||||
print(f"-- Importing Logbook {year}")
|
print(f"-- Importing Logbook {year}")
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
troggle.parsers.logbooks.LoadLogbook(year)
|
troggle.parsers.logbooks.LoadLogbook(year)
|
||||||
|
@ -205,25 +205,18 @@ def tidy_trip_persons(trippeople, expedition, logtime_underground, tid):
|
|||||||
|
|
||||||
return trippersons, author
|
return trippersons, author
|
||||||
|
|
||||||
def store_entry_into_database(date, place, title, text, trippersons, author, expedition, logtime_underground, tid=None):
|
def tidy_trip_cave(place):
|
||||||
"""saves a single logbook entry and related persontrips
|
# GetCaveLookup() needs to work better. None of this data is *used* though?
|
||||||
Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday !
|
# 'tripcave' is converted to a string doing this, which renders as the cave slug.
|
||||||
|
|
||||||
troggle.log shows that we are creating lots of duplicates, which is no problem with SQL as they just overwrite, but we are saving the same thing too many times.
|
|
||||||
|
|
||||||
Until 18 Dec.2022, this was overwriting logbook entries for the same date with the same title, because
|
|
||||||
lookupAttribs={'date':date, 'title':title}
|
|
||||||
"""
|
|
||||||
|
|
||||||
# This needs attention. The slug field is derived from 'title'
|
|
||||||
# both GetCaveLookup() and GetTripCave() need to work together better. None of this data is *used* though?
|
|
||||||
# tripCave = GetTripCave(place):
|
|
||||||
|
|
||||||
lplace = place.lower()
|
lplace = place.lower()
|
||||||
cave = None
|
cave = None
|
||||||
if lplace not in noncaveplaces:
|
if lplace not in noncaveplaces:
|
||||||
cave = GetCaveLookup().get(lplace)
|
cave = GetCaveLookup().get(lplace)
|
||||||
|
|
||||||
|
return cave
|
||||||
|
|
||||||
|
def tidy_trip_image_urls(text, date):
|
||||||
y = str(date)[:4]
|
y = str(date)[:4]
|
||||||
|
|
||||||
text = text.replace(' src="', f' src="/years/{y}/')
|
text = text.replace(' src="', f' src="/years/{y}/')
|
||||||
@ -234,13 +227,26 @@ def store_entry_into_database(date, place, title, text, trippersons, author, exp
|
|||||||
|
|
||||||
text = text.replace("\t", "")
|
text = text.replace("\t", "")
|
||||||
text = text.replace("\n\n\n", "\n\n")
|
text = text.replace("\n\n\n", "\n\n")
|
||||||
|
return text
|
||||||
|
|
||||||
|
def store_entry_into_database(date, place, tripcave, title, text, trippersons, author, expedition, logtime_underground, tid=None):
|
||||||
|
"""saves a single logbook entry and related persontrips
|
||||||
|
Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday !
|
||||||
|
|
||||||
|
troggle.log shows that we are creating lots of duplicates, which is no problem with SQL as they just overwrite, but we are saving the same thing too many times.
|
||||||
|
|
||||||
|
Until 18 Dec.2022, this was overwriting logbook entries for the same date with the same title, because
|
||||||
|
lookupAttribs={'date':date, 'title':title}
|
||||||
|
"""
|
||||||
|
text = tidy_trip_image_urls(text, date)
|
||||||
|
|
||||||
# Check for an existing copy of the current entry, and save
|
# Check for an existing copy of the current entry, and save
|
||||||
expedition.get_expedition_day(date)
|
expedition.get_expedition_day(date)
|
||||||
|
|
||||||
lookupAttribs = {"date": date, "title": title}
|
lookupAttribs = {"date": date, "title": title}
|
||||||
# 'cave' is converted to a string doing this, which renders as the cave slug.
|
|
||||||
# but it is a db query which we should try to avoid - rewrite this
|
# but it is a db query which we should try to avoid - rewrite this
|
||||||
|
|
||||||
|
# This needs attention. The slug field is derived from 'title'
|
||||||
# NEW slug for a logbook entry here! Unique id + slugified title fragment
|
# NEW slug for a logbook entry here! Unique id + slugified title fragment
|
||||||
|
|
||||||
if tid is not None:
|
if tid is not None:
|
||||||
@ -253,12 +259,13 @@ def store_entry_into_database(date, place, title, text, trippersons, author, exp
|
|||||||
"text": text,
|
"text": text,
|
||||||
"expedition": expedition,
|
"expedition": expedition,
|
||||||
"time_underground": logtime_underground,
|
"time_underground": logtime_underground,
|
||||||
"cave_slug": str(cave),
|
"cave_slug": str(tripcave),
|
||||||
"slug": slug,
|
"slug": slug,
|
||||||
}
|
}
|
||||||
|
# Rewriting as we know prior objects have already been deleted.
|
||||||
# This creates the lbo instance of LogbookEntry
|
# This creates the lbo instance of LogbookEntry
|
||||||
lbo, created = save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
|
lbo = LogbookEntry.objects.create(**nonLookupAttribs, **lookupAttribs)
|
||||||
|
# lbo, created = save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
|
||||||
|
|
||||||
# for PersonTrip time_underground is float (decimal hours)
|
# for PersonTrip time_underground is float (decimal hours)
|
||||||
for tripperson, time_underground in trippersons:
|
for tripperson, time_underground in trippersons:
|
||||||
@ -266,7 +273,8 @@ def store_entry_into_database(date, place, title, text, trippersons, author, exp
|
|||||||
lookupAttribs = {"personexpedition": tripperson, "logbook_entry": lbo}
|
lookupAttribs = {"personexpedition": tripperson, "logbook_entry": lbo}
|
||||||
nonLookupAttribs = {"time_underground": time_underground, "is_logbook_entry_author": (tripperson == author)}
|
nonLookupAttribs = {"time_underground": time_underground, "is_logbook_entry_author": (tripperson == author)}
|
||||||
# this creates the PersonTrip instance.
|
# this creates the PersonTrip instance.
|
||||||
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs)
|
pt = PersonTrip.objects.create(**nonLookupAttribs, **lookupAttribs)
|
||||||
|
# save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs)
|
||||||
|
|
||||||
def parser_date(tripdate, year):
|
def parser_date(tripdate, year):
|
||||||
"""Interprets dates in the expo logbooks and returns a correct datetime.date object"""
|
"""Interprets dates in the expo logbooks and returns a correct datetime.date object"""
|
||||||
@ -386,9 +394,9 @@ def parser_html(year, expedition, txt, seq=""):
|
|||||||
ldate = parser_date(tripdate.strip(), year)
|
ldate = parser_date(tripdate.strip(), year)
|
||||||
triptitles = triptitle.split(" - ")
|
triptitles = triptitle.split(" - ")
|
||||||
if len(triptitles) >= 2:
|
if len(triptitles) >= 2:
|
||||||
tripcave = triptitles[0]
|
place = triptitles[0]
|
||||||
else:
|
else:
|
||||||
tripcave = "UNKNOWN"
|
place = "UNKNOWN"
|
||||||
ltriptext = re.sub(r"</p>", "", triptext)
|
ltriptext = re.sub(r"</p>", "", triptext)
|
||||||
# ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
# ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
||||||
ltriptext = re.sub(r"<p>", "<br /><br />", ltriptext).strip()
|
ltriptext = re.sub(r"<p>", "<br /><br />", ltriptext).strip()
|
||||||
@ -404,9 +412,10 @@ def parser_html(year, expedition, txt, seq=""):
|
|||||||
dupl[check] = 1
|
dupl[check] = 1
|
||||||
|
|
||||||
tu = tidy_time_underground(tu)
|
tu = tidy_time_underground(tu)
|
||||||
|
|
||||||
trippersons, author = tidy_trip_persons(trippeople, expedition, tu, tid)
|
trippersons, author = tidy_trip_persons(trippeople, expedition, tu, tid)
|
||||||
entrytuple = (ldate, tripcave, triptitle, ltriptext, trippersons, author, expedition, tu, tripid1)
|
tripcave = tidy_trip_cave(place)
|
||||||
|
|
||||||
|
entrytuple = (ldate, place, tripcave, triptitle, ltriptext, trippersons, author, expedition, tu, tripid1)
|
||||||
logentries.append(entrytuple)
|
logentries.append(entrytuple)
|
||||||
|
|
||||||
|
|
||||||
@ -494,7 +503,7 @@ def parser_blog(year, expedition, txt, sq=""):
|
|||||||
# print(f" - tid: {tid} '{trippeople}' '{tripdate}'")
|
# print(f" - tid: {tid} '{trippeople}' '{tripdate}'")
|
||||||
|
|
||||||
# triptitle must have the location then a hyphen at the beginning as it is ignored by export function. We can't know what this is, so we set it as 'Expo' and 'Unknown'.
|
# triptitle must have the location then a hyphen at the beginning as it is ignored by export function. We can't know what this is, so we set it as 'Expo' and 'Unknown'.
|
||||||
location = "Unknown"
|
place = "Unknown"
|
||||||
# triptitle must be unique for a given date. We can enforce this here.
|
# triptitle must be unique for a given date. We can enforce this here.
|
||||||
triptitle = f"Expo - UK Caving Blog{sq} post {logbook_entry_count}"
|
triptitle = f"Expo - UK Caving Blog{sq} post {logbook_entry_count}"
|
||||||
tripcontent = re.sub(r"(width=\"\d+\")", "", tripcontent)
|
tripcontent = re.sub(r"(width=\"\d+\")", "", tripcontent)
|
||||||
@ -505,7 +514,9 @@ def parser_blog(year, expedition, txt, sq=""):
|
|||||||
tripcontent = f"\n\n<!-- Content parsed from UK Caving Blog -->\nBlog Author: {trippeople}" + tripcontent
|
tripcontent = f"\n\n<!-- Content parsed from UK Caving Blog -->\nBlog Author: {trippeople}" + tripcontent
|
||||||
|
|
||||||
trippersons, author = tidy_trip_persons(trippeople, expedition, logtime_underground, tid)
|
trippersons, author = tidy_trip_persons(trippeople, expedition, logtime_underground, tid)
|
||||||
entrytuple = (tripdate, location, triptitle, tripcontent, trippersons, author, expedition, tu, tid)
|
tripcave = tidy_trip_cave(place)
|
||||||
|
|
||||||
|
entrytuple = (tripdate, place, tripcave, triptitle, tripcontent, trippersons, author, expedition, tu, tid)
|
||||||
logentries.append(entrytuple)
|
logentries.append(entrytuple)
|
||||||
|
|
||||||
def clean_all_logbooks():
|
def clean_all_logbooks():
|
||||||
@ -581,10 +592,6 @@ def parse_logbook_for_expedition(expedition, blog=False):
|
|||||||
print(f" - {year} parsing with {parsefunc} - {lb}")
|
print(f" - {year} parsing with {parsefunc} - {lb}")
|
||||||
parser(year, expedition, txt, sq) # this launches the right parser for this year
|
parser(year, expedition, txt, sq) # this launches the right parser for this year
|
||||||
# --------------------
|
# --------------------
|
||||||
# move database storage into separate step
|
|
||||||
# for entrytuple in logentries:
|
|
||||||
# date, tripcave, triptitle, text, trippersons, author, expedition, logtime_underground, tripid1 = entrytuple
|
|
||||||
# store_entry_into_database(date, tripcave, triptitle, text, trippersons, author, expedition, logtime_underground, tripid1)
|
|
||||||
|
|
||||||
if len(logentries) == expect:
|
if len(logentries) == expect:
|
||||||
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
|
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
|
||||||
@ -614,10 +621,12 @@ def LoadLogbook(year):
|
|||||||
f" - Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}"
|
f" - Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}"
|
||||||
)
|
)
|
||||||
for entrytuple in logentries:
|
for entrytuple in logentries:
|
||||||
date, tripcave, triptitle, text, trippersons, author, expedition, logtime_underground, tripid1 = entrytuple
|
date, place, tripcave, triptitle, text, trippersons, author, expedition, logtime_underground, tripid1 = entrytuple
|
||||||
if expo == expedition:
|
if expo == expedition: # unneeded check, we zeroed it before filling it
|
||||||
#print(f" - {triptitle}")
|
#print(f" - {triptitle}")
|
||||||
store_entry_into_database(date, tripcave, triptitle, text, trippersons, author, expedition, logtime_underground, tripid1)
|
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, expedition, logtime_underground, tripid1)
|
||||||
|
else:
|
||||||
|
print(f" ! unexpected log entry labelled as '{expedition}' {tripid1}" )
|
||||||
expedition.save() # to save logbook name property
|
expedition.save() # to save logbook name property
|
||||||
|
|
||||||
def LoadLogbooks():
|
def LoadLogbooks():
|
||||||
@ -703,8 +712,8 @@ def LoadLogbooks():
|
|||||||
# - LogBookEntry (text, who when etc.)
|
# - LogBookEntry (text, who when etc.)
|
||||||
# - PersonTrip (who was on that specific trip mentioned in the logbook entry)
|
# - PersonTrip (who was on that specific trip mentioned in the logbook entry)
|
||||||
for entrytuple in allentries:
|
for entrytuple in allentries:
|
||||||
date, tripcave, triptitle, text, trippersons, author, expedition, logtime_underground, tripid1 = entrytuple
|
date, place, tripcave, triptitle, text, trippersons, author, expedition, logtime_underground, tripid1 = entrytuple
|
||||||
store_entry_into_database(date, tripcave, triptitle, text, trippersons, author, expedition, logtime_underground, tripid1)
|
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, expedition, logtime_underground, tripid1)
|
||||||
|
|
||||||
for expo in expos:
|
for expo in expos:
|
||||||
expedition.save() # to save logbook name property
|
expedition.save() # to save logbook name property
|
||||||
|
Loading…
Reference in New Issue
Block a user