mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2025-12-14 01:27:10 +00:00
Deep fix, nonunique ids in logbookentries fixed
This commit is contained in:
@@ -36,7 +36,11 @@ save_carefully() - core function that saves troggle objects in the database
|
|||||||
TROG = {
|
TROG = {
|
||||||
'pagecache' : {
|
'pagecache' : {
|
||||||
'expedition' : {}
|
'expedition' : {}
|
||||||
|
},
|
||||||
|
'issues' : {
|
||||||
|
'logdataissues' : {}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# This is module-level executable. This is a Bad Thing. Especially when it touches the file system.
|
# This is module-level executable. This is a Bad Thing. Especially when it touches the file system.
|
||||||
|
|||||||
@@ -59,16 +59,16 @@ def expedition(request, expeditionname):
|
|||||||
if request.user.is_authenticated:
|
if request.user.is_authenticated:
|
||||||
if "reload" in request.GET:
|
if "reload" in request.GET:
|
||||||
this_expedition = Expedition.objects.get(year=int(expeditionname))
|
this_expedition = Expedition.objects.get(year=int(expeditionname))
|
||||||
# Need to delete the exisitng entries or we get duplication
|
# Need to delete the existing entries or we get duplication
|
||||||
# Need to delete both in the Django ORM and in our own object-store.
|
# Need to delete both in the Django ORM and in our own object-store.
|
||||||
entries = this_expedition.logbookentry_set.all()
|
entries = this_expedition.logbookentry_set.all()
|
||||||
print(f'! - expo {expeditionname} {len(entries)} entries')
|
print(f'! - expo {expeditionname} {len(entries)} entries initially')
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
print(f'! - delete entry: "{entry}"')
|
#print(f'! - delete entry: "{entry}"')
|
||||||
entry.delete()
|
entry.delete()
|
||||||
entries = this_expedition.logbookentry_set.all()
|
entries = this_expedition.logbookentry_set.all()
|
||||||
print(f'! - expo {expeditionname} {len(entries)} entries')
|
print(f'! - expo {expeditionname} {len(entries)} entries after deletion')
|
||||||
LoadLogbookForExpedition(this_expedition, 0) # 0 means re-parse
|
LoadLogbookForExpedition(this_expedition, 0) # 0 means re-parse as implies cache expected to be 0
|
||||||
logged_in = True
|
logged_in = True
|
||||||
else:
|
else:
|
||||||
logged_in = False
|
logged_in = False
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
import csv
|
import csv
|
||||||
import datetime
|
from datetime import datetime, date, time
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import time
|
#import time
|
||||||
import pickle
|
import pickle
|
||||||
import shelve
|
import shelve
|
||||||
|
|
||||||
@@ -26,8 +26,6 @@ todo='''
|
|||||||
|
|
||||||
- refactor everything with some urgency, esp. LoadLogbookForExpedition()
|
- refactor everything with some urgency, esp. LoadLogbookForExpedition()
|
||||||
|
|
||||||
- delete all the autoLogbooKEntry stuff when we are absolutely certain what it does
|
|
||||||
|
|
||||||
- Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser,
|
- Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser,
|
||||||
or it is broken/incomplete and need hand-editing.
|
or it is broken/incomplete and need hand-editing.
|
||||||
|
|
||||||
@@ -46,26 +44,35 @@ todo='''
|
|||||||
- We should ensure logbook.html is utf-8 and stop this crap:
|
- We should ensure logbook.html is utf-8 and stop this crap:
|
||||||
file_in = open(logbookfile,'rb')
|
file_in = open(logbookfile,'rb')
|
||||||
txt = file_in.read().decode("latin1")
|
txt = file_in.read().decode("latin1")
|
||||||
|
|
||||||
|
- this is a slow and uncertain function: cave = getCaveByReference(caveRef)
|
||||||
'''
|
'''
|
||||||
|
|
||||||
logentries = [] # the entire logbook for one year is a single object: a list of entries
|
logentries = [] # the entire logbook for one year is a single object: a list of entries
|
||||||
noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau',
|
noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau',
|
||||||
'base camp', 'basecamp', 'top camp', 'topcamp' ]
|
'base camp', 'basecamp', 'top camp', 'topcamp' ]
|
||||||
logdataissues = {}
|
logdataissues = TROG['issues']['logdataissues']
|
||||||
trips ={}
|
trips ={}
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# the logbook loading section
|
# the logbook loading section
|
||||||
#
|
#
|
||||||
def GetTripPersons(trippeople, expedition, logtime_underground, tid="!"):
|
def set_trip_id(year, seq):
    """Build the identifier string used for one logbook entry.

    Args:
        year: Prefix of the id. Callers in this file pass the expedition
            year, or a date string when re-keying a duplicate entry; it is
            interpolated as-is.
        seq: Integer sequence number of the entry; zero-padded to at least
            two digits.

    Returns:
        A string of the form "<year>.s<seq>", e.g. "2019.s03".
    """
    return f"{year}.s{seq:02d}"
|
|
||||||
|
|
||||||
|
def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
||||||
res = [ ]
|
res = [ ]
|
||||||
author = None
|
author = None
|
||||||
round_bracket_regex = re.compile(r"[\(\[].*?[\)\]]")
|
round_bracket_regex = re.compile(r"[\(\[].*?[\)\]]")
|
||||||
if tid =="!":
|
#print(f'# {tid}')
|
||||||
tid = expedition.year + "." + tripperson
|
|
||||||
for tripperson in re.split(r",|\+|&|&(?!\w+;)| and ", trippeople):
|
for tripperson in re.split(r",|\+|&|&(?!\w+;)| and ", trippeople):
|
||||||
tripperson = tripperson.strip()
|
tripperson = tripperson.strip()
|
||||||
|
if not tid:
|
||||||
|
tid = expedition.year + "." + tripperson + datetime.now().strftime("%S%f") # no good. Should be getting the tid
|
||||||
mul = re.match(r"<u>(.*?)</u>$(?i)", tripperson)
|
mul = re.match(r"<u>(.*?)</u>$(?i)", tripperson)
|
||||||
if mul:
|
if mul:
|
||||||
tripperson = mul.group(1).strip()
|
tripperson = mul.group(1).strip()
|
||||||
@@ -86,23 +93,24 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid="!"):
|
|||||||
author = res[-1][0]
|
author = res[-1][0]
|
||||||
return res, author
|
return res, author
|
||||||
|
|
||||||
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki", tid="!"):
|
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki", tid=None):
|
||||||
""" saves a logbook entry and related persontrips
|
""" saves a logbook entry and related persontrips
|
||||||
Does NOT save the expeditionday_id - all NULLs. why?
|
Does NOT save the expeditionday_id - all NULLs. why?
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid)
|
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid)
|
||||||
except:
|
except:
|
||||||
message = " ! - Skipping logentry: %s - GetTripPersons FAIL in year '%s'" % (title, expedition.year)
|
message = f" ! - {expedition.year} Skipping logentry: {title} - GetTripPersons FAIL"
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser='logbooks', message=message)
|
||||||
logdataissues["title"]=message
|
logdataissues["title"]=message
|
||||||
|
print(message)
|
||||||
return
|
return
|
||||||
|
|
||||||
if not author:
|
if not author:
|
||||||
print(" ! - Skipping logentry: " + title + " - no author for entry")
|
message = f" ! - {expedition.year} Skipping logentry: {title} - - no author for entry in year "
|
||||||
message = " ! - Skipping logentry: %s - no author for entry in year '%s'" % (title, expedition.year)
|
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser='logbooks', message=message)
|
||||||
logdataissues["title"]=message
|
logdataissues["title"]=message
|
||||||
|
print(message)
|
||||||
return
|
return
|
||||||
|
|
||||||
# This needs attention. The slug field is derived from 'title'
|
# This needs attention. The slug field is derived from 'title'
|
||||||
@@ -113,16 +121,16 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
|||||||
cave=None
|
cave=None
|
||||||
if lplace not in noncaveplaces:
|
if lplace not in noncaveplaces:
|
||||||
cave = GetCaveLookup().get(lplace)
|
cave = GetCaveLookup().get(lplace)
|
||||||
# message = " ! - '" + lplace + "' place not in noncaveplaces."
|
|
||||||
# print(message)
|
|
||||||
# DataIssue.objects.create(parser='logbooks', message=message)
|
|
||||||
|
|
||||||
#Check for an existing copy of the current entry, and save
|
#Check for an existing copy of the current entry, and save
|
||||||
expeditionday = expedition.get_expedition_day(date)
|
expeditionday = expedition.get_expedition_day(date)
|
||||||
lookupAttribs={'date':date, 'title':title}
|
lookupAttribs={'date':date, 'title':title}
|
||||||
# 'cave' is converted to a string doing this, which renders as the cave slug.
|
# 'cave' is converted to a string doing this, which renders as the cave slug.
|
||||||
# but it is a db query which we should try to avoid - rewrite this
|
# but it is a db query which we should try to avoid - rewrite this
|
||||||
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug':slugify(title)[:50], 'entry_type':entry_type}
|
|
||||||
|
#NEW slug for a logbook entry here! Use the unique id, not the title !!!
|
||||||
|
slug = tid + slugify(title)[:50]
|
||||||
|
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug, 'entry_type':entry_type}
|
||||||
lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
|
lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
|
||||||
|
|
||||||
|
|
||||||
@@ -133,31 +141,37 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
|||||||
|
|
||||||
def ParseDate(tripdate, year):
    """Interpret a date written in an expo logbook and return a datetime.date.

    Two spellings are recognised, both matched from the start of tripdate:
    ISO style "YYYY-MM-DD", and the older "D/M/YY" or "D/M/YYYY" style
    (the month pattern only accepts a single digit with an optional
    leading zero).

    Args:
        tripdate: Date text taken from a logbook entry header.
        year: Four-digit expedition year the logbook belongs to, e.g. "2008".
            It is checked against the year (or century) in tripdate and
            supplies the century for two-digit years. An int is accepted
            and converted to a string.

    Returns:
        The parsed datetime.date. If the text cannot be interpreted, or
        disagrees with year, the problem is recorded as a DataIssue and in
        logdataissues["tripdate"], and the placeholder date 1970-01-01 is
        returned instead of raising.
    """
    # Imported locally so this works whether the module does
    # `import datetime` or `from datetime import datetime, date, time`.
    from datetime import date as _date

    # Keep the caller's year as text; the parsed year lives in `yyyy` so the
    # error messages below can always concatenate `year` safely.
    year = str(year)
    placeholder = _date(1970, 1, 1)

    def record_issue(message):
        # Log the problem in both issue stores and hand back the placeholder.
        DataIssue.objects.create(parser='logbooks', message=message)
        logdataissues["tripdate"] = message
        return placeholder

    mdatestandard = re.match(r"(\d\d\d\d)-(\d\d)-(\d\d)", tripdate)
    mdategoof = re.match(r"(\d\d?)/0?(\d)/(20|19)?(\d\d)", tripdate)

    if mdatestandard:
        if mdatestandard.group(1) != year:
            return record_issue(" ! - Bad date (year) in logbook: " + tripdate + " - " + year)
        yyyy, month, day = (int(part) for part in mdatestandard.groups())
    elif mdategoof:
        century = mdategoof.group(3)
        # A century, when written, must agree with the expedition year.
        if century and century != year[:2]:
            return record_issue(" ! - Bad date mdategoof.group(3) in logbook: " + tripdate + " - " + century)
        day, month = int(mdategoof.group(1)), int(mdategoof.group(2))
        yyyy = int(year[:2]) * 100 + int(mdategoof.group(4))
    else:
        return record_issue(" ! - Bad date in logbook: " + tripdate + " - " + year)

    try:
        return _date(yyyy, month, day)
    except ValueError:
        # Well-formed text but an impossible calendar date, e.g. 2008-02-30.
        return record_issue(" ! - Failed to parse date in logbook: " + tripdate + " - " + year)
|
|
||||||
# (2006 - not any more), 2008 - 2009
|
# (2006 - not any more), 2008 - 2009
|
||||||
def Parselogwikitxt(year, expedition, txt):
|
def Parselogwikitxt(year, expedition, txt):
|
||||||
@@ -168,6 +182,8 @@ def Parselogwikitxt(year, expedition, txt):
|
|||||||
trippara = re.findall(r"===(.*?)===([\s\S]*?)(?====)", txt)
|
trippara = re.findall(r"===(.*?)===([\s\S]*?)(?====)", txt)
|
||||||
for triphead, triptext in trippara:
|
for triphead, triptext in trippara:
|
||||||
logbook_entry_count += 1
|
logbook_entry_count += 1
|
||||||
|
tid = set_trip_id(year,logbook_entry_count)
|
||||||
|
|
||||||
tripheadp = triphead.split("|")
|
tripheadp = triphead.split("|")
|
||||||
if not (len(tripheadp) == 3):
|
if not (len(tripheadp) == 3):
|
||||||
message = " ! - Bad no of items in tripdate in logbook: " + tripdate + " - " + tripheadp
|
message = " ! - Bad no of items in tripdate in logbook: " + tripdate + " - " + tripheadp
|
||||||
@@ -198,30 +214,24 @@ def Parselogwikitxt(year, expedition, txt):
|
|||||||
logentries.append(entrytuple)
|
logentries.append(entrytuple)
|
||||||
|
|
||||||
EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople,
|
EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople,
|
||||||
expedition=expedition, logtime_underground=0)
|
expedition=expedition, logtime_underground=0, tid=tid)
|
||||||
|
|
||||||
EnterLogIntoObjStore(year, ldate, tripcave, tripplace, triptext, trippeople,
|
EnterLogIntoObjStore(year, ldate, tripcave, tripplace, triptext, trippeople,
|
||||||
tu, "wiki", tripid, logbook_entry_count)
|
tu, "wiki", tripid, logbook_entry_count, tid=tid)
|
||||||
|
|
||||||
|
|
||||||
def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu, formattype, tripid1, seq):
|
def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu, formattype, tripid1, seq, tid=None):
|
||||||
# This will need additional functions to replicate the persontrip calculation and storage. For the
|
# This will need additional functions to replicate the persontrip calculation and storage. For the
|
||||||
# moment we leave all that to be done in the django db
|
# moment we leave all that to be done in the django db
|
||||||
global trips # should be a singleton TROG eventually
|
global trips # should be a singleton TROG eventually
|
||||||
global logdataissues
|
global logdataissues
|
||||||
|
|
||||||
if tripid1 is None or tripid1 =="":
|
|
||||||
tid= "n{}-s{:02d}".format(str(date),seq)
|
|
||||||
#print(" - New id ",tid)
|
|
||||||
else:
|
|
||||||
tid= tripid1
|
|
||||||
|
|
||||||
if tid in trips:
|
if tid in trips:
|
||||||
tyear, tdate, *trest = trips[tid]
|
tyear, tdate, *trest = trips[tid]
|
||||||
msg = f" ! DUPLICATE on {tdate} id: '{tid}'"
|
msg = f" ! DUPLICATE on {tdate} id: '{tid}'"
|
||||||
print(msg)
|
print(msg)
|
||||||
DataIssue.objects.create(parser='logbooks', message=msg)
|
DataIssue.objects.create(parser='logbooks', message=msg)
|
||||||
tid= "d{}-s{:02d}".format(str(date),seq)
|
tid = set_trip_id(str(date),seq)
|
||||||
#print(" - De-dup ",seq, tid)
|
#print(" - De-dup ",seq, tid)
|
||||||
logdataissues[tid]=msg
|
logdataissues[tid]=msg
|
||||||
trips[tid] = (year, date, tripcave, triptitle, text, trippeople, tu, formattype)
|
trips[tid] = (year, date, tripcave, triptitle, text, trippeople, tu, formattype)
|
||||||
@@ -247,7 +257,7 @@ def Parseloghtmltxt(year, expedition, txt):
|
|||||||
logbook_entry_count = 0
|
logbook_entry_count = 0
|
||||||
for trippara in tripparas:
|
for trippara in tripparas:
|
||||||
logbook_entry_count += 1
|
logbook_entry_count += 1
|
||||||
tid= "n{}-s{:02d}".format(year,logbook_entry_count)
|
tid = set_trip_id(year,logbook_entry_count)
|
||||||
|
|
||||||
s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
||||||
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
|
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
|
||||||
@@ -285,7 +295,7 @@ def Parseloghtmltxt(year, expedition, txt):
|
|||||||
entry_type="html", tid=tid)
|
entry_type="html", tid=tid)
|
||||||
|
|
||||||
EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
||||||
"html", tripid1, logbook_entry_count)
|
"html", tripid1, logbook_entry_count, tid=tid)
|
||||||
|
|
||||||
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
|
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
|
||||||
# trying it out for years 1982 - 1990 too. Some logbook editing required by hand..
|
# trying it out for years 1982 - 1990 too. Some logbook editing required by hand..
|
||||||
@@ -298,18 +308,26 @@ def Parseloghtml01(year, expedition, txt):
|
|||||||
logbook_entry_count = 0
|
logbook_entry_count = 0
|
||||||
for trippara in tripparas:
|
for trippara in tripparas:
|
||||||
logbook_entry_count += 1
|
logbook_entry_count += 1
|
||||||
tid= f"{year}.s{logbook_entry_count:02d}"
|
tid = set_trip_id(year,logbook_entry_count)
|
||||||
try:
|
try:
|
||||||
s = re.match(r"(?s)\s*(?:<p>)?(.*?)</?p>(.*)$(?i)", trippara)
|
s = re.match(r"(?s)\s*(?:<p>)?(.*?)</?p>(.*)$(?i)", trippara)
|
||||||
if not s:
|
if not s:
|
||||||
message = " ! - Skipping logentry on failure to parse header: " + tid + trippara[:300] + "..."
|
message = " ! - Skipping logentry {year} failure to parse header: " + tid + trippara[:300] + "..."
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser='logbooks', message=message)
|
||||||
logdataissues[tid]=message
|
logdataissues[tid]=message
|
||||||
print(message)
|
print(message)
|
||||||
break
|
break
|
||||||
tripheader, triptext = s.group(1), s.group(2)
|
tripheader, triptext = s.group(1), s.group(2)
|
||||||
mtripid = re.search(r'<a id="(.*?)"', tripheader)
|
mtripid = re.search(r'<a id="(.*?)"', tripheader)
|
||||||
|
# if not mtripid:
|
||||||
|
# # not an error, this is probably just a different year
|
||||||
|
# message = f" ! - Fail id trip:{tid} header:'{tripheader}'"
|
||||||
|
# DataIssue.objects.create(parser='logbooks', message=message)
|
||||||
|
# logdataissues[tid]=message
|
||||||
|
# print(message)
|
||||||
|
|
||||||
tripid = mtripid and mtripid.group(1) or ""
|
tripid = mtripid and mtripid.group(1) or ""
|
||||||
|
#print(f" # - mtripid: {mtripid}")
|
||||||
tripheader = re.sub(r"</?(?:[ab]|span)[^>]*>", "", tripheader)
|
tripheader = re.sub(r"</?(?:[ab]|span)[^>]*>", "", tripheader)
|
||||||
|
|
||||||
tripdate, triptitle, trippeople = tripheader.split("|")
|
tripdate, triptitle, trippeople = tripheader.split("|")
|
||||||
@@ -336,6 +354,13 @@ def Parseloghtml01(year, expedition, txt):
|
|||||||
ltriptext = re.sub(r"</?u>", "_", ltriptext)
|
ltriptext = re.sub(r"</?u>", "_", ltriptext)
|
||||||
ltriptext = re.sub(r"</?i>", "''", ltriptext)
|
ltriptext = re.sub(r"</?i>", "''", ltriptext)
|
||||||
ltriptext = re.sub(r"</?b>", "'''", ltriptext)
|
ltriptext = re.sub(r"</?b>", "'''", ltriptext)
|
||||||
|
|
||||||
|
if ltriptext == "":
|
||||||
|
message = " ! - Zero content for logbook entry!: " + tid
|
||||||
|
DataIssue.objects.create(parser='logbooks', message=message)
|
||||||
|
logdataissues[tid]=message
|
||||||
|
print(message)
|
||||||
|
|
||||||
|
|
||||||
entrytuple = (ldate, tripcave, triptitle, ltriptext,
|
entrytuple = (ldate, tripcave, triptitle, ltriptext,
|
||||||
trippeople, expedition, tu, "html01", tripid)
|
trippeople, expedition, tu, "html01", tripid)
|
||||||
@@ -343,16 +368,16 @@ def Parseloghtml01(year, expedition, txt):
|
|||||||
try:
|
try:
|
||||||
EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext,
|
EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext,
|
||||||
trippeople=trippeople, expedition=expedition, logtime_underground=0,
|
trippeople=trippeople, expedition=expedition, logtime_underground=0,
|
||||||
entry_type="html")
|
entry_type="html", tid=tid)
|
||||||
|
|
||||||
except:
|
except:
|
||||||
message = " ! - Enter log entry into database FAIL exception in: " + tid
|
message = " ! - Enter log entry into database FAIL exception in: " + tid
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser='logbooks', message=message)
|
||||||
logdataissues[tid]=message
|
logdataissues[tid]=message
|
||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
||||||
"html01", tripid, logbook_entry_count)
|
"html01", tripid, logbook_entry_count, tid=tid)
|
||||||
except:
|
except:
|
||||||
message = " ! - Enter log entry into ObjectStore FAIL exception in: " + tid
|
message = " ! - Enter log entry into ObjectStore FAIL exception in: " + tid
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser='logbooks', message=message)
|
||||||
@@ -360,7 +385,7 @@ def Parseloghtml01(year, expedition, txt):
|
|||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
except:
|
except:
|
||||||
message = " ! - Skipping logentry due to exception in: " + tid
|
message = f" ! - Skipping logentry {year} due to exception in: {tid}"
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser='logbooks', message=message)
|
||||||
logdataissues[tid]=message
|
logdataissues[tid]=message
|
||||||
print(message)
|
print(message)
|
||||||
@@ -381,11 +406,11 @@ def Parseloghtml03(year, expedition, txt):
|
|||||||
logbook_entry_count = 0
|
logbook_entry_count = 0
|
||||||
for trippara in tripparas:
|
for trippara in tripparas:
|
||||||
logbook_entry_count += 1
|
logbook_entry_count += 1
|
||||||
tid= f"{year}.s{logbook_entry_count:02d}"
|
tid = set_trip_id(year,logbook_entry_count)
|
||||||
|
|
||||||
s = re.match(r"(?s)\s*<p>(.*?)</p>(.*)$", trippara)
|
s = re.match(r"(?s)\s*<p>(.*?)</p>(.*)$", trippara)
|
||||||
if not ( s ) :
|
if not ( s ) :
|
||||||
message = " ! - Skipping logentry on failure to parse Parseloghtml03: {} {} {}...".format(tid,s,trippara[:300])
|
message = " ! - Skipping logentry {year} on failure to parse Parseloghtml03: {} {} {}...".format(tid,s,trippara[:300])
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser='logbooks', message=message)
|
||||||
logdataissues[tid]=message
|
logdataissues[tid]=message
|
||||||
print(message)
|
print(message)
|
||||||
@@ -419,10 +444,10 @@ def Parseloghtml03(year, expedition, txt):
|
|||||||
|
|
||||||
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle,
|
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle,
|
||||||
text = ltriptext, trippeople=trippeople, expedition=expedition,
|
text = ltriptext, trippeople=trippeople, expedition=expedition,
|
||||||
logtime_underground=0, entry_type="html")
|
logtime_underground=0, entry_type="html", tid=tid)
|
||||||
|
|
||||||
EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
|
||||||
"html03", tid, logbook_entry_count)
|
"html03", tid, logbook_entry_count, tid=tid)
|
||||||
|
|
||||||
|
|
||||||
def SetDatesFromLogbookEntries(expedition):
|
def SetDatesFromLogbookEntries(expedition):
|
||||||
@@ -477,16 +502,15 @@ def LoadLogbookForExpedition(expedition, expect):
|
|||||||
for di in dataissues:
|
for di in dataissues:
|
||||||
ph = year
|
ph = year
|
||||||
if re.search(ph, di.message) is not None:
|
if re.search(ph, di.message) is not None:
|
||||||
print(f' - CLEANING dataissue {di.message}')
|
#print(f' - CLEANING dataissue {di.message}')
|
||||||
di.delete()
|
di.delete()
|
||||||
|
|
||||||
print(f' - CLEAN {year} {len(logdataissues)} {type(logdataissues)} data issues for this year')
|
#print(f' - CLEAN {year} {len(logdataissues)} {type(logdataissues)} data issues for this year')
|
||||||
dellist = []
|
dellist = []
|
||||||
for key, value in logdataissues.items():
|
for key, value in logdataissues.items():
|
||||||
# tripentry = year + "." + str(logbook_entry_count)
|
#print(f' - CLEANING logdataissues [{key}]: {value}')
|
||||||
print(f' - CLEAN [{key}]')
|
if key.startswith(year):
|
||||||
if key.startswith(year + "."):
|
#print(f' - CLEANING logdataissues [{key:12}]: {value} ')
|
||||||
print(f' - CLEANING logdataissues [{key:12}]: value ')
|
|
||||||
dellist.append(key)
|
dellist.append(key)
|
||||||
for i in dellist:
|
for i in dellist:
|
||||||
del logdataissues[i]
|
del logdataissues[i]
|
||||||
@@ -547,7 +571,9 @@ def LoadLogbookForExpedition(expedition, expect):
|
|||||||
|
|
||||||
if logbook_parseable:
|
if logbook_parseable:
|
||||||
parser = globals()[parsefunc]
|
parser = globals()[parsefunc]
|
||||||
parser(expedition.year, expedition, txt)
|
|
||||||
|
parser(expedition.year, expedition, txt) # this launches the parser
|
||||||
|
|
||||||
SetDatesFromLogbookEntries(expedition)
|
SetDatesFromLogbookEntries(expedition)
|
||||||
if len(logentries) >0:
|
if len(logentries) >0:
|
||||||
print(" - Cacheing " , len(logentries), " log entries")
|
print(" - Cacheing " , len(logentries), " log entries")
|
||||||
@@ -625,112 +651,13 @@ def LoadLogbooks():
|
|||||||
odb.sync()
|
odb.sync()
|
||||||
odb.close()
|
odb.close()
|
||||||
|
|
||||||
dateRegex = re.compile(r'<span\s+class="date">(\d\d\d\d)-(\d\d)-(\d\d)</span>', re.S)
|
# dateRegex = re.compile(r'<span\s+class="date">(\d\d\d\d)-(\d\d)-(\d\d)</span>', re.S)
|
||||||
expeditionYearRegex = re.compile(r'<span\s+class="expeditionyear">(.*?)</span>', re.S)
|
# expeditionYearRegex = re.compile(r'<span\s+class="expeditionyear">(.*?)</span>', re.S)
|
||||||
titleRegex = re.compile(r'<H1>(.*?)</H1>', re.S)
|
# titleRegex = re.compile(r'<H1>(.*?)</H1>', re.S)
|
||||||
reportRegex = re.compile(r'<div\s+class="report">(.*)</div>\s*</body>', re.S)
|
# reportRegex = re.compile(r'<div\s+class="report">(.*)</div>\s*</body>', re.S)
|
||||||
personRegex = re.compile(r'<div\s+class="person">(.*?)</div>', re.S)
|
# personRegex = re.compile(r'<div\s+class="person">(.*?)</div>', re.S)
|
||||||
nameAuthorRegex = re.compile(r'<span\s+class="name(,author|)">(.*?)</span>', re.S)
|
# nameAuthorRegex = re.compile(r'<span\s+class="name(,author|)">(.*?)</span>', re.S)
|
||||||
TURegex = re.compile(r'<span\s+class="TU">([0-9]*\.?[0-9]+)</span>', re.S)
|
# TURegex = re.compile(r'<span\s+class="TU">([0-9]*\.?[0-9]+)</span>', re.S)
|
||||||
locationRegex = re.compile(r'<span\s+class="location">(.*?)</span>', re.S)
|
# locationRegex = re.compile(r'<span\s+class="location">(.*?)</span>', re.S)
|
||||||
caveRegex = re.compile(r'<span\s+class="cave">(.*?)</span>', re.S)
|
# caveRegex = re.compile(r'<span\s+class="cave">(.*?)</span>', re.S)
|
||||||
|
|
||||||
def parseAutoLogBookEntry(filename):
|
|
||||||
'''An AutoLogBookEntry appears to be one that was created online using a form, for a single trip,
|
|
||||||
which is then stored in a separate location to the usual logbook.html
|
|
||||||
But when importing logbook.html all these individual entries also need ot be parsed.
|
|
||||||
|
|
||||||
This is all redundant as we are getting rid of the whole individual trip entry system
|
|
||||||
'''
|
|
||||||
errors = []
|
|
||||||
f = open(filename, "r")
|
|
||||||
contents = f.read()
|
|
||||||
f.close()
|
|
||||||
|
|
||||||
dateMatch = dateRegex.search(contents)
|
|
||||||
if dateMatch:
|
|
||||||
year, month, day = [int(x) for x in dateMatch.groups()]
|
|
||||||
date = datetime.date(year, month, day)
|
|
||||||
else:
|
|
||||||
errors.append(" - Date could not be found")
|
|
||||||
|
|
||||||
expeditionYearMatch = expeditionYearRegex.search(contents)
|
|
||||||
if expeditionYearMatch:
|
|
||||||
try:
|
|
||||||
expedition = Expedition.objects.get(year = expeditionYearMatch.groups()[0])
|
|
||||||
personExpeditionNameLookup = GetPersonExpeditionNameLookup(expedition)
|
|
||||||
except Expedition.DoesNotExist:
|
|
||||||
errors.append(" - Expedition not in database")
|
|
||||||
else:
|
|
||||||
errors.append(" - Expedition Year could not be parsed")
|
|
||||||
|
|
||||||
titleMatch = titleRegex.search(contents)
|
|
||||||
if titleMatch:
|
|
||||||
title, = titleMatch.groups()
|
|
||||||
if len(title) > settings.MAX_LOGBOOK_ENTRY_TITLE_LENGTH:
|
|
||||||
errors.append(" - Title too long")
|
|
||||||
else:
|
|
||||||
errors.append(" - Title could not be found")
|
|
||||||
|
|
||||||
caveMatch = caveRegex.search(contents)
|
|
||||||
if caveMatch:
|
|
||||||
caveRef, = caveMatch.groups()
|
|
||||||
try:
|
|
||||||
# this is a slow and uncertain function:
|
|
||||||
cave = getCaveByReference(caveRef)
|
|
||||||
except:
|
|
||||||
cave = None
|
|
||||||
errors.append(" - Cave not found in database")
|
|
||||||
else:
|
|
||||||
cave = None
|
|
||||||
|
|
||||||
locationMatch = locationRegex.search(contents)
|
|
||||||
if locationMatch:
|
|
||||||
location, = locationMatch.groups()
|
|
||||||
else:
|
|
||||||
location = None
|
|
||||||
|
|
||||||
if cave is None and location is None:
|
|
||||||
errors.append(" - Location nor cave could not be found")
|
|
||||||
|
|
||||||
reportMatch = reportRegex.search(contents)
|
|
||||||
if reportMatch:
|
|
||||||
report, = reportMatch.groups()
|
|
||||||
else:
|
|
||||||
errors.append(" - Contents could not be found")
|
|
||||||
if errors:
|
|
||||||
return errors # Easiest to bail out at this point as we need to make sure that we know which expedition to look for people from.
|
|
||||||
people = []
|
|
||||||
for personMatch in personRegex.findall(contents):
|
|
||||||
nameAuthorMatch = nameAuthorRegex.search(contents)
|
|
||||||
if nameAuthorMatch:
|
|
||||||
author, name = nameAuthorMatch.groups()
|
|
||||||
if name.lower() in personExpeditionNameLookup:
|
|
||||||
personExpo = personExpeditionNameLookup[name.lower()]
|
|
||||||
else:
|
|
||||||
errors.append(" - Person could not be found in database")
|
|
||||||
author = bool(author)
|
|
||||||
else:
|
|
||||||
errors.append(" - Persons name could not be found")
|
|
||||||
|
|
||||||
TUMatch = TURegex.search(contents)
|
|
||||||
if TUMatch:
|
|
||||||
TU, = TUMatch.groups()
|
|
||||||
else:
|
|
||||||
errors.append(" - TU could not be found")
|
|
||||||
if not errors:
|
|
||||||
people.append((name, author, TU))
|
|
||||||
if errors:
|
|
||||||
return errors # Bail out before committing to the database
|
|
||||||
logbookEntry = LogbookEntry(date = date,
|
|
||||||
expedition = expedition,
|
|
||||||
title = title, cave = cave, place = location,
|
|
||||||
text = report, slug = slugify(title)[:50],
|
|
||||||
filename = filename)
|
|
||||||
logbookEntry.save()
|
|
||||||
for name, author, TU in people:
|
|
||||||
PersonTrip(personexpedition = personExpo,
|
|
||||||
time_underground = TU,
|
|
||||||
logbook_entry = logbookEntry,
|
|
||||||
is_logbook_entry_author = author).save()
|
|
||||||
print(logbookEntry)
|
|
||||||
Reference in New Issue
Block a user