From 5836c6ff9080285e48662b28da7286a877b0e813 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Sat, 6 Feb 2021 00:18:48 +0000 Subject: [PATCH] Importing old logbooks --- core/views_logbooks2.py | 268 ++++++++++++++++++++++++++++++++++++++++ parsers/logbooks.py | 119 +++++++++++------- parsers/people.py | 6 + parsers/survex.py | 1 + settings.py | 8 ++ templates/base.html | 2 +- 6 files changed, 356 insertions(+), 48 deletions(-) create mode 100644 core/views_logbooks2.py diff --git a/core/views_logbooks2.py b/core/views_logbooks2.py new file mode 100644 index 0000000..d79bbd3 --- /dev/null +++ b/core/views_logbooks2.py @@ -0,0 +1,268 @@ +import datetime +import os.path +import re + +import django.db.models +from django.db.models import Min, Max +from django.urls import reverse +from django.http import HttpResponse, HttpResponseRedirect +from django.shortcuts import render, render_to_response +from django.template import Context, loader +from django.template.defaultfilters import slugify +from django.utils import timezone +from django.views.generic.list import ListView + +import troggle.core.models as models +import troggle.parsers.logbooks as logbookparsers +from troggle.core.forms import getTripForm # , get_name, PersonForm +from troggle.core.models import Expedition, Person, PersonExpedition +from troggle.core.models_caves import LogbookEntry, PersonTrip +from troggle.core.models_survex import SurvexBlock +from troggle.helper import login_required_if_public +from troggle.parsers.logbooks import LoadLogbookForExpedition +from troggle.parsers.people import GetPersonExpeditionNameLookup + +import troggle.settings as settings + +# Django uses Context, not RequestContext when you call render +# to_response. We always want to use RequestContext, so that +# django adds the context from settings.TEMPLATE_CONTEXT_PROCESSORS. +# This way we automatically get necessary settings variables passed +# to each template. So we use a custom method, render_response +# instead of render_to_response. Hopefully future Django releases +# will make this unnecessary. +# from troggle.alwaysUseRequestContext import render_response + +# Deprecated in 1.11.29 +# @django.db.models.permalink #this allows the nice get_absolute_url syntax we are using + +def getNotablePersons(): + notablepersons = [] + for person in Person.objects.all(): + if person.bisnotable(): + notablepersons.append(person) + return notablepersons + + +def personindex(request): + persons = Person.objects.all() + # From what I can tell, "persons" seems to be the table rows, while "personss" is the table columns. 
- AC 16 Feb 09
+    personss = [ ]
+    ncols = 4
+    nc = int((len(persons) + ncols - 1) / ncols)
+    for i in range(ncols):
+        personss.append(persons[i * nc: (i + 1) * nc])
+
+    notablepersons = []
+    for person in Person.objects.all():
+        if person.bisnotable():
+            notablepersons.append(person)
+
+    return render(request,'personindex.html', {'persons': persons, 'personss':personss, 'notablepersons':notablepersons})
+
+
+def expedition(request, expeditionname):
+    this_expedition = Expedition.objects.get(year=int(expeditionname))
+    expeditions = Expedition.objects.all()
+    personexpeditiondays = [ ]
+    dateditems = list(this_expedition.logbookentry_set.all()) + list(this_expedition.survexblock_set.all())
+    dates = sorted(set([item.date for item in dateditems]))
+    for personexpedition in this_expedition.personexpedition_set.all():
+        prow = [ ]
+        for date in dates:
+            pcell = { "persontrips": PersonTrip.objects.filter(personexpedition=personexpedition,
+                                                               logbook_entry__date=date) }
+            pcell["survexblocks"] = set(SurvexBlock.objects.filter(survexpersonrole__personexpedition=personexpedition,
+                                                                   date = date))
+            prow.append(pcell)
+        personexpeditiondays.append({"personexpedition":personexpedition, "personrow":prow})
+
+    if "reload" in request.GET:
+        LoadLogbookForExpedition(this_expedition)
+    return render(request,'expedition.html', {'expedition': this_expedition, 'expeditions':expeditions, 'personexpeditiondays':personexpeditiondays, 'settings':settings, 'dateditems': dateditems })
+
+def get_absolute_url(self):
+    return ('expedition', (expedition.year))
+
+class ExpeditionListView(ListView): # django thus expects a template called "expedition_list.html"
+# from the name of the object, not the name of the class.
+    model = Expedition
+
+
+class Expeditions_tsvListView(ListView):
+    """This uses the Django built-in shortcut mechanism.
+    It defaults to use a template with the name <app>/<model>_list.html.
+    https://www.agiliq.com/blog/2017/12/when-and-how-use-django-listview/
+    https://developer.mozilla.org/en-US/docs/Learn/Server-side/Django/Generic_views
+    Either a queryset variable or a set_queryset() function is used, but neither is needed
+    if you want all the objects of a particular type, in which case just set model =
+    """
+    template_name = 'core/expeditions_tsv_list.html' # if not present then uses core/expedition_list.html
+    #queryset = Expedition.objects.all()
+    #context_object_name = 'expedition'
+    model = Expedition # equivalent to .objects.all() for a queryset
+
+class Expeditions_jsonListView(ListView):
+    template_name = 'core/expeditions_json_list.html'
+    model = Expedition
+
+
+def person(request, first_name='', last_name='', ):
+    this_person = Person.objects.get(first_name = first_name, last_name = last_name)
+
+    # This is for removing the reference to the user's profile, in case they set it to the wrong person
+    if request.method == 'GET':
+        if request.GET.get('clear_profile')=='True':
+            this_person.user=None
+            this_person.save()
+            return HttpResponseRedirect(reverse('profiles_select_profile'))
+
+    return render(request,'person.html', {'person': this_person, })
+
+
+def GetPersonChronology(personexpedition):
+    '''Horrible bug here when there is more than one survex block per day: it duplicates the entry but gets it wrong.
+    Fortunately this only affects the display on this page; no bad calculations get into the database.
+ ''' + res = { } + for persontrip in personexpedition.persontrip_set.all(): + a = res.setdefault(persontrip.logbook_entry.date, { }) + a.setdefault("persontrips", [ ]).append(persontrip) + + for personrole in personexpedition.survexpersonrole_set.all(): + a = res.setdefault(personrole.survexblock.date, { }) + a.setdefault("personroles", [ ]).append(personrole.survexblock) + + # build up the tables + rdates = sorted(list(res.keys())) + + res2 = [ ] + for rdate in rdates: + persontrips = res[rdate].get("persontrips", []) + personroles = res[rdate].get("personroles", []) + for n in range(max(len(persontrips), len(personroles) )): + res2.append(((n == 0 and rdate or "--"), (n < len(persontrips) and persontrips[n]), (n < len(personroles) and personroles[n]) )) + + return res2 + + +def personexpedition(request, first_name='', last_name='', year=''): + person = Person.objects.get(first_name = first_name, last_name = last_name) + this_expedition = Expedition.objects.get(year=year) + personexpedition = person.personexpedition_set.get(expedition=this_expedition) + personchronology = GetPersonChronology(personexpedition) + return render(request,'personexpedition.html', {'personexpedition': personexpedition, 'personchronology':personchronology}) + + +def logbookentry(request, date, slug): + this_logbookentry = LogbookEntry.objects.filter(date=date, slug=slug) + + if len(this_logbookentry)>1: + return render(request, 'object_list.html',{'object_list':this_logbookentry}) + else: + this_logbookentry=this_logbookentry[0] + return render(request, 'logbookentry.html', {'logbookentry': this_logbookentry}) + + +def logbookSearch(request, extra): + query_string = '' + found_entries = None + if ('q' in request.GET) and request.GET['q'].strip(): + query_string = request.GET['q'] + entry_query = search.get_query(query_string, ['text','title',]) + found_entries = LogbookEntry.objects.filter(entry_query) + + return render(request,'logbooksearch.html', + { 'query_string': query_string, 'found_entries': found_entries, }) + #context_instance=RequestContext(request)) + +def personForm(request,pk): + person=Person.objects.get(pk=pk) + form=PersonForm(instance=person) + return render(request,'personform.html', {'form':form,}) + +# tried to delete all this, and the reference in urls.py, but got impenetrable django error message +# @login_required_if_public +# def newLogbookEntry(request, expeditionyear, pdate = None, pslug = None): + # expedition = Expedition.objects.get(year=expeditionyear) + # PersonTripFormSet, TripForm = getTripForm(expedition) + # if pslug and pdate: + # previousdate = datetime.date(*[int(x) for x in pdate.split("-")]) + # previouslbe = LogbookEntry.objects.get(slug = pslug, date = previousdate, expedition = expedition) + # assert previouslbe.filename + # if request.method == 'POST': # If the form has been submitted... + # tripForm = TripForm(request.POST) # A form bound to the POST data + # personTripFormSet = PersonTripFormSet(request.POST) + # if tripForm.is_valid() and personTripFormSet.is_valid(): # All validation rules pass + # dateStr = tripForm.cleaned_data["date"].strftime("%Y-%m-%d") + # directory = os.path.join(settings.EXPOWEB, + # "years", + # expedition.year, + # "autologbook") + # filename = os.path.join(directory, + # dateStr + "." 
+ slugify(tripForm.cleaned_data["title"])[:50] + ".html") + # if not os.path.isdir(directory): + # os.mkdir(directory) + # if pslug and pdate: + # delLogbookEntry(previouslbe) + # f = open(filename, "w") + # template = loader.get_template('dataformat/logbookentry.html') + # context = Context({'trip': tripForm.cleaned_data, + # 'persons': personTripFormSet.cleaned_data, + # 'date': dateStr, + # 'expeditionyear': expeditionyear}) + # f.write(template.render(context)) + # f.close() + # print((logbookparsers.parseAutoLogBookEntry(filename))) + # return HttpResponseRedirect(reverse('expedition', args=[expedition.year])) # Redirect after POST + # else: + # if pslug and pdate: + # if previouslbe.cave: + # tripForm = TripForm(initial={"date": previousdate, + # "title": previouslbe.title, + # "cave": previouslbe.cave.reference(), + # "location": None, + # "caveOrLocation": "cave", + # "html": previouslbe.text}) + # else: + # tripForm = TripForm(initial={"date": previousdate, + # "title": previouslbe.title, + # "cave": None, + # "location": previouslbe.place, + # "caveOrLocation": "location", + # "html": previouslbe.text}) + # personTripFormSet = PersonTripFormSet(initial=[{"name": get_name(py.personexpedition), + # "TU": py.time_underground, + # "author": py.is_logbook_entry_author} + # for py in previouslbe.persontrip_set.all()]) + # else: + # tripForm = TripForm() # An unbound form + # personTripFormSet = PersonTripFormSet() + + # return render(request, 'newlogbookentry.html', { + # 'tripForm': tripForm, + # 'personTripFormSet': personTripFormSet, + + # }) + +# @login_required_if_public +# def deleteLogbookEntry(request, expeditionyear, date = None, slug = None): + # expedition = Expedition.objects.get(year=expeditionyear) + # previousdate = datetime.date(*[int(x) for x in date.split("-")]) + # previouslbe = LogbookEntry.objects.get(slug = slug, date = previousdate, expedition = expedition) + # delLogbookEntry(previouslbe) + # return HttpResponseRedirect(reverse('expedition', args=[expedition.year])) # Redirect after POST + +# def delLogbookEntry(lbe): + # for pt in lbe.persontrip_set.all(): + # pt.delete() + # lbe.delete() + # os.remove(lbe.filename) + +def get_people(request, expeditionslug): + exp = Expedition.objects.get(year = expeditionslug) + return render(request,'options.html', {"items": [(pe.slug, pe.name) for pe in exp.personexpedition_set.all()]}) + +def get_logbook_entries(request, expeditionslug): + exp = Expedition.objects.get(year = expeditionslug) + return render(request,'options.html', {"items": [(le.slug, "%s - %s" % (le.date, le.title)) for le in exp.logbookentry_set.all()]}) diff --git a/parsers/logbooks.py b/parsers/logbooks.py index a8f0cca..9e8fc8f 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -37,9 +37,10 @@ def GetTripPersons(trippeople, expedition, logtime_underground): tripperson = re.sub(round_bracket_regex, "", tripperson).strip() personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower()) if not personyear: - print((" - No name match for: '%s'" % tripperson)) - message = "No name match for: '%s' in year '%s'" % (tripperson, expedition.year) + message = "No name match for: ||'%s'|| in year '%s'" % (tripperson, expedition.year) + print(message) DataIssue.objects.create(parser='logbooks', message=message) + logdataissues[expedition.year + "~" + tripperson]=message res.append((personyear, logtime_underground)) if mul: author = personyear @@ -91,6 +92,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_ 
print(" ! - Skipping logentry: " + title + " - no author for entry") message = " ! - Skipping logentry: %s - no author for entry in year '%s'" % (title, expedition.year) DataIssue.objects.create(parser='logbooks', message=message) + logdataissues["title"]=message return # This needs attention. The slug field is derived from 'title' @@ -133,7 +135,7 @@ def ParseDate(tripdate, year): else: message = " ! - Bad date in logbook: " + tripdate + " - " + year DataIssue.objects.create(parser='logbooks', message=message) - logdataissues["author"]=message + logdataissues["tripdate"]=message assert False, tripdate return datetime.date(year, month, day) @@ -254,57 +256,77 @@ def Parseloghtmltxt(year, expedition, txt): "html", tripid1, logbook_entry_count) # main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it +# trying it out for years 1982 - 1990 too. Some logbook editing required by hand.. def Parseloghtml01(year, expedition, txt): global logentries global logdataissues + errorcount = 0 tripparas = re.findall(r"([\s\S]*?)(?=)?(.*?)(.*)$(?i)", trippara) + if not s: + message = " ! - Skipping logentry on failure to parse header: " + tripentry + trippara[:300] + "..." + DataIssue.objects.create(parser='logbooks', message=message) + logdataissues[tripentry]=message + print(message) + break + tripheader, triptext = s.group(1), s.group(2) + mtripid = re.search(r']*>", "", tripheader) + + tripdate, triptitle, trippeople = tripheader.split("|") + ldate = ParseDate(tripdate.strip(), year) - s = re.match("(?s)\s*(?:

)?(.*?)(.*)$(?i)", trippara) - assert s, trippara[:300] - tripheader, triptext = s.group(1), s.group(2) - mtripid = re.search(r']*>", "", tripheader) + mtu = re.search(r']*>(T/?U.*)', triptext) + if mtu: + tu = mtu.group(1) + triptext = triptext[:mtu.start(0)] + triptext[mtu.end():] + else: + tu = "" - tripdate, triptitle, trippeople = tripheader.split("|") - ldate = ParseDate(tripdate.strip(), year) - - mtu = re.search(r']*>(T/?U.*)', triptext) - if mtu: - tu = mtu.group(1) - triptext = triptext[:mtu.start(0)] + triptext[mtu.end():] - else: - tu = "" + triptitles = triptitle.split(" - ") + tripcave = triptitles[0].strip() - triptitles = triptitle.split(" - ") - tripcave = triptitles[0].strip() + ltriptext = triptext + + mtail = re.search(r'(?:[^<]*|\s|/|-|&||\((?:same day|\d+)\))*$', ltriptext) + if mtail: + ltriptext = ltriptext[:mtail.start(0)] + ltriptext = re.sub(r"

", "", ltriptext) + ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext) + ltriptext = re.sub(r"

|
", "\n\n", ltriptext).strip() + ltriptext = re.sub(r"", "_", ltriptext) + ltriptext = re.sub(r"", "''", ltriptext) + ltriptext = re.sub(r"", "'''", ltriptext) - ltriptext = triptext - - mtail = re.search(r'(?:[^<]*|\s|/|-|&||\((?:same day|\d+)\))*$', ltriptext) - if mtail: - ltriptext = ltriptext[:mtail.start(0)] - ltriptext = re.sub(r"

", "", ltriptext) - ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext) - ltriptext = re.sub(r"

|
", "\n\n", ltriptext).strip() - ltriptext = re.sub(r"", "_", ltriptext) - ltriptext = re.sub(r"", "''", ltriptext) - ltriptext = re.sub(r"", "'''", ltriptext) + entrytuple = (ldate, tripcave, triptitle, ltriptext, + trippeople, expedition, tu, "html01", tripid) + logentries.append(entrytuple) - entrytuple = (ldate, tripcave, triptitle, ltriptext, - trippeople, expedition, tu, "html01", tripid) - logentries.append(entrytuple) + EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext, + trippeople=trippeople, expedition=expedition, logtime_underground=0, + entry_type="html") - EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext, - trippeople=trippeople, expedition=expedition, logtime_underground=0, - entry_type="html") - - EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu, - "html01", tripid, logbook_entry_count) + EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu, + "html01", tripid, logbook_entry_count) + except: + message = " ! - Skipping logentry due to exception in: " + tripentry + DataIssue.objects.create(parser='logbooks', message=message) + logdataissues[tripentry]=message + print(message) + errorcount += 1 + if errorcount >5 : + message = " !!- TOO MANY ERRORS - aborting logbook: " + year + DataIssue.objects.create(parser='logbooks', message=message) + logdataissues[tripentry]=message + print(message) + return # parser for 2003 def Parseloghtml03(year, expedition, txt): @@ -473,6 +495,8 @@ def LoadLogbookForExpedition(expedition,expect): def LoadLogbooks(): """ This is the master function for parsing all logbooks into the Troggle database. + Parser settings appropriate for each year are set in settings.py LOGBOOK_PARSER_SETTINGS. + This should be rewritten to use coroutines to load all logbooks from disc in parallel. """ global logdataissues @@ -481,13 +505,14 @@ def LoadLogbooks(): expos = Expedition.objects.all() if len(expos) <= 1: print(" ! No expeditions found. Load 'people' first.\n") - nologbook = ["1976", "1977","1978","1979","1980","1980","1981","1983","1984", - "1985","1986","1987","1988","1989","1990",] - entries = {"2020": 0, "2019": 20, "2018": 74, "2017": 60, "2016": 81, "2015": 79, + nologbook = ["1976", "1977", "1978", "1979", "1980", "1981", "1986", "2020",] + entries = {"2021": 0, "2019": 20, "2018": 74, "2017": 60, "2016": 81, "2015": 79, "2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52, "2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31, "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41, - "1994": 32, "1993": 41, "1992": 61, "1991": 38, "1982": 0} + "1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1, + "1985": 1,"1984": 1,"1983": 1,"1982": 42,} + # Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing. 
     try:
         os.remove("loadlogbk.log")
     except OSError:
         pass
@@ -503,8 +528,8 @@ def LoadLogbooks():
         nlbe[expo.year]=numentries
         expd[expo.year]= 0
     print("** total trips in ObjStore:", len(trips))
-    for i in logdataissues:
-        print("{:15s}: {}".format(i, logdataissues[i]))
+    #for i in logdataissues:
+    #    print("{:15s}: {}".format(i, logdataissues[i]))

     for lbe in trips:
         year, date, tripcave, triptitle, text, trippeople, tu, formattype = trips[lbe]
@@ -513,7 +538,7 @@ def LoadLogbooks():
     for y in expd:
         print("{} {}".format(y, expd[y]), nlbe[y])
         yt += expd[y]
-    print("{} total".format(yt))
+    print("total {} log entries in all expeditions".format(yt))

     with shelve.open('logbktrips.shelve',writeback=True) as odb:
         for lbe in trips:
diff --git a/parsers/people.py b/parsers/people.py
index e997b8c..bd67e8a 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -5,6 +5,12 @@ from utils import save_carefully
 from html.parser import HTMLParser
 from unidecode import unidecode
+'''These functions do not match how the stand-alone script works, so the script produces an HTML file which has
+href links to pages in troggle which troggle does not think are right.
+The stand-alone script needs to be rendered defunct, and all the parsing needs to be done in troggle. Either that,
+or they should use the same code by importing a module.
+'''
+

 def parseMugShotAndBlurb(personline, header, person):
     """create mugshot Photo instance"""
     mugShotFilename=personline[header["Mugshot"]]
diff --git a/parsers/survex.py b/parsers/survex.py
index 021244e..ff2e646 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -1252,6 +1252,7 @@ def LoadPositions():
             print(" - Regenerating stale cavern .log and .3d for '{}'\n days old: {:.1f} {:.1f} {:.1f}".
                 format(topdata, (svx_t - d3d_t)/(24*3600), (cav_t - d3d_t)/(24*3600), (now - d3d_t)/(24*3600)))
             call([settings.CAVERN, "--log", "--output={}".format(topdata), "{}.svx".format(topdata)])
+            print(" - Regenerating {} {}.3d in {}".format(settings.THREEDTOPOS, topdata, settings.SURVEX_DATA))
             call([settings.THREEDTOPOS, '{}.3d'.format(topdata)], cwd = settings.SURVEX_DATA)

     topdata = settings.SURVEX_DATA + settings.SURVEX_TOPNAME
diff --git a/settings.py b/settings.py
index 6c1bec1..f8ccdbd 100644
--- a/settings.py
+++ b/settings.py
@@ -87,6 +87,14 @@ LOGBOOK_PARSER_SETTINGS = {
     "1993": ("1993/log.htm", "Parseloghtml01"),
     "1992": ("1992/log.htm", "Parseloghtml01"),
     "1991": ("1991/log.htm", "Parseloghtml01"),
+    "1990": ("1990/log.htm", "Parseloghtml01"),
+    "1989": ("1989/log.htm", "Parseloghtml01"),
+    "1988": ("1988/log.htm", "Parseloghtml01"),
+    "1987": ("1987/log.htm", "Parseloghtml01"),
+    "1985": ("1985/log.htm", "Parseloghtml01"),
+    "1984": ("1984/log.htm", "Parseloghtml01"),
+    "1983": ("1983/log.htm", "Parseloghtml01"),
+    "1982": ("1982/log.htm", "Parseloghtml01"),
 }

 APPEND_SLASH = False
diff --git a/templates/base.html b/templates/base.html
index 282ec3e..cd8da71 100644
--- a/templates/base.html
+++ b/templates/base.html
@@ -16,7 +16,7 @@