Importing old logbooks

2025-12-14 05:55:06 +00:00 · 2021-02-06 00:18:48 +00:00
parent a4d7183260
commit 5836c6ff90
6 changed files with 356 additions and 48 deletions
--- a/core/views_logbooks2.py
+++ b/core/views_logbooks2.py
@@ -0,0 +1,268 @@
+import datetime
+import os.path
+import re
+
+import django.db.models
+from django.db.models import Min, Max
+from django.urls import reverse
+from django.http import HttpResponse, HttpResponseRedirect
+from django.shortcuts import render, render_to_response
+from django.template import Context, loader
+from django.template.defaultfilters import slugify
+from django.utils import timezone
+from django.views.generic.list import ListView
+
+import troggle.core.models as models
+import troggle.parsers.logbooks as logbookparsers
+from troggle.core.forms import getTripForm  # , get_name, PersonForm
+from troggle.core.models import Expedition, Person, PersonExpedition
+from troggle.core.models_caves import LogbookEntry, PersonTrip
+from troggle.core.models_survex import SurvexBlock
+from troggle.helper import login_required_if_public
+from troggle.parsers.logbooks import LoadLogbookForExpedition
+from troggle.parsers.people import GetPersonExpeditionNameLookup
+
+import troggle.settings as settings
+
+# Django uses Context, not RequestContext when you call render
+# to_response. We always want to use RequestContext, so that
+# django adds the context from settings.TEMPLATE_CONTEXT_PROCESSORS.
+# This way we automatically get necessary settings variables passed
+# to each template. So we use a custom method, render_response
+# instead of render_to_response. Hopefully future Django releases
+# will make this unnecessary.
+# from troggle.alwaysUseRequestContext import render_response
+
+# Deprecated in 1.11.29
+# @django.db.models.permalink #this allows the nice get_absolute_url syntax we are using
+
+def getNotablePersons():
+    """Return a list of every Person for which bisnotable() is truthy."""
+    return [candidate for candidate in Person.objects.all() if candidate.bisnotable()]
+
+
+def personindex(request):
+    """Render 'personindex.html' listing all people.
+
+    Template context:
+      persons        -- the full Person queryset
+      personss       -- that queryset cut into four consecutive slices
+                        (presumably one slice per table column - confirm against the template)
+      notablepersons -- the people for which bisnotable() is truthy
+    """
+    everyone = Person.objects.all()
+    column_count = 4
+    # Ceiling division: how many people each of the four slices must hold.
+    per_column = (len(everyone) + column_count - 1) // column_count
+    columns = [everyone[col * per_column:(col + 1) * per_column]
+               for col in range(column_count)]
+
+    return render(request, 'personindex.html',
+                  {'persons': everyone, 'personss': columns, 'notablepersons': getNotablePersons()})
+
+
+def expedition(request, expeditionname):
+    """Render 'expedition.html' for the expedition whose year is `expeditionname`.
+
+    For each PersonExpedition of that expedition a row is built with one cell per
+    date on which the expedition has a logbook entry or a survex block. Each cell
+    holds that person's PersonTrips and SurvexBlocks for the date.
+
+    If the query string contains "reload", the expedition's logbook is re-parsed
+    before anything is gathered, so the page is built from the reloaded data.
+
+    Raises ValueError if `expeditionname` is not an integer string and
+    Expedition.DoesNotExist if no expedition has that year.
+    """
+    this_expedition = Expedition.objects.get(year=int(expeditionname))
+
+    # Reload before gathering: otherwise the lists evaluated below would still
+    # describe the database as it was before the re-parse.
+    if "reload" in request.GET:
+        # NOTE(review): the parsers/logbooks.py hunk of this commit shows the signature
+        # LoadLogbookForExpedition(expedition, expect) - confirm whether a second
+        # argument is required here.
+        LoadLogbookForExpedition(this_expedition)
+
+    expeditions = Expedition.objects.all()
+    dateditems = list(this_expedition.logbookentry_set.all()) + list(this_expedition.survexblock_set.all())
+    dates = sorted({item.date for item in dateditems})
+
+    personexpeditiondays = []
+    for personexpedition in this_expedition.personexpedition_set.all():
+        prow = [
+            {
+                "persontrips": PersonTrip.objects.filter(personexpedition=personexpedition,
+                                                         logbook_entry__date=date),
+                "survexblocks": set(SurvexBlock.objects.filter(survexpersonrole__personexpedition=personexpedition,
+                                                               date=date)),
+            }
+            for date in dates
+        ]
+        personexpeditiondays.append({"personexpedition": personexpedition, "personrow": prow})
+
+    return render(request, 'expedition.html', {'expedition': this_expedition, 'expeditions': expeditions, 'personexpeditiondays': personexpeditiondays, 'settings': settings, 'dateditems': dateditems})
+
+def get_absolute_url(self):
+    """Return the URL of the 'expedition' page for `self.year`.
+
+    The previous body read `expedition.year`, which resolves to the `expedition`
+    view function in this module and therefore always raised AttributeError. It
+    also returned the (name, args) tuple that only the removed `permalink`
+    decorator knew how to turn into a URL, so the URL is now built directly.
+
+    NOTE(review): this looks like a method lifted from the Expedition model;
+    confirm whether anything still calls it at module level.
+    """
+    return reverse('expedition', args=[self.year])
+
+class ExpeditionListView(ListView):
+    # Generic list view over Expedition. Django expects a template called
+    # "expedition_list.html": the name comes from the model, not from this class.
+    model = Expedition
+
+
+class Expeditions_tsvListView(ListView):
+    """List view over Expedition rendered with 'core/expeditions_tsv_list.html'.
+
+    This uses the Django built-in generic ListView shortcut mechanism.
+    It defaults to a template named <app-label>/<model-name>_list.html.
+    https://www.agiliq.com/blog/2017/12/when-and-how-use-django-listview/
+    https://developer.mozilla.org/en-US/docs/Learn/Server-side/Django/Generic_views
+    Either a `queryset` attribute or a get_queryset() method can be supplied, but neither
+    is needed if you want all the objects of a particular type: in that case just set
+    model = <object>.
+    """
+    template_name = 'core/expeditions_tsv_list.html' # if not present then uses core/expedition_list.html
+    #queryset = Expedition.objects.all()
+    #context_object_name = 'expedition'
+    model = Expedition # equivalent to .objects.all() for a queryset
+
+class Expeditions_jsonListView(ListView):
+    """List view over Expedition rendered with 'core/expeditions_json_list.html'."""
+    template_name = 'core/expeditions_json_list.html'
+    model = Expedition
+
+
+def person(request, first_name='', last_name='', ):
+    """Render 'person.html' for the Person with the given first and last name.
+
+    A GET request carrying clear_profile=True instead detaches the user from
+    that Person (in case they picked the wrong one), saves it, and redirects to
+    the 'profiles_select_profile' URL.
+
+    Raises Person.DoesNotExist / Person.MultipleObjectsReturned from the lookup.
+    """
+    this_person = Person.objects.get(first_name=first_name, last_name=last_name)
+
+    wants_profile_cleared = (request.method == 'GET'
+                             and request.GET.get('clear_profile') == 'True')
+    if not wants_profile_cleared:
+        return render(request, 'person.html', {'person': this_person, })
+
+    this_person.user = None
+    this_person.save()
+    return HttpResponseRedirect(reverse('profiles_select_profile'))
+
+
+def GetPersonChronology(personexpedition):
+    """Build the per-day table rows for one person on one expedition.
+
+    Returns a list of (date, persontrip, survexblock) tuples in date order. A
+    date produces as many rows as the longer of its two lists (trips, survex
+    blocks). Only the first row of a date carries the date, later rows carry
+    "--", and a column with nothing left for that row holds False.
+
+    Known bug (recorded by the original author): when there is more than one
+    survex block per day the entry is duplicated and displayed wrongly. Only the
+    display on this page is affected; no bad calculations get into the database.
+    """
+    trips_by_date = {}
+    for trip in personexpedition.persontrip_set.all():
+        trips_by_date.setdefault(trip.logbook_entry.date, []).append(trip)
+
+    blocks_by_date = {}
+    for role in personexpedition.survexpersonrole_set.all():
+        blocks_by_date.setdefault(role.survexblock.date, []).append(role.survexblock)
+
+    rows = []
+    for day in sorted(set(trips_by_date) | set(blocks_by_date)):
+        day_trips = trips_by_date.get(day, [])
+        day_blocks = blocks_by_date.get(day, [])
+        for n in range(max(len(day_trips), len(day_blocks))):
+            label = day if (n == 0 and day) else "--"
+            trip = day_trips[n] if n < len(day_trips) else False
+            block = day_blocks[n] if n < len(day_blocks) else False
+            rows.append((label, trip, block))
+    return rows
+
+
+def personexpedition(request, first_name='',  last_name='', year=''):
+    """Render 'personexpedition.html' for one person's participation in one expedition.
+
+    The person is looked up by first and last name and the expedition by `year`.
+    The template receives the PersonExpedition and its chronology as built by
+    GetPersonChronology(). Each lookup uses .get(), so a missing row raises the
+    corresponding DoesNotExist.
+    """
+    who = Person.objects.get(first_name=first_name, last_name=last_name)
+    membership = who.personexpedition_set.get(expedition=Expedition.objects.get(year=year))
+    context = {'personexpedition': membership,
+               'personchronology': GetPersonChronology(membership)}
+    return render(request, 'personexpedition.html', context)
+
+
+def logbookentry(request, date, slug):
+    """Render the logbook entry identified by `date` and `slug`.
+
+    If several entries share that date and slug, 'object_list.html' is rendered
+    with all of them; a single match is rendered with 'logbookentry.html'.
+
+    Raises Http404 when nothing matches (previously the empty result was indexed
+    with [0], which raised IndexError and surfaced as a server error).
+    """
+    from django.http import Http404  # local import: only this view needs it
+
+    matches = LogbookEntry.objects.filter(date=date, slug=slug)
+
+    if len(matches) > 1:
+        return render(request, 'object_list.html', {'object_list': matches})
+    if not matches:
+        raise Http404("No logbook entry found for date '%s' and slug '%s'" % (date, slug))
+    return render(request, 'logbookentry.html', {'logbookentry': matches[0]})
+
+
+def logbookSearch(request, extra):
+    """Render 'logbooksearch.html' with the logbook entries matching the 'q' parameter.
+
+    The query string is taken from request.GET['q'] when it is present and not
+    blank, otherwise it is ''. The 'text' and 'title' fields are searched.
+    `extra` is accepted but not used.
+    """
+    # NOTE(review): `search` is not imported or defined in the visible part of this
+    # module, so this call looks like it raises NameError - confirm where
+    # get_query() lives and import it.
+    raw_query = request.GET['q'] if 'q' in request.GET else ''
+    query_string = raw_query if raw_query.strip() else ''
+    entry_filter = search.get_query(query_string, ['text', 'title', ])
+    matches = LogbookEntry.objects.filter(entry_filter)
+
+    return render(request, 'logbooksearch.html',
+                  {'query_string': query_string, 'found_entries': matches, })
+
+def personForm(request,pk):
+    """Render 'personform.html' with a PersonForm bound to the Person whose primary key is `pk`."""
+    # NOTE(review): PersonForm is commented out of the troggle.core.forms import at
+    # the top of this file, so this name looks undefined here - confirm.
+    target = Person.objects.get(pk=pk)
+    return render(request, 'personform.html', {'form': PersonForm(instance=target), })
+
+# tried to delete all this, and the reference in urls.py, but got impenetrable django error message
+# @login_required_if_public
+# def newLogbookEntry(request, expeditionyear, pdate = None, pslug = None):
+    # expedition = Expedition.objects.get(year=expeditionyear)
+    # PersonTripFormSet, TripForm = getTripForm(expedition)
+    # if pslug and pdate:
+        # previousdate = datetime.date(*[int(x) for x in pdate.split("-")])
+        # previouslbe = LogbookEntry.objects.get(slug = pslug, date = previousdate, expedition = expedition)
+        # assert previouslbe.filename
+    # if request.method == 'POST': # If the form has been submitted...
+        # tripForm = TripForm(request.POST) # A form bound to the POST data
+        # personTripFormSet = PersonTripFormSet(request.POST)
+        # if tripForm.is_valid() and personTripFormSet.is_valid(): # All validation rules pass
+            # dateStr = tripForm.cleaned_data["date"].strftime("%Y-%m-%d")
+            # directory = os.path.join(settings.EXPOWEB, 
+                                    # "years", 
+                                    # expedition.year, 
+                                    # "autologbook")
+            # filename = os.path.join(directory, 
+                                    # dateStr + "." + slugify(tripForm.cleaned_data["title"])[:50] + ".html")
+            # if not os.path.isdir(directory):
+                # os.mkdir(directory)
+            # if pslug and pdate:
+                # delLogbookEntry(previouslbe)
+            # f = open(filename, "w")
+            # template = loader.get_template('dataformat/logbookentry.html')
+            # context = Context({'trip': tripForm.cleaned_data, 
+                               # 'persons': personTripFormSet.cleaned_data,
+                               # 'date': dateStr,
+                               # 'expeditionyear': expeditionyear})
+            # f.write(template.render(context))
+            # f.close()
+            # print((logbookparsers.parseAutoLogBookEntry(filename)))
+            # return HttpResponseRedirect(reverse('expedition', args=[expedition.year])) # Redirect after POST
+    # else:
+        # if pslug and pdate:
+            # if previouslbe.cave:
+                # tripForm = TripForm(initial={"date": previousdate,
+                                             # "title": previouslbe.title,
+                                             # "cave": previouslbe.cave.reference(),
+                                             # "location": None,
+                                             # "caveOrLocation": "cave",
+                                             # "html": previouslbe.text})
+            # else:
+                # tripForm = TripForm(initial={"date": previousdate,
+                                             # "title": previouslbe.title,
+                                             # "cave": None,
+                                             # "location": previouslbe.place,
+                                             # "caveOrLocation": "location",
+                                             # "html": previouslbe.text})
+            # personTripFormSet = PersonTripFormSet(initial=[{"name": get_name(py.personexpedition), 
+                                                            # "TU": py.time_underground, 
+                                                            # "author": py.is_logbook_entry_author}
+                                                           # for py in previouslbe.persontrip_set.all()])
+        # else:        
+            # tripForm = TripForm() # An unbound form
+            # personTripFormSet = PersonTripFormSet()
+
+    # return render(request, 'newlogbookentry.html', {
+        # 'tripForm': tripForm,
+        # 'personTripFormSet': personTripFormSet,
+
+    # })
+
+# @login_required_if_public
+# def deleteLogbookEntry(request, expeditionyear, date = None, slug = None):
+    # expedition = Expedition.objects.get(year=expeditionyear)
+    # previousdate = datetime.date(*[int(x) for x in date.split("-")])
+    # previouslbe = LogbookEntry.objects.get(slug = slug, date = previousdate, expedition = expedition)
+    # delLogbookEntry(previouslbe)
+    # return HttpResponseRedirect(reverse('expedition', args=[expedition.year])) # Redirect after POST
+
+# def delLogbookEntry(lbe):
+    # for pt in lbe.persontrip_set.all():
+        # pt.delete()
+    # lbe.delete()
+    # os.remove(lbe.filename)
+
+def get_people(request, expeditionslug):
+    """Render 'options.html' with one (slug, name) pair per PersonExpedition of the
+    expedition whose year is `expeditionslug`."""
+    this_expedition = Expedition.objects.get(year=expeditionslug)
+    options = []
+    for member in this_expedition.personexpedition_set.all():
+        options.append((member.slug, member.name))
+    return render(request, 'options.html', {"items": options})
+
+def get_logbook_entries(request, expeditionslug):
+    """Render 'options.html' with one (slug, "<date> - <title>") pair per logbook
+    entry of the expedition whose year is `expeditionslug`."""
+    this_expedition = Expedition.objects.get(year=expeditionslug)
+    options = []
+    for entry in this_expedition.logbookentry_set.all():
+        label = "%s - %s" % (entry.date, entry.title)
+        options.append((entry.slug, label))
+    return render(request, 'options.html', {"items": options})
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -37,9 +37,10 @@ def GetTripPersons(trippeople, expedition, logtime_underground):
            tripperson = re.sub(round_bracket_regex, "", tripperson).strip()
            personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
            if not personyear:
-                print(("   - No name match for: '%s'" % tripperson))
-                message = "No name match for: '%s' in year '%s'" % (tripperson, expedition.year)
+                message = "No name match for: ||'%s'|| in year '%s'" % (tripperson, expedition.year)
+                print(message)
                DataIssue.objects.create(parser='logbooks', message=message)
+                logdataissues[expedition.year + "~" + tripperson]=message
            res.append((personyear, logtime_underground))
            if mul:
                author = personyear
@@ -91,6 +92,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
        print(" ! - Skipping logentry: " + title + " - no author for entry")
        message = " ! - Skipping logentry: %s - no author for entry in year '%s'" % (title, expedition.year)
        DataIssue.objects.create(parser='logbooks', message=message)
+        logdataissues["title"]=message
        return

    # This needs attention. The slug field is derived from 'title'
@@ -133,7 +135,7 @@ def ParseDate(tripdate, year):
    else:
        message = " ! - Bad date in logbook: " + tripdate + " - " + year
        DataIssue.objects.create(parser='logbooks', message=message)
-        logdataissues["author"]=message
+        logdataissues["tripdate"]=message
        assert False, tripdate

    return datetime.date(year, month, day)
@@ -254,17 +256,25 @@ def Parseloghtmltxt(year, expedition, txt):
                            "html", tripid1, logbook_entry_count)

 # main parser for 1991 - 2001.  simpler because the data has been hacked so much to fit it
+# trying it out for years 1982 - 1990 too. Some logbook editing required by hand..
 def Parseloghtml01(year, expedition, txt):
    global logentries
    global logdataissues
+    errorcount = 0

    tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
    logbook_entry_count = 0
    for trippara in tripparas:
        logbook_entry_count += 1
-        
+        try:
+            tripentry = year + "." + str(logbook_entry_count)
            s = re.match("(?s)\s*(?:<p>)?(.*?)</?p>(.*)$(?i)", trippara)
-        assert s, trippara[:300]
+            if not s:
+                message = " ! - Skipping logentry on failure to parse header: " + tripentry + trippara[:300] + "..."
+                DataIssue.objects.create(parser='logbooks', message=message)
+                logdataissues[tripentry]=message
+                print(message)
+                break
            tripheader, triptext = s.group(1), s.group(2)
            mtripid = re.search(r'<a id="(.*?)"', tripheader)
            tripid = mtripid and mtripid.group(1) or ""
@@ -305,6 +315,18 @@ def Parseloghtml01(year, expedition, txt):

            EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu, 
                                "html01", tripid, logbook_entry_count)
+        except:
+            message = " ! - Skipping logentry due to exception in: " + tripentry 
+            DataIssue.objects.create(parser='logbooks', message=message)
+            logdataissues[tripentry]=message
+            print(message)
+            errorcount += 1
+            if errorcount >5 :
+                message = " !!- TOO MANY ERRORS - aborting logbook: " + year
+                DataIssue.objects.create(parser='logbooks', message=message)
+                logdataissues[tripentry]=message
+                print(message)
+                return

 # parser for 2003
 def Parseloghtml03(year, expedition, txt):
@@ -473,6 +495,8 @@ def LoadLogbookForExpedition(expedition,expect):

 def LoadLogbooks():
    """ This is the master function for parsing all logbooks into the Troggle database. 
+    Parser settings appropriate for each year are set in settings.py LOGBOOK_PARSER_SETTINGS.
+    This should be rewritten to use coroutines to load all logbooks from disc in parallel.
    """
    global logdataissues

@@ -481,13 +505,14 @@ def LoadLogbooks():
    expos = Expedition.objects.all()
    if len(expos) <= 1:
        print(" ! No expeditions found. Load 'people' first.\n")
-    nologbook = ["1976", "1977","1978","1979","1980","1980","1981","1983","1984",
-    "1985","1986","1987","1988","1989","1990",]
-    entries = {"2020": 0, "2019": 20, "2018": 74, "2017": 60, "2016": 81, "2015": 79, 
+    nologbook = ["1976", "1977", "1978", "1979", "1980", "1981", "1986", "2020",]
+    entries = {"2021": 0, "2019": 20, "2018": 74, "2017": 60, "2016": 81, "2015": 79, 
        "2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52, 
        "2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31, 
        "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41, 
-        "1994": 32, "1993": 41, "1992": 61, "1991": 38, "1982": 0}
+        "1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
+        "1985": 1,"1984": 1,"1983": 1,"1982": 42,}
+    # Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing.
    try:
        os.remove("loadlogbk.log")
    except OSError:
@@ -503,8 +528,8 @@ def LoadLogbooks():
                nlbe[expo.year]=numentries
                expd[expo.year]= 0
    print("** total trips in ObjStore:", len(trips))
-    for i in logdataissues:
-        print("{:15s}:   {}".format(i, logdataissues[i]))
+    #for i in logdataissues:
+    #    print("{:15s}:   {}".format(i, logdataissues[i]))
        
    for lbe in trips:
        year, date, tripcave, triptitle, text, trippeople, tu, formattype = trips[lbe]
@@ -513,7 +538,7 @@ def LoadLogbooks():
    for y in expd:
        print("{} {}".format(y, expd[y]), nlbe[y])
        yt += expd[y]
-    print("{} total".format(yt))
+    print("total {} log entries in all expeditions".format(yt))
    
    with shelve.open('logbktrips.shelve',writeback=True) as odb:
        for lbe in trips:
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -5,6 +5,12 @@ from utils import save_carefully
 from html.parser import HTMLParser
 from unidecode import unidecode

+'''These functions do not match how the stand-alone script works. So the script produces an HTML file which has 
+href links to pages in troggle which troggle does not think are right.
+The standalone script needs to be rendered defunct, and all the parsing needs to be in troggle. Either that,
+or they should use the same code by importing a module.
+'''
+
 def parseMugShotAndBlurb(personline, header, person):
    """create mugshot Photo instance"""
    mugShotFilename=personline[header["Mugshot"]]
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -1252,6 +1252,7 @@ def LoadPositions():
        print(" -  Regenerating stale cavern .log and .3d for '{}'\n    days old: {:.1f}    {:.1f}    {:.1f}".
            format(topdata, (svx_t - d3d_t)/(24*3600), (cav_t - d3d_t)/(24*3600), (now - d3d_t)/(24*3600)))
        call([settings.CAVERN, "--log", "--output={}".format(topdata), "{}.svx".format(topdata)])
+        print(" -  Regenerating {} {}.3d  in  {}".format(settings.THREEDTOPOS, topdata, settings.SURVEX_DATA))
        call([settings.THREEDTOPOS, '{}.3d'.format(topdata)], cwd = settings.SURVEX_DATA)

    topdata = settings.SURVEX_DATA + settings.SURVEX_TOPNAME
--- a/settings.py
+++ b/settings.py
@@ -87,6 +87,14 @@ LOGBOOK_PARSER_SETTINGS = {
                "1993": ("1993/log.htm", "Parseloghtml01"), 
                "1992": ("1992/log.htm", "Parseloghtml01"), 
                "1991": ("1991/log.htm", "Parseloghtml01"), 
+                "1990": ("1990/log.htm", "Parseloghtml01"), 
+                "1989": ("1989/log.htm", "Parseloghtml01"), 
+                "1988": ("1988/log.htm", "Parseloghtml01"), 
+                "1987": ("1987/log.htm", "Parseloghtml01"), 
+                "1985": ("1985/log.htm", "Parseloghtml01"), 
+                "1984": ("1984/log.htm", "Parseloghtml01"), 
+                "1983": ("1983/log.htm", "Parseloghtml01"), 
+                "1982": ("1982/log.htm", "Parseloghtml01"), 
            }

 APPEND_SLASH = False
--- a/templates/base.html
+++ b/templates/base.html
@@ -16,7 +16,7 @@
 <body  onLoad="contentHeight();">

 <div id="header"> 
-  <h1>CUCC Expeditions to Austria: 1976 - 2020</h1>
+  <h1>CUCC Expeditions to Austria: 1976 - 2021</h1>
  <div id="editLinks"> {% block loginInfo %}
    <a href="{{settings.EXPOWEB_URL}}">Website home</a> |
    {% if user.username %}