From cabcada0b8738366bce33173ad1b3d376e8fb73c Mon Sep 17 00:00:00 2001
From: Philip Sargent <philip.sargent@gmail.com>
Date: Fri, 9 Dec 2022 23:45:07 +0000
Subject: [PATCH] 2003 logbook export/re-import as now HTML format

---
 core/views/other.py             | 34 +++++--------
 databaseReset.py                |  7 ++-
 parsers/imports.py              |  5 ++
 parsers/logbooks.py             | 84 ++++++++++++++++++++++-----------
 templates/controlPanel.html     |  5 +-
 templates/logbook2005style.html | 33 ++++++-------
 6 files changed, 98 insertions(+), 70 deletions(-)

diff --git a/core/views/other.py b/core/views/other.py
index aadb613..88d0589 100644
--- a/core/views/other.py
+++ b/core/views/other.py
@@ -148,15 +148,13 @@ def controlpanel(request):
 
 
 def exportlogbook(request,year=None,extension=None):
-    '''Constructs, from the database, a complete HTML formatted logbook - but TEXT ONLY
-    for the current year. Formats available are HTML2005 or HTML2022 (planned)
+    '''Constructs, from the database, a complete HTML formatted logbook 
+    for the year chosen in the export form. The only format available is HTML2005 (others are old & broken or not written yet)
     
     There are no images stored in the database, so this is only a tool for a first pass, to be followed by
-    extensive hand-editing.
+    hand-editing. However, links to images work in the HTML text of a logbook entry.
     
-    NEED TO ADD IN THE MATERIAL WHICH IS NOT IN ANY LBE ! e.g. front matter.
-    
-    This function DOES NOT WORK.
+    NEED TO ADD IN THE MATERIAL WHICH IS NOT IN ANY LBE ! e.g. front matter.   
     
     This function is the recipient of the POST action os the export form in the control panel
     '''
@@ -170,26 +168,18 @@ def exportlogbook(request,year=None,extension=None):
     else:
         print(f'Logbook export {request.POST}')
 
-        if request.POST.get("year", '2016'):
-            year =  request.POST['year'] 
-        if request.POST.get("extension", 'html'):
-            extension =  request.POST['extension'] # e.g. html
-
+        year =  request.POST['year'] 
         current_expedition=Expedition.objects.get(year=year)
         logbook_entries=LogbookEntry.objects.filter(expedition=current_expedition).order_by('date') # need to be sorted by date!
         
-        #print(f'Logbook has {len(logbook_entries)} entries in it.')
+        print(f'Logbook has {len(logbook_entries)} entries in it.')
 
-        if extension == 'html2005':
-            response = HttpResponse(content_type='text/html')
-            style='2005'
-        else :
-            extension == 'html2022'
-            response = HttpResponse(content_type='text/html')
-            style='2022'
-           
-        filename='newlogbook.' + extension
-        template='logbook'+style+'style.'+extension
+        extension ='html'
+        response = HttpResponse(content_type='text/html')
+        style='2005'
+            
+        filename='logbook-new-format.' + extension
+        template='logbook'+style+'style.'+ extension
         response['Content-Disposition'] = 'attachment; filename='+filename
         t=loader.get_template(template)
         logbookfile = (t.render({'logbook_entries':logbook_entries}))
diff --git a/databaseReset.py b/databaseReset.py
index 630d3c6..db7a781 100644
--- a/databaseReset.py
+++ b/databaseReset.py
@@ -47,7 +47,7 @@ from django.db import transaction
 from troggle.core.utils import get_process_memory
 from troggle.core.models.caves import Cave, Entrance
 from troggle.parsers.imports import import_caves, import_people, import_surveyscans, \
-        import_logbooks, import_QMs, import_survex, import_loadpos, import_drawingsfiles
+        import_logbooks, import_logbook, import_QMs, import_survex, import_loadpos, import_drawingsfiles
 
 if os.geteuid() == 0:
     # This protects the server from having the wrong file permissions written on logs and caches
@@ -343,7 +343,8 @@ def usage():
              drawings  - read in the Tunnel & Therion files - which scans the survey scans too
              survex    - read in the survex files - all the survex blocks and entrances x/y/z 
 
-             dumplogbooks - Not used. write out autologbooks (not working?)
+             dumplogbooks - Not used. write out autologbooks (not working? use http://localhost:8000/controlpanel )
+             logbook   - read a single logbook. Default year is set in python code (parsers/imports.py)
 
              and [runlabel] is an optional string identifying this run of the script
              in the stored profiling data 'import-profile.json'
@@ -394,6 +395,8 @@ if __name__ == "__main__":
         jq.enq("caves",import_caves)
     elif "logbooks" in sys.argv:
         jq.enq("logbooks",import_logbooks)
+    elif "logbook" in sys.argv:
+        jq.enq("logbooks",import_logbook) # default year set in imports.py
     elif "people" in sys.argv:
         jq.enq("people",import_people)
     elif "QMs" in sys.argv:
diff --git a/parsers/imports.py b/parsers/imports.py
index df6e583..9cc945c 100644
--- a/parsers/imports.py
+++ b/parsers/imports.py
@@ -41,6 +41,11 @@ def import_logbooks():
     with transaction.atomic():
         troggle.parsers.logbooks.LoadLogbooks()
 
+def import_logbook(year=2003):  # import the logbook for a single expedition year; 2003 is the default
+    print(f"-- Importing Logbook {year}")
+    with transaction.atomic():  # the single-year load runs inside one transaction.atomic() block
+        troggle.parsers.logbooks.LoadLogbook(year)
+
 def import_QMs():
     print("-- Importing old QMs for 161, 204, 234 from CSV files")
     with transaction.atomic():
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index b67369c..889387d 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -26,6 +26,8 @@ Parses and imports logbooks in all their wonderful confusion
 todo='''
 - refactor everything with some urgency, esp. LoadLogbookForExpedition()
 
+- remove the TROG and lbo things since we need the database for multiuser access? Or not?
+
 - profile the code to find bad repetitive things, of which there are many.
 
 - far too many uses of Django field dereferencing to get values, which is SLOW
@@ -55,15 +57,15 @@ DEFAULT_LOGBOOK_FILE = "logbook.html"
 # but several don't work, and are skipped by the parsing code, e.g. 1983
 LOGBOOK_PARSER_SETTINGS = {
                 "2010": ("logbook.html", "parser_html"), 
-                "2009": ("2009logbook.txt", "parser_wiki"), 
-                "2008": ("2008logbook.txt", "parser_wiki"), 
+                "2009": ("2009logbook.txt", "wiki_parser"), 
+                "2008": ("2008logbook.txt", "wiki_parser"), 
                 "2007": ("logbook.html", "parser_html"), 
                 "2006": ("logbook.html", "parser_html"), 
-#               "2006": ("logbook/logbook_06.txt", "parser_wiki"), 
+#               "2006": ("logbook/logbook_06.txt", "wiki_parser"), 
                 "2006": ("logbook.html", "parser_html"), 
                 "2005": ("logbook.html", "parser_html"), 
                 "2004": ("logbook.html", "parser_html"), 
-                "2003": ("logbook.html", "parser_html_03"), 
+                "2003": ("logbook.html", "parser_html"), 
                 "2002": ("logbook.html", "parser_html"), 
                 "2001": ("log.htm", "parser_html_01"), 
                 "2000": ("log.htm", "parser_html_01"), 
@@ -88,7 +90,7 @@ LOGBOOK_PARSER_SETTINGS = {
 
 entries = { "2022": 64, "2019": 56, "2018": 74, "2017": 60, "2016": 81, "2015": 79, 
     "2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52, 
-    "2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31, 
+    "2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 42, "2002": 31, 
     "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41, 
     "1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
     "1985": 24,"1984": 32,"1983": 52,"1982": 42,}
@@ -114,8 +116,9 @@ rx_round_bracket = re.compile(r"[\(\[].*?[\)\]]")
 def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
     res = [ ]
     author = None
-    #print(f'# {tid}')
-       
+    # print(f'# {tid}')
+    # print(f" -  {tid} '{trippeople}'  ")
+
     for tripperson in re.split(r",|\+|&amp;|&(?!\w+;)| and ", trippeople):
         tripperson = tripperson.strip()
         # mul = re.match(r"(?i)<u>(.*?)</u>$", tripperson)
@@ -147,6 +150,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
             return None, None
         author = res[-1][0]
         
+    #print(f" -  {tid}  [{author.person}] '{res[0][0].person}'...")
     return res, author
 
 def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, tid=None):
@@ -195,9 +199,10 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
     #NEW slug for a logbook entry here! Unique id + slugified title fragment
     
     if tid is not None:
-        slug = tid + "_" + slugify(title)[:10].replace('-','_')
+        slug = tid
+        # slug = tid + "_" + slugify(title)[:10].replace('-','_')
     else: 
-        slug = str(randint(1000,10000)) + "_" + slugify(title)[:10].replace('-','_')
+        slug = str(randint(1000,9999)) + "_" + slugify(title)[:10].replace('-','_')
     nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug}
     
     # This creates the lbo instance of LogbookEntry
@@ -205,6 +210,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
 
     
     for tripperson, time_underground in trippersons:
+        # print(f" -  {tid} '{tripperson}' author:{tripperson == author}")
         lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
         nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
         # this creates the PersonTrip instance. 
@@ -251,7 +257,7 @@ def ParseDate(tripdate, year):
         return datetime.date(1970, 1, 1)
 
 # (2006 - not any more), 2008 - 2009
-def parser_wiki(year, expedition, txt):
+def wiki_parser(year, expedition, txt):
     global logentries
     global logdataissues
 
@@ -316,6 +322,11 @@ def parser_html(year, expedition, txt):
         if s:
             tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
         else: # allow title and people to be swapped in order
+            msg = f" !- {year} Can't parse:{logbook_entry_count} '{trippara[:40]}'..."
+            print(msg)
+            DataIssue.objects.create(parser='logbooks', message=msg)
+            logdataissues[tid]=msg
+
             s2 = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)?  # second date
                                 \s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
                                 \s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)?
@@ -329,7 +340,7 @@ def parser_html(year, expedition, txt):
                 tripid, tripid1, tripdate, triptitle, trippeople, triptext, tu = s2.groups()
             else:
                 if not re.search(r"Rigging Guide", trippara):
-                    msg = f" !- Logbook. Can't parse {tripid1}: {trippara} entry:{logbook_entry_count} "
+                    msg = f" !- Logbook. Can't parse entry on 2nd pass:{logbook_entry_count} '{trippara[:40]}'..."
                     print(msg)
                     DataIssue.objects.create(parser='logbooks', message=msg)
                     logdataissues[tid]=msg
@@ -343,7 +354,7 @@ def parser_html(year, expedition, txt):
             tripcave = "UNKNOWN"
         ltriptext = re.sub(r"</p>", "", triptext)
         ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
-        ltriptext = re.sub(r"<p>", "</br></br>", ltriptext).strip()
+        ltriptext = re.sub(r"<p>", "<br /><br />", ltriptext).strip()
 
         entrytuple = (ldate, tripcave, triptitle, ltriptext, 
                 trippeople, expedition, tu, tripid1)
@@ -430,11 +441,11 @@ def parser_html_01(year, expedition, txt):
                 ltriptext = ltriptext[:mtail.start(0)]
             ltriptext = re.sub(r"</p>", "", ltriptext)
             ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
-            ltriptext = re.sub(r"<p>|<br>", "\n\n", ltriptext).strip()
             ltriptext = re.sub(r"</?u>", "_", ltriptext)
             ltriptext = re.sub(r"</?i>", "''", ltriptext)
             ltriptext = re.sub(r"</?b>", "'''", ltriptext)
-            
+            ltriptext = re.sub(r"<p>", "<br /><br />", ltriptext).strip()
+           
             if ltriptext == "":
                 message = " ! - Zero content for logbook entry!: " + tid 
                 DataIssue.objects.create(parser='logbooks', message=message)
@@ -469,7 +480,7 @@ def parser_html_03(year, expedition, txt):
     logbook_entry_count = 0
     for trippara in tripparas:
         logbook_entry_count += 1
-        tid = set_trip_id(year,logbook_entry_count)
+        tid = set_trip_id(year,logbook_entry_count) # default trip id, before we read the date
         
         s = re.match(r"(?s)\s*<p>(.*?)</p>(.*)$", trippara)
         if not ( s ) :
@@ -485,23 +496,30 @@ def parser_html_03(year, expedition, txt):
         sheader = tripheader.split(" -- ")
         tu = ""
         if re.match("T/U|Time underwater", sheader[-1]):
-            tu = sheader.pop()
+            tu = sheader.pop() # not a number in 2003 usually
+            # print(f" -  {logbook_entry_count} '{tu}' ")
         if len(sheader) != 3:
-            print(" ! Header not three pieces", sheader)
+            print(" ! Header not three pieces for parser_html_03() ", sheader)
         tripdate, triptitle, trippeople = sheader
         ldate = ParseDate(tripdate.strip(), year)
-        triptitles = triptitle.split(" , ")
-        if len(triptitles) >= 2:
-            tripcave = triptitles[0]
+        # print(f" -  {logbook_entry_count} '{ldate}' from '{tripdate.strip()}' ")
+        # print(f" -  {logbook_entry_count} '{trippeople}'  ")
+        titlelist = triptitle.split(" , ")
+        if len(titlelist) >= 2:
+            location, *namelist = titlelist # list unpacking operator
+            tripname = ", ".join(namelist) # concatenate strings
+            # print(f" -  {logbook_entry_count} {location}  '{tripname}'")
         else:
-            tripcave = "UNKNOWN"
-        ltriptext = re.sub(r"</p>", "", triptext)
-        ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
-        ltriptext = re.sub(r"<p>", "\n\n", ltriptext).strip()
-        ltriptext = re.sub(r"[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext)
+            location = "UNKNOWN"
+            tripname = triptitle # no " , " separator: use the whole title, otherwise tripname is unbound (or stale) when entrytuple is built
+        ltriptext = triptext + "<br /><br />\n\n" + tu
+        ltriptext = re.sub(r"</p>", "", ltriptext)
+        #ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
+        ltriptext = re.sub(r"<p>", "<br /><br />\n\n", ltriptext).strip()
+        #ltriptext = re.sub(r"[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext)
 
 
-        entrytuple = (ldate, tripcave, triptitle, ltriptext, 
+        entrytuple = (ldate, location, tripname, ltriptext, 
                 trippeople, expedition, tu, tid)
         logentries.append(entrytuple)
 
@@ -509,8 +527,8 @@ def parser_html_03(year, expedition, txt):
 def LoadLogbookForExpedition(expedition):
     """ Parses all logbook entries for one expedition 
     """
-    # absolutely horrid. REFACTOR THIS (all my fault..)
     global logentries
+    # absolutely horrid. REFACTOR THIS (all my fault..)
     global logdataissues
     global entries
 
@@ -556,6 +574,10 @@ def LoadLogbookForExpedition(expedition):
         parsefunc   = DEFAULT_LOGBOOK_PARSER
 
     expedition.save()
+    
+    lbes = LogbookEntry.objects.filter(expedition=expedition)
+    for lbe in lbes:
+        lbe.delete()
 
     try:
         file_in = open(logbookpath,'rb')
@@ -594,6 +616,14 @@ def LoadLogbookForExpedition(expedition):
 
     return len(logentries)
 
+def LoadLogbook(year):  # parse and load the logbook of the one expedition identified by year
+    nlbe={}  # entry count per expedition; filled below but neither returned nor read again in this function
+    TROG['pagecache']['expedition'][year] = None # clear cache
+    # NOTE(review): import_logbook() passes an int year by default, while LOGBOOK_PARSER_SETTINGS keys are strings - confirm the cache key type
+    expo = Expedition.objects.get(year=year)
+    nlbe[expo] = LoadLogbookForExpedition(expo)  # this actually loads the logbook for one expo
+     
+    
 def LoadLogbooks():
     """ This is the master function for parsing all logbooks into the Troggle database. 
     This should be rewritten to use coroutines to load all logbooks from disc in parallel,
diff --git a/templates/controlPanel.html b/templates/controlPanel.html
index 5d4cac0..52b0d7e 100644
--- a/templates/controlPanel.html
+++ b/templates/controlPanel.html
@@ -101,7 +101,7 @@
 
 
 <h3>Export to a different format:</h3>
-<p>This creates 'newlogbook.html' in the years/&lt;year&gt;/ folder
+<p>This creates 'logbook-new-format.html' in the years/&lt;year&gt;/ folder
 <table>
 <tr>
 
@@ -128,8 +128,7 @@
             Output style: 
             <select name="extension">
               <option value="html2005">.html file - 2005 style</option>
-              <option value="html2022">.html file - 2022 style</option>
-            </select>
+             </select>
             </p>
             <p>
             <input name="download_logbook" type="submit" value="Download logbook" />
diff --git a/templates/logbook2005style.html b/templates/logbook2005style.html
index af320a3..846e88d 100644
--- a/templates/logbook2005style.html
+++ b/templates/logbook2005style.html
@@ -1,26 +1,27 @@
+<!DOCTYPE html>
 <html>
-<head><title>{{logbook_entries.0.expedition}} Expo Logbook</title></head>
-<link rel="stylesheet" type="text/css" href="../../css/main2.css" />
-<style type="text/css">
-.tripdate	{ float: left;}
-.trippeople	{ float: right;}
-.triptitle	{ font-size: 120%; text-align: center; font-weight: bold; clear: both }
-.timeug		{ text-align: right; font-weight: bold }
-p		{ clear: both }
-</style>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<title>{{logbook_entries.0.expedition}} Expo Logbook</title>
+<link rel="stylesheet" href="../../css/main2.css" />
+</head>
+<!-- Exported by troggle in this format after having been imported using a different format and a different parser.
+This is because we are steadily converting old formats to a new common format so that we do not need to maintain half
+a dozen parser functions.
+Exported on {% now 'Y-m-d D' %} using control panel webpage and exportlogbook() in troggle/core/views/other.py
+-->
 <body>
-
 <h1>Expo {{logbook_entries.0.expedition}}</h1>
-
 {%for logbook_entry in logbook_entries%}
 <hr />
 
-<div class="tripdate" id="t{{logbook_entry.date}}A">{{logbook_entry.date}}</div>
-<div class="trippeople"><u>{{logbook_entry.author.person}}</u>
-{% for persontrip in logbook_entry.persontrip_set.all %}{{ persontrip.personexpedition.person }} {{ persontrip.personexpedition.time_underground }}, {% endfor %}
-</div>
-
+<div class="tripdate" id="{{logbook_entry.slug}}">{{logbook_entry.date|date:'Y-m-d'}}</div>
+<div class="trippeople">{% for persontrip in logbook_entry.persontrip_set.all %}{% if  persontrip.is_logbook_entry_author %}<u>{{persontrip.personexpedition.person}}</u>{% else %}{{ persontrip.personexpedition.person }}{% endif %}, {% endfor %}</div>
 <div class="triptitle">{{logbook_entry.place}} - {{logbook_entry.title}}</div>
 
 {{logbook_entry.text|safe}}
+<div class="timeug">T/U: {{logbook_entry.time_underground}}</div>
 {% endfor %}
+<hr />
+</body>
+</html>