From dbd186e299fecd8f10f3dca0a88b78f842b0c59b Mon Sep 17 00:00:00 2001
From: Philip Sargent <philip.sargent@klebos.com>
Date: Fri, 23 Apr 2021 03:07:21 +0100
Subject: [PATCH] make ?reload private and clean old error msgs

---
 core/templatetags/wiki_markup.py              | 30 ++++++--
 core/views/logbooks.py                        | 25 ++++--
 core/views/other.py                           | 22 +++---
 parsers/logbooks.py                           | 76 ++++++++++++++++---
 templates/core/todos.html                     |  2 +-
 templates/expedition.html                     |  6 +-
 ... => logbookentrynew-pending-deletion.html} |  0
 templates/scanuploadform.html                 | 42 ++++++++++
 templates/simpleupload.html                   | 50 ------------
 urls.py                                       |  4 +-
 10 files changed, 172 insertions(+), 85 deletions(-)
 rename templates/{newlogbookentry.html => logbookentrynew-pending-deletion.html} (100%)
 create mode 100644 templates/scanuploadform.html
 delete mode 100644 templates/simpleupload.html

diff --git a/core/templatetags/wiki_markup.py b/core/templatetags/wiki_markup.py
index 276586a..966853d 100644
--- a/core/templatetags/wiki_markup.py
+++ b/core/templatetags/wiki_markup.py
@@ -6,20 +6,29 @@ from django.conf import settings
 from troggle.core.models.caves import LogbookEntry, QM, Cave
 import re, urllib.parse
 
-register = template.Library()
-'''Several templates are still (2021) using these wiki filters extensively to process data extracted from the database,
-and to restructure values into valid URLs to go elsewhere in the system, even where these are not actually 'wiki'. See
-the regexes at the end of this file.
-So the data in the database needs to be checked that there is no wiki-format content before these are deleted, and the
-filter functions of these regexes needs to be explored in practice.
+'''Several templates are still (2021) using these filters extensively to process data
+extracted from the database, and to restructure values into valid URLs to go elsewhere in the 
+system, even where these are not actually 'wiki'. See the regexes at the end of this file.
+
 '''
+todo = '''The data in the database and all input files 
+needs to be checked that there is no wiki-format content before all the functions in this 
+file are deleted, and the filter functions of these regexes and functions, particularly 
+wiki_to_html() which is used dozens of times in the templates, needs to be explored in 
+practice before being renamed more appropriately.
+'''
+register = template.Library()
+
 
 @register.filter()
 def plusone(n):
+    '''used in templates/svxcaveseveral.html and templates/svxcavessingle.html for formatting
+    '''
     return n + 1
 
 
 def wiki_list(line, listdepth):
+        '''Does not seem to be used anywhere except photoSrcRepl() below'''
         l = ""
         for d in listdepth:
             l += d
@@ -50,6 +59,9 @@ def wiki_to_html(value, autoescape=None):
     """
     This is the tag which turns wiki syntax into html. It is intended for long pieces of wiki.
     Hence it splits the wiki into HTML paragraphs based on double line feeds.
+    
+    But it is used as a filter when rendering many, many fields, e.g. 
+    epersonexpedition.person|wiki_to_html_short in personexpedition.html
     """
     #find paragraphs
     outValue = ""
@@ -65,6 +77,10 @@ def wiki_to_html_short(value, autoescape=None):
     """
     This is the tag which turns wiki syntax into html. It is intended for short pieces of wiki.
     Hence it is not split the wiki into paragraphs using where it finds double line feeds.
+    
+    But it is used as a filter when rendering many, many fields, e.g. 
+    entrance.entrance_description|wiki_to_html in entrance.html
+    
     """
     if autoescape:
         value = conditional_escape(value)
@@ -120,6 +136,7 @@ def wiki_to_html_short(value, autoescape=None):
     photoLinkPattern="\[\[\s*photo:(?P<photoName>[^\s]+)\s*(?P<linkText>.*)\]\]"
     photoSrcPattern="\[\[\s*display:(?P<style>[^\s]+) photo:(?P<photoName>[^\s]+)\s*\]\]"
     def photoLinkRepl(matchobj):
+        '''Does not seem to be used anywhere except photoSrcRepl() below'''
         matchdict=matchobj.groupdict()
         try:
             linkText=matchdict['linkText']
@@ -136,6 +153,7 @@ def wiki_to_html_short(value, autoescape=None):
         return res
 
     def photoSrcRepl(matchobj):
+        '''Does not seem to be used anywhere'''
         matchdict=matchobj.groupdict()
         style=matchdict['style']
         try:
diff --git a/core/views/logbooks.py b/core/views/logbooks.py
index 05faa64..454483c 100644
--- a/core/views/logbooks.py
+++ b/core/views/logbooks.py
@@ -57,9 +57,22 @@ def expedition(request, expeditionname):
     By specifying a '0' for the expected number of entries in the logbook cache, this forces the parser to
     re-parse the original logbook HTML file.
     '''
-    if "reload" in request.GET:
-        this_expedition = Expedition.objects.get(year=int(expeditionname))
-        LoadLogbookForExpedition(this_expedition, 0) # 0 means re-parse
+    if request.user.is_authenticated: 
+        if "reload" in request.GET:
+            this_expedition = Expedition.objects.get(year=int(expeditionname))
+            # Need to delete the existing entries or we get duplication
+            entries = this_expedition.logbookentry_set.all()
+            print(f'! - expo {expeditionname} {len(entries)}  entries')
+            for entry in entries:
+                print(f'! - delete entry: "{entry}"')
+                entry.delete() 
+            entries = this_expedition.logbookentry_set.all()
+            print(f'! - expo {expeditionname} {len(entries)}  entries')
+            LoadLogbookForExpedition(this_expedition, 0) # 0 means re-parse
+        logged_in = True
+    else:
+        logged_in = False
+      
 
     ts = TROG['pagecache']['expedition']
     if settings.CACHEDPAGES:
@@ -67,7 +80,7 @@ def expedition(request, expeditionname):
         #print(f'! - expo {expeditionname} CACHEDPAGES {nexpos} expo pages in cache.')           
         if expeditionname in ts:
             #print('! - expo {expeditionanme} using cached page')           
-            return render(request,'expedition.html', ts[expeditionname] )
+            return render(request,'expedition.html', { **ts[expeditionname], 'logged_in' : logged_in })
                     
     this_expedition = Expedition.objects.get(year=int(expeditionname))
 
@@ -88,11 +101,11 @@ def expedition(request, expeditionname):
     
     ts[expeditionname] = {'expedition': this_expedition, 'expeditions':expeditions, 
         'personexpeditiondays':personexpeditiondays, 'settings':settings, 
-        'dateditems': dateditems }
+        'dateditems': dateditems}
     TROG['pagecache']['expedition'][expeditionname] = ts[expeditionname]
     nexpos = len( TROG['pagecache']['expedition'])
     #print(f'! - expo {expeditionname}  pre-render N expos:{nexpos}')           
-    return render(request,'expedition.html', ts[expeditionname] )
+    return render(request,'expedition.html', { **ts[expeditionname], 'logged_in' : logged_in } )
 
 
 # def get_absolute_url(self): # seems to have come seriously adrift. This should be in a class?!
diff --git a/core/views/other.py b/core/views/other.py
index 639ea5f..9a12b1d 100644
--- a/core/views/other.py
+++ b/core/views/other.py
@@ -41,11 +41,15 @@ def todos(request, module):
     '''
     from troggle.core.TESTS.tests import todo as tests
     from troggle.core.views.logbooks import todo as viewlogbooks
+    from troggle.parsers.logbooks import todo as parserslogbooks
     from troggle.core.forms import todo as forms
+    from troggle.core.templatetags.wiki_markup import todo as wiki
     tododict = {'views/other': todo,
         'tests': tests, 
         'views/logbooks': viewlogbooks, 
-        'core/forms': forms}
+        'parsers/logbooks': parserslogbooks, 
+        'core/forms': forms,
+        'core/templatetags/wiki_markup': wiki}
     return render(request,'core/todos.html', {'tododict': tododict})
 
 def troggle404(request): # cannot get this to work. Handler404 in urls.py not right syntax
@@ -215,25 +219,25 @@ def newfile(request, pslug = None):
     return render(request, 'editfile.html', {'fileForm': fileform, })
 
 @login_required_if_public
-def simpleupload(request):
-    print(f'! - FORM simpleupload - start')
+def scanupload(request, year='2050'):
+    print(f'! - FORM scanupload - start')
     if request.method == 'POST':
         form = SimpleUploadFileForm(request.POST,request.FILES)
         if form.is_valid():
             #form.save() # comment out so nothing saved in MEDIA_ROOT/fileuploads
             f = request.FILES["simplefile"]
             w = request.POST["title"]
-            print(f'! - FORM simpleupload uploaded {f.name}')
-            fs = FileSystemStorage(os.path.join(settings.SURVEY_SCANS, '2021', w))
+            print(f'! - FORM scanupload uploaded {f.name}')
+            fs = FileSystemStorage(os.path.join(settings.SURVEY_SCANS, year, w))
            
             actual_saved = fs.save(f.name, content=f) # name may chnage to avoid clash
-            # INSERT check if name is chnaged, to allow user to abort and rename - or lets do a chaecjk anyway.
+            # INSERT check if name is changed, to allow user to abort and rename - or let's do a check anyway.
 
-            print(f'! - FORM simpleupload {actual_saved}')
+            print(f'! - FORM scanupload {actual_saved}')
 
             form = SimpleUploadFileForm()
-            return render(request, 'simpleupload.html', {'form': form,'filesaved': True, 'actual_saved': actual_saved})
+            return render(request, 'scanuploadform.html', {'form': form,'filesaved': True, 'actual_saved': actual_saved})
     else:
         form = SimpleUploadFileForm()
-    return render(request, 'simpleupload.html', {'form':form,})
+    return render(request, 'scanuploadform.html', {'form':form,})
 
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 4310fdb..46aba96 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -21,6 +21,39 @@ Parses and imports logbooks in all their wonderful confusion
 # When we edit logbook entries, allow a "?" after any piece of data to say we've frigged it and
 # it can be checked up later from the hard-copy if necessary; or it's not possible to determin (name, trip place, etc)
 '''
+todo='''
+- Put the object store 'trips' and the 'logdataissues' into TROG global object
+
+- refactor everything with some urgency, esp. LoadLogbookForExpedition()
+
+- delete all the autoLogbooKEntry stuff when we are absolutely certain what it does
+
+- Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, 
+  or it is broken/incomplete and need hand-editing.
+
+- import/parse/re-export-as-html the 'artisanal-format' old logbooks so that
+  we keep only a modern HTML05 format. Then we can retiure the old parsers and reduce the
+  volume of code here substantially.
+
+- edit LoadLogbooks() to use coroutines to speed up import substantially,
+  but perhaps we had better profile it first?
+  
+- rewrite to use generators rather than storing everything intermediate in lists - to reduce memory impact.
+
+- the object store will need additional functions to replicate the persontrip calculation 
+  and storage. For the moment we leave all that to be done in the django db
+  
+- We should ensure logbook.html is utf-8 and stop this crap:             
+            file_in = open(logbookfile,'rb')
+            txt = file_in.read().decode("latin1")
+'''
+
+logentries = [] # the entire logbook for one year is a single object: a list of entries
+noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau', 
+        'base camp', 'basecamp', 'top camp', 'topcamp' ]
+logdataissues = {}
+trips ={}
+
 
 #
 # the logbook loading section
@@ -77,12 +110,6 @@ def GetTripCave(place):
         return None
 
 
-logentries = [] # the entire logbook for one year is a single object: a list of entries
-noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau', 
-        'base camp', 'basecamp', 'top camp', 'topcamp' ]
-logdataissues = {}
-trips ={}
-
 def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki"):
     """ saves a logbook entry and related persontrips 
     Does NOT save the expeditionday_id  - all NULLs. why?
@@ -205,8 +232,10 @@ def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu,
        #print(" - New id ",tid)
     else:
         tid= tripid1
+
     if tid in trips:
-        msg = "   ! DUPLICATE id .{}. {} ~{}~".format(tid, trips[tid][0], trips[tid][1])
+        tyear, tdate, *trest = trips[tid]
+        msg = f"   ! DUPLICATE on {tdate} id: '{tid}'"
         print(msg)
         DataIssue.objects.create(parser='logbooks', message=msg)
         tid= "d{}-s{:02d}".format(str(date),seq)
@@ -427,6 +456,7 @@ def LoadLogbookForExpedition(expedition, expect):
     """
     # absolutely horrid. REFACTOR THIS (all my fault..)
     global logentries
+    global logdataissues
     logbook_parseable = False
     logbook_cached = False
     yearlinks   = settings.LOGBOOK_PARSER_SETTINGS
@@ -445,6 +475,26 @@ def LoadLogbookForExpedition(expedition, expect):
             return False
         return True
     
+    def cleanerrors(year):
+        '''Delete stale logbook DataIssue rows and in-memory logdataissues entries for one year.'''
+        global logdataissues
+        print(f'   - CLEAN {year} {len(logdataissues)} data issues in total')
+        ph = "t" + year + "-"  # loop-invariant, so built once
+        for di in DataIssue.objects.filter(parser='logbooks'):
+            if re.search(ph, di.message) is not None:
+                print(f'   - CLEANING dataissue {di.message}')
+                di.delete()
+
+        # Keys are expected to look like year + "." + str(logbook_entry_count). Loop over a snapshot as we pop.
+        for te in list(logdataissues):
+            print(f'   - CLEAN {te}')
+            if te.startswith(year + "."):
+                print(f'   - CLEANING logdataissue {te}')
+                logdataissues.pop(te)
+
+
+    cleanerrors(expedition.year)
+
     if expedition.year in yearlinks:
         logbookfile = os.path.join(expologbase, yearlinks[expedition.year][0])
         expedition.logbookfile = yearlinks[expedition.year][0] 
@@ -478,10 +528,10 @@ def LoadLogbookForExpedition(expedition, expect):
                 print("  -- Loaded ", len(logentries), " log entries")
                 logbook_cached = True
             else:
-                print("  !- Should be ", expect, " but ", len(logentries), " found in cache")
+                print("  !- Told to expect ", expect, " but ", len(logentries), " found in cache")
                 raise
         except:
-            print("   ! Failed to load corrupt cache. Deleting it.")
+            print("   ! Failed to load corrupt cache.  (Or I was told to ignore it). Deleting it.")
             os.remove(cache_filename)
             logentries=[]
             raise
@@ -554,7 +604,7 @@ def LoadLogbooks():
             TROG['pagecache']['expedition'][expo.year] = None # clear cache
             if expo.year not in nologbook:
                 print((" - Logbook for: " + expo.year))
-                numentries = LoadLogbookForExpedition(expo, entries[expo.year])
+                numentries = LoadLogbookForExpedition(expo, entries[expo.year])  # this actually loads the logbook for one year
                 log.write("{} {:5d} should be {}\n".format(expo.year, numentries, entries[expo.year]))
                 nlbe[expo.year]=numentries
                 expd[expo.year]= 0
@@ -588,6 +638,12 @@ locationRegex = re.compile(r'<span\s+class="location">(.*?)</span>', re.S)
 caveRegex = re.compile(r'<span\s+class="cave">(.*?)</span>', re.S)
 
 def parseAutoLogBookEntry(filename):
+    '''An AutoLogBookEntry appears to be one that was created online using a form, for a single trip,
+    which is then stored in a separate location to the usual logbook.html 
+    But when importing logbook.html all these individual entries also need to be parsed.
+    
+    This is all redundant as we are getting rid of the whole individual trip entry system
+    '''
     errors = []
     f = open(filename, "r")
     contents = f.read()
diff --git a/templates/core/todos.html b/templates/core/todos.html
index de1fd1d..3122ddc 100644
--- a/templates/core/todos.html
+++ b/templates/core/todos.html
@@ -1,3 +1,3 @@
 {% extends "baseapi.html" %}
-{% block content %}<pre>{% for k, v in tododict.items %}<b><big>{{k}}</big></b>: {{v}}<br />
+{% block content %}<pre>{% for k, v in tododict.items %}<b><big>{{k}}</big></b>:<br> {{v}}<br />
 {% endfor %}</pre>{% endblock %}
\ No newline at end of file
diff --git a/templates/expedition.html b/templates/expedition.html
index 3533e28..ac86efe 100644
--- a/templates/expedition.html
+++ b/templates/expedition.html
@@ -20,7 +20,9 @@
 {% endfor %}
 </p>
 <p>See also the <a href="/years/{{expedition.year}}/">documentation index</a> for this Expo 
+{% if logged_in %}
 <p>Reparse and reload this year's logbook by clicking here: <a href="/expedition/{{expedition.year}}?reload">RELOAD</a>
+{% endif %}
 
 <p><b>At a single glance:</b> The table shows all expo cavers and their recorded trips.  
 The columns are the date in the month (July or August), with a "T" for a logbook entry, and 
@@ -60,7 +62,9 @@ an "S" for a survey trip.  The colours are the same for people on the same trip.
 {% endfor %}
 </table>
 
-<form action="" method="GET"><input type="submit" name="reload" value="Reload"></form>
+{% if logged_in %}
+<form action="" method="GET"><input type="submit" name="reload" value="Reload from logbook"></form>
+{% endif %}
 
 <h3>Logbooks and survey trips per day</h3>
 
diff --git a/templates/newlogbookentry.html b/templates/logbookentrynew-pending-deletion.html
similarity index 100%
rename from templates/newlogbookentry.html
rename to templates/logbookentrynew-pending-deletion.html
diff --git a/templates/scanuploadform.html b/templates/scanuploadform.html
new file mode 100644
index 0000000..c08a165
--- /dev/null
+++ b/templates/scanuploadform.html
@@ -0,0 +1,42 @@
+{% extends "base.html" %}
+
+{% block title %}Simple Fileupload{% endblock %}
+
+{% block content %}
+
+<h2>Survey Scan upload into Wallet</h2>
+
+
+ 
+<div style="column-count: 2;">
+    <div style = "max-width:40%; " >
+    {% if filesaved %}
+        <p style="margin-left:20%;">
+        <b>The file was saved as <em>'{{actual_saved}}'</em> <br><br>Upload another?</b>
+        </p>
+        <br>
+    {% endif %}
+
+
+    <form method ='post' enctype ="multipart/form-data">
+        {% csrf_token %}              
+            <br>         
+                   <input type = "file" style = "margin-left:20%;" 
+                   placeholder = "Simplefile" name = "simplefile" id="files"><label for="files">Scan file(s)</label>
+            <br>  <br>    
+                   <input type = "text" style = "margin-left:20%;" 
+                   placeholder = "Wallet id e.g. 2021#23" name = "title" value='2050#99' id="wallet"><label for="wallet"> Wallet id</label>
+              
+    </div>         
+    <div style = "max-width:30%;">
+                <center>       
+                   <button style = "color: #fff; border:1px; background-color:#999; margin-top:8%; 
+                      height:35px; width:80%; margin-left:19%;" type = "submit" value = "Upload" >
+                      <strong>Upload</strong>
+                   </button>
+                </center>
+    </div>
+    </form>
+    </div>
+</div>
+ {% endblock %}
diff --git a/templates/simpleupload.html b/templates/simpleupload.html
deleted file mode 100644
index d97234c..0000000
--- a/templates/simpleupload.html
+++ /dev/null
@@ -1,50 +0,0 @@
-{% extends "base.html" %}
-
-{% block title %}Simple Fileupload{% endblock %}
-
-{% block content %}
-
-<h2>Survey Scan upload into Wallet</h2>
-
-<div style = "max-width:470px; " >
- 
-
-{% if filesaved %}
-<p style="margin-left:20%;">
-<b>The file was saved as <em>'{{actual_saved}}'</em> <br><br>Upload another?</b>
-</p>
-<br>
-{% endif %}
-
-
-<form method ='post' enctype ="multipart/form-data">
-    {% csrf_token %}
- 
-			
-         <br>
-         
-          
-               <input type = "file" style = "margin-left:20%;" 
-               placeholder = "Simplefile" name = "simplefile" />
-          
-        <br>  <br>
-         
-          
-               <input type = "text" style = "margin-left:20%;" 
-               placeholder = "Wallet id e.g. 2021#23" name = "title" value='2021#99' label="Wallet id"/>
-          
-         </div>
-         
-         <div style = "max-width:470px;">
-            <center> 
-            
-               <button style = "color: #fff; border:1px; background-color:#999; margin-top:8%; 
-                  height:35px; width:80%; margin-left:19%;" type = "submit" value = "Upload" >
-                  <strong>Upload</strong>
-               </button>
-               
-            </center>
-         </div>
-</form>
-</div>
- {% endblock %}
diff --git a/urls.py b/urls.py
index db9894e..5d475a9 100644
--- a/urls.py
+++ b/urls.py
@@ -10,7 +10,7 @@ from django.urls import reverse, resolve
 
 from troggle.core.views import caves, statistics, survex
 from troggle.core.views.surveys import surveyscansingle, surveyscansfolder, surveyscansfolders, dwgdata, dwgfilesingle, dwgfileupload
-from troggle.core.views.other import troggle404, frontpage, todos, controlpanel, frontpage, newfile, simpleupload
+from troggle.core.views.other import troggle404, frontpage, todos, controlpanel, frontpage, newfile, scanupload
 from troggle.core.views.other import downloadlogbook, ajax_QM_number, downloadQMs
 from troggle.core.views.caves import ent, cavepage
 from troggle.core.views.logbooks import get_logbook_entries, logbookentry, logbookSearch
@@ -75,7 +75,7 @@ trogglepatterns = [
     re_path(r'^admin/doc/',  include('django.contrib.admindocs.urls')), # needs docutils Python module (http://docutils.sf.net/).
     re_path(r'^admin/',      admin.site.urls), # includes admin login & logout urls
     
-    path('upload',      simpleupload, name='simpleupload'), # includes admin login & logout urls
+    path('scanupload',      scanupload, name='scanupload'), 
 
 # setting LOGIN_URL = '/accounts/login/' is default
 # url ENDS WITH this string