make ?reload private and clean old error msgs

2021-04-23 03:07:21 +01:00
parent 1a4be0f02e
commit dbd186e299
10 changed files with 172 additions and 85 deletions
@@ -6,20 +6,29 @@ from django.conf import settings
 from troggle.core.models.caves import LogbookEntry, QM, Cave
 import re, urllib.parse

-register = template.Library()
-'''Several templates are still (2021) using these wiki filters extensively to process data extracted from the database,
-and to restructure values into valid URLs to go elsewhere in the system, even where these are not actually 'wiki'. See
-the regexes at the end of this file.
-So the data in the database needs to be checked that there is no wiki-format content before these are deleted, and the
-filter functions of these regexes needs to be explored in practice.
+'''Several templates are still (2021) using these  filters extensively to process data 
+extracted from the database, and to restructure values into valid URLs to go elsewhere in the 
+system, even where these are not actually 'wiki'. See the regexes at the end of this file.
+
 '''
+todo = '''The data in the database and all input files 
+needs to be checked that there is no wiki-format content before all the functions in this 
+file are deleted, and the filter functions of these regexes and functions, particularly 
+wiki_to_html() which is used dozens of times in the templates, needs to be explored in 
+practice before being renamed more appropriately.
+'''
+register = template.Library()
+

@register.filter()
 def plusone(n):
+    '''used in templates/svxcaveseveral.html and templates/svxcavessingle.html for formatting
+    '''
    return n + 1


 def wiki_list(line, listdepth):
+        '''Does not seem to be used anywhere except photoSrcRepl() below'''
        l = ""
        for d in listdepth:
            l += d
@@ -50,6 +59,9 @@ def wiki_to_html(value, autoescape=None):
    """
    This is the tag which turns wiki syntax into html. It is intended for long pieces of wiki.
    Hence it splits the wiki into HTML paragraphs based on double line feeds.
+    
+    But it is used as a filter when rendering many, many fields, e.g. 
+    epersonexpedition.person|wiki_to_html_short in presonexpedition.html
    """
    #find paragraphs
    outValue = ""
@@ -65,6 +77,10 @@ def wiki_to_html_short(value, autoescape=None):
    """
    This is the tag which turns wiki syntax into html. It is intended for short pieces of wiki.
    Hence it does not split the wiki into paragraphs where it finds double line feeds.
+    
+    But it is used as a filter when rendering many, many fields, e.g. 
+    entrance.entrance_description|wiki_to_html in extrance.html
+    
    """
    if autoescape:
        value = conditional_escape(value)
@@ -120,6 +136,7 @@ def wiki_to_html_short(value, autoescape=None):
    photoLinkPattern="\[\[\s*photo:(?P<photoName>[^\s]+)\s*(?P<linkText>.*)\]\]"
    photoSrcPattern="\[\[\s*display:(?P<style>[^\s]+) photo:(?P<photoName>[^\s]+)\s*\]\]"
    def photoLinkRepl(matchobj):
+        '''Does not seem to be used anywhere except photoSrcRepl() below'''
        matchdict=matchobj.groupdict()
        try:
            linkText=matchdict['linkText']
@@ -136,6 +153,7 @@ def wiki_to_html_short(value, autoescape=None):
        return res

    def photoSrcRepl(matchobj):
+        '''Does not seem to be used anywhere'''
        matchdict=matchobj.groupdict()
        style=matchdict['style']
        try:
@@ -57,9 +57,22 @@ def expedition(request, expeditionname):
    By specifying a '0' for the expected number of entries in the logbook cache, this forces the parser to
    re-parse the original logbook HTML file.
    '''
-    if "reload" in request.GET:
-        this_expedition = Expedition.objects.get(year=int(expeditionname))
-        LoadLogbookForExpedition(this_expedition, 0) # 0 means re-parse
+    if request.user.is_authenticated: 
+        if "reload" in request.GET:
+            this_expedition = Expedition.objects.get(year=int(expeditionname))
+            # Need to delete the existing entries or we get duplication
+            entries = this_expedition.logbookentry_set.all()
+            print(f'! - expo {expeditionname} {len(entries)}  entries')
+            for entry in entries:
+                print(f'! - delete entry: "{entry}"')
+                entry.delete() 
+            entries = this_expedition.logbookentry_set.all()
+            print(f'! - expo {expeditionname} {len(entries)}  entries')
+            LoadLogbookForExpedition(this_expedition, 0) # 0 means re-parse
+        logged_in = True
+    else:
+        logged_in = False
+      

    ts = TROG['pagecache']['expedition']
    if settings.CACHEDPAGES:
@@ -67,7 +80,7 @@ def expedition(request, expeditionname):
        #print(f'! - expo {expeditionname} CACHEDPAGES {nexpos} expo pages in cache.')           
        if expeditionname in ts:
            #print('! - expo {expeditionanme} using cached page')           
-            return render(request,'expedition.html', ts[expeditionname] )
+            return render(request,'expedition.html', { **ts[expeditionname], 'logged_in' : logged_in })
                    
    this_expedition = Expedition.objects.get(year=int(expeditionname))

@@ -88,11 +101,11 @@ def expedition(request, expeditionname):
    
    ts[expeditionname] = {'expedition': this_expedition, 'expeditions':expeditions, 
        'personexpeditiondays':personexpeditiondays, 'settings':settings, 
-        'dateditems': dateditems }
+        'dateditems': dateditems}
    TROG['pagecache']['expedition'][expeditionname] = ts[expeditionname]
    nexpos = len( TROG['pagecache']['expedition'])
    #print(f'! - expo {expeditionname}  pre-render N expos:{nexpos}')           
-    return render(request,'expedition.html', ts[expeditionname] )
+    return render(request,'expedition.html', { **ts[expeditionname], 'logged_in' : logged_in } )


 # def get_absolute_url(self): # seems to have come seriously adrift. This should be in a class?!
@@ -41,11 +41,15 @@ def todos(request, module):
    '''
    from troggle.core.TESTS.tests import todo as tests
    from troggle.core.views.logbooks import todo as viewlogbooks
+    from troggle.parsers.logbooks import todo as parserslogbooks
    from troggle.core.forms import todo as forms
+    from troggle.core.templatetags.wiki_markup import todo as wiki
    tododict = {'views/other': todo,
        'tests': tests, 
        'views/logbooks': viewlogbooks, 
-        'core/forms': forms}
+        'parsers/logbooks': parserslogbooks, 
+        'core/forms': forms,
+        'core/templatetags/wiki_markup': wiki}
    return render(request,'core/todos.html', {'tododict': tododict})

 def troggle404(request): # cannot get this to work. Handler404 in urls.py not right syntax
@@ -215,25 +219,25 @@ def newfile(request, pslug = None):
    return render(request, 'editfile.html', {'fileForm': fileform, })

@login_required_if_public
-def simpleupload(request):
-    print(f'! - FORM simpleupload - start')
+def scanupload(request, year='2050'):
+    print(f'! - FORM scanupload - start')
    if request.method == 'POST':
        form = SimpleUploadFileForm(request.POST,request.FILES)
        if form.is_valid():
            #form.save() # comment out so nothing saved in MEDIA_ROOT/fileuploads
            f = request.FILES["simplefile"]
            w = request.POST["title"]
-            print(f'! - FORM simpleupload uploaded {f.name}')
-            fs = FileSystemStorage(os.path.join(settings.SURVEY_SCANS, '2021', w))
+            print(f'! - FORM scanupload uploaded {f.name}')
+            fs = FileSystemStorage(os.path.join(settings.SURVEY_SCANS, year, w))
           
            actual_saved = fs.save(f.name, content=f) # name may chnage to avoid clash
-            # INSERT check if name is chnaged, to allow user to abort and rename - or lets do a chaecjk anyway.
+            # INSERT check if name is changed, to allow user to abort and rename - or let's do a check anyway.

-            print(f'! - FORM simpleupload {actual_saved}')
+            print(f'! - FORM scanupload {actual_saved}')

            form = SimpleUploadFileForm()
-            return render(request, 'simpleupload.html', {'form': form,'filesaved': True, 'actual_saved': actual_saved})
+            return render(request, 'scanuploadform.html', {'form': form,'filesaved': True, 'actual_saved': actual_saved})
    else:
        form = SimpleUploadFileForm()
-    return render(request, 'simpleupload.html', {'form':form,})
+    return render(request, 'scanuploadform.html', {'form':form,})

@@ -21,6 +21,39 @@ Parses and imports logbooks in all their wonderful confusion
 # When we edit logbook entries, allow a "?" after any piece of data to say we've frigged it and
 # it can be checked up later from the hard-copy if necessary; or it's not possible to determine (name, trip place, etc)
 '''
+todo='''
+- Put the object store 'trips' and the 'logdataissues' into TROG global object
+
+- refactor everything with some urgency, esp. LoadLogbookForExpedition()
+
+- delete all the autoLogbooKEntry stuff when we are absolutely certain what it does
+
+- Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, 
+  or it is broken/incomplete and need hand-editing.
+
+- import/parse/re-export-as-html the 'artisanal-format' old logbooks so that
+  we keep only a modern HTML05 format. Then we can retire the old parsers and reduce the
+  volume of code here substantially.
+
+- edit LoadLogbooks() to use coroutines to speed up import substantially,
+  but perhaps we had better profile it first?
+  
+- rewrite to use generators rather than storing everything intermediate in lists - to reduce memory impact.
+
+- the object store will need additional functions to replicate the persontrip calculation 
+  and storage. For the moment we leave all that to be done in the django db
+  
+- We should ensure logbook.html is utf-8 and stop this crap:             
+            file_in = open(logbookfile,'rb')
+            txt = file_in.read().decode("latin1")
+'''
+
+logentries = [] # the entire logbook for one year is a single object: a list of entries
+noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau', 
+        'base camp', 'basecamp', 'top camp', 'topcamp' ]
+logdataissues = {}
+trips ={}
+

 #
 # the logbook loading section
@@ -77,12 +110,6 @@ def GetTripCave(place):
        return None


-logentries = [] # the entire logbook for one year is a single object: a list of entries
-noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau', 
-        'base camp', 'basecamp', 'top camp', 'topcamp' ]
-logdataissues = {}
-trips ={}
-
 def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki"):
    """ saves a logbook entry and related persontrips 
    Does NOT save the expeditionday_id  - all NULLs. why?
@@ -205,8 +232,10 @@ def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu,
       #print(" - New id ",tid)
    else:
        tid= tripid1
+
    if tid in trips:
-        msg = "   ! DUPLICATE id .{}. {} ~{}~".format(tid, trips[tid][0], trips[tid][1])
+        tyear, tdate, *trest = trips[tid]
+        msg = f"   ! DUPLICATE on {tdate} id: '{tid}'"
        print(msg)
        DataIssue.objects.create(parser='logbooks', message=msg)
        tid= "d{}-s{:02d}".format(str(date),seq)
@@ -427,6 +456,7 @@ def LoadLogbookForExpedition(expedition, expect):
    """
    # absolutely horrid. REFACTOR THIS (all my fault..)
    global logentries
+    global logdataissues
    logbook_parseable = False
    logbook_cached = False
    yearlinks   = settings.LOGBOOK_PARSER_SETTINGS
@@ -445,6 +475,26 @@ def LoadLogbookForExpedition(expedition, expect):
            return False
        return True
    
+    def cleanerrors(year):
+        global logdataissues
+        print(f'   - CLEAN {year} {len(logdataissues)} data issues in total')
+        dataissues = DataIssue.objects.filter(parser='logbooks')
+        for di in dataissues:
+            ph = "t" + year + "-"
+            if re.search(ph, di.message) is not None:
+                print(f'   - CLEANING dataissue {di.message}')
+                di.delete()
+     
+        for te, content in logdataissues:
+            #  tripentry = year + "." + str(logbook_entry_count)
+            print(f'   - CLEAN {te}')
+            if te.startswith(year + "."):
+                print(f'   - CLEANING logdataissue {te}')
+                logdataissues.pop(te)
+
+
+    cleanerrors(expedition.year)
+
    if expedition.year in yearlinks:
        logbookfile = os.path.join(expologbase, yearlinks[expedition.year][0])
        expedition.logbookfile = yearlinks[expedition.year][0] 
@@ -478,10 +528,10 @@ def LoadLogbookForExpedition(expedition, expect):
                print("  -- Loaded ", len(logentries), " log entries")
                logbook_cached = True
            else:
-                print("  !- Should be ", expect, " but ", len(logentries), " found in cache")
+                print("  !- Told to expect ", expect, " but ", len(logentries), " found in cache")
                raise
        except:
-            print("   ! Failed to load corrupt cache. Deleting it.")
+            print("   ! Failed to load corrupt cache.  (Or I was told to ignore it). Deleting it.")
            os.remove(cache_filename)
            logentries=[]
            raise
@@ -554,7 +604,7 @@ def LoadLogbooks():
            TROG['pagecache']['expedition'][expo.year] = None # clear cache
            if expo.year not in nologbook:
                print((" - Logbook for: " + expo.year))
-                numentries = LoadLogbookForExpedition(expo, entries[expo.year])
+                numentries = LoadLogbookForExpedition(expo, entries[expo.year])  # this actually loads the logbook for one year
                log.write("{} {:5d} should be {}\n".format(expo.year, numentries, entries[expo.year]))
                nlbe[expo.year]=numentries
                expd[expo.year]= 0
@@ -588,6 +638,12 @@ locationRegex = re.compile(r'<span\s+class="location">(.*?)</span>', re.S)
 caveRegex = re.compile(r'<span\s+class="cave">(.*?)</span>', re.S)

 def parseAutoLogBookEntry(filename):
+    '''An AutoLogBookEntry appears to be one that was created online using a form, for a single trip,
+    which is then stored in a separate location to the usual logbook.html 
+    But when importing logbook.html all these individual entries also need to be parsed.
+    
+    This is all redundant as we are getting rid of the whole individual trip entry system
+    '''
    errors = []
    f = open(filename, "r")
    contents = f.read()
@@ -1,3 +1,3 @@
 {% extends "baseapi.html" %}
-{% block content %}<pre>{% for k, v in tododict.items %}<b><big>{{k}}</big></b>: {{v}}<br />
+{% block content %}<pre>{% for k, v in tododict.items %}<b><big>{{k}}</big></b>:<br> {{v}}<br />
 {% endfor %}</pre>{% endblock %}
@@ -20,7 +20,9 @@
 {% endfor %}
 </p>
 <p>See also the <a href="/years/{{expedition.year}}/">documentation index</a> for this Expo 
+{% if logged_in %}
 <p>Reparse and reload this year's logbook by clicking here: <a href="/expedition/{{expedition.year}}?reload">RELOAD</a>
+{% endif %}

 <p><b>At a single glance:</b> The table shows all expo cavers and their recorded trips.  
 The columns are the date in the month (July or August), with a "T" for a logbook entry, and 
@@ -60,7 +62,9 @@ an "S" for a survey trip.  The colours are the same for people on the same trip.
 {% endfor %}
 </table>

-<form action="" method="GET"><input type="submit" name="reload" value="Reload"></form>
+{% if logged_in %}
+<form action="" method="GET"><input type="submit" name="reload" value="Reload from logbook"></form>
+{% endif %}

 <h3>Logbooks and survey trips per day</h3>

@@ -0,0 +1,42 @@
+{% extends "base.html" %}
+
+{% block title %}Simple Fileupload{% endblock %}
+
+{% block content %}
+
+<h2>Survey Scan upload into Wallet</h2>
+
+{# The form wraps both columns, so every element is closed inside the element that opened it. #}
+<form method="post" enctype="multipart/form-data">
+    {% csrf_token %}
+    <div style="column-count: 2;">
+        {# Left column: result of the previous upload (if any) and the two input fields. #}
+        <div style="max-width:40%;">
+        {% if filesaved %}
+            <p style="margin-left:20%;">
+            <b>The file was saved as <em>'{{actual_saved}}'</em> <br><br>Upload another?</b>
+            </p>
+            <br>
+        {% endif %}
+            <br>
+            <input type="file" style="margin-left:20%;"
+                   placeholder="Simplefile" name="simplefile" id="files"><label for="files">Scan file(s)</label>
+            <br>  <br>
+            <input type="text" style="margin-left:20%;"
+                   placeholder="Wallet id e.g. 2021#23" name="title" value="2050#99" id="wallet"><label for="wallet"> Wallet id</label>
+        </div>
+        {# Right column: the submit button. #}
+        <div style="max-width:30%;">
+            <center>
+                <button style="color: #fff; border:1px; background-color:#999; margin-top:8%;
+                   height:35px; width:80%; margin-left:19%;" type="submit" value="Upload">
+                   <strong>Upload</strong>
+                </button>
+            </center>
+        </div>
+    </div>
+</form>
+ {% endblock %}
@@ -1,50 +0,0 @@
-{% extends "base.html" %}
-
-{% block title %}Simple Fileupload{% endblock %}
-
-{% block content %}
-
-<h2>Survey Scan upload into Wallet</h2>
-
-<div style = "max-width:470px; " >
- 
-
-{% if filesaved %}
-<p style="margin-left:20%;">
-<b>The file was saved as <em>'{{actual_saved}}'</em> <br><br>Upload another?</b>
-</p>
-<br>
-{% endif %}
-
-
-<form method ='post' enctype ="multipart/form-data">
-    {% csrf_token %}
- 
-			
-         <br>
-         
-          
-               <input type = "file" style = "margin-left:20%;" 
-               placeholder = "Simplefile" name = "simplefile" />
-          
-        <br>  <br>
-         
-          
-               <input type = "text" style = "margin-left:20%;" 
-               placeholder = "Wallet id e.g. 2021#23" name = "title" value='2021#99' label="Wallet id"/>
-          
-         </div>
-         
-         <div style = "max-width:470px;">
-            <center> 
-            
-               <button style = "color: #fff; border:1px; background-color:#999; margin-top:8%; 
-                  height:35px; width:80%; margin-left:19%;" type = "submit" value = "Upload" >
-                  <strong>Upload</strong>
-               </button>
-               
-            </center>
-         </div>
-</form>
-</div>
- {% endblock %}
@@ -10,7 +10,7 @@ from django.urls import reverse, resolve

 from troggle.core.views import caves, statistics, survex
 from troggle.core.views.surveys import surveyscansingle, surveyscansfolder, surveyscansfolders, dwgdata, dwgfilesingle, dwgfileupload
-from troggle.core.views.other import troggle404, frontpage, todos, controlpanel, frontpage, newfile, simpleupload
+from troggle.core.views.other import troggle404, frontpage, todos, controlpanel, frontpage, newfile, scanupload
 from troggle.core.views.other import downloadlogbook, ajax_QM_number, downloadQMs
 from troggle.core.views.caves import ent, cavepage
 from troggle.core.views.logbooks import get_logbook_entries, logbookentry, logbookSearch
@@ -75,7 +75,7 @@ trogglepatterns = [
    re_path(r'^admin/doc/',  include('django.contrib.admindocs.urls')), # needs docutils Python module (http://docutils.sf.net/).
    re_path(r'^admin/',      admin.site.urls), # includes admin login & logout urls
    
-    path('upload',      simpleupload, name='simpleupload'), # includes admin login & logout urls
+    path('scanupload',      scanupload, name='scanupload'), 

 # setting LOGIN_URL = '/accounts/login/' is default
 # url ENDS WITH this string