making flat files delivery more robust

2025-12-17 17:57:06 +00:00 · 2021-03-31 17:57:43 +01:00
parent 3452c2c5d4
commit a6ed0a964e
4 changed files with 119 additions and 93 deletions
--- a/core/views_expo.py
+++ b/core/views_expo.py
@@ -9,6 +9,7 @@ from django.http import HttpResponse, HttpResponseRedirect, Http404
 from django.urls import reverse, resolve
 from django.template import Context, loader
 from django.views.decorators.csrf import ensure_csrf_cookie
+from django.contrib import admin

 import django.forms as forms

@@ -55,93 +56,96 @@ def expofilesdir(request, dirpath, filepath):
            fileitems.append((Path(urlpath) / f.parts[-1], str(f.parts[-1]), getmimetype(f)))
    return render(request, 'dirdisplay.html', { 'filepath': urlpath, 'fileitems':fileitems, 'diritems': diritems,'settings': settings })

+def expowebpage(request, expowebpath, path):
+    '''Adds menus and serves an HTML page
+    '''
+    if not Path(expowebpath / path).is_file():
+        return render(request, 'pagenotfound.html', {'path': path})
+        
+    with open(os.path.normpath(expowebpath / path), "rb") as o:   
+        html = o.read()
+    
+    m = re.search(rb'(.*)<\s*head([^>]*)>(.*)<\s*/head\s*>(.*)<\s*body([^>]*)>(.*)<\s*/body\s*>(.*)', html, re.DOTALL + re.IGNORECASE)
+    if m:
+        preheader, headerattrs, head, postheader, bodyattrs, body, postbody = m.groups()
+    else:
+        return HttpResponse(html + "HTML Parsing failure: Page could not be split into header and body: failed in expowebpage in views_expo.py")
+    m = re.search(rb"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE)
+    if m:
+        title, = m.groups()
+    else:
+        title = ""
+    m = re.search(rb"<meta([^>]*)noedit", head, re.DOTALL + re.IGNORECASE)
+    if m:
+        editable = False
+    else:
+        editable = True
+    
+    has_menu = False
+    menumatch = re.match(rb'(.*)<div id="menu">', body, re.DOTALL + re.IGNORECASE)
+    if menumatch:
+        has_menu = True
+    menumatch = re.match(rb'(.*)<ul id="links">', body, re.DOTALL + re.IGNORECASE)
+    if menumatch:
+        has_menu = True
+    return render(request, 'flatpage.html', {'editable': editable, 'path': path, 'title': title, 
+                'body': body, 'homepage': (path == "index.htm"), 'has_menu': has_menu})
+    
+
 def expopage(request, path):
    '''Either renders an HTML page from expoweb with all the menus,
    or serves an unadorned binary file with mime type
-    
-    This is a horrible mess and some code is redundant and unreachable because of urls.py setup
    '''
-    # print(" - EXPOPAGES delivering the file: {} as MIME type: {}".format(path,getmimetype(path)),flush=True)
+    #print(" - EXPOPAGES delivering the file: '{}':{} as MIME type: {}".format(request.path, path,getmimetype(path)),flush=True)

    if path.startswith("noinfo") and settings.PUBLIC_SITE and not request.user.is_authenticated():
-        # print((" - EXPOPAGES redirect to logon: flat path noinfo", path))
        return HttpResponseRedirect(urljoin(reverse("auth_login"),'?next={}'.format(request.path)))

-    expowebpath = Path(settings.EXPOWEB)
-    if path.endswith("/") or path == "":
-        # print(" - EXPOPAGES the file: {} ENDSWITH ...".format(path))
+    if path.startswith("admin/"):
+        # don't even attempt to handle these sorts of mistakes
+        return HttpResponseRedirect("/admin/")
+
+    expowebpath = Path(settings.EXPOWEB)
+
+    if path == "":
+        return expowebpage(request, expowebpath, "index.htm")

-        try:
-            o = open(os.path.normpath(expowebpath / path / "index.html"), "rb")
-            path = path + "index.html"
-        except IOError:
-            try:
-                o = open(os.path.normpath(expowebpath / path / "index.htm"), "rb")
-                path = path + "index.htm"
-            except IOError:
-                return render(request, 'pagenotfound.html', {'path': path})
-    else:
-        # print(" - EXPOPAGES the file: '{}'  ...".format(path))
-        if path.startswith('site_media'):
-            # print(" - MEDIA_ROOT: {}  ...{}".format(settings.MEDIA_ROOT, path))
-            path = path.replace("site_media", settings.MEDIA_ROOT)
-            filetobeopened = os.path.normpath(path)
-        elif path.startswith("static"):
-            # print(" - STATIC_ROOT: {}  ...{}".format(settings.MEDIA_ROOT, path))
-            path = path.replace("static", settings.MEDIA_ROOT)
-            filetobeopened = os.path.normpath(path)
-        else:
-            # print(" - NO    _ROOT: {}  ...".format(expowebpath))
-            filetobeopened = os.path.normpath(expowebpath / path)
-        
-        # print(" - EXPOPAGES full path : {}  ...".format(filetobeopened))
-        try:
-            o = open(filetobeopened, "rb")
-            #print(" - EXPOPAGES full path no error: {}  ...".format(filetobeopened))
-        except IOError:
-            #print(" - EXPOPAGES ERROR: {}  ...".format(filetobeopened))
-            #o.close() 
-            return render(request, 'pagenotfound.html', {'path': path})
- 
- 
    if path.endswith(".htm") or path.endswith(".html"):
-        # add the menus etc.
-        with open(os.path.normpath(expowebpath / path), "rb") as o:   
-            html = o.read()
+        return expowebpage(request, expowebpath, path)
+    
+    if Path(expowebpath / path ).is_dir():
+        for p in ["index.html", "index.htm", "default.html"]:
+            try:
+                o = open(os.path.normpath(expowebpath / path / p), "rb")
+            except IOError:
+                pass
+            else: # no exception, so file was found
+                return expowebpage(request, expowebpath, Path(path) / p)
+        return render(request, 'pagenotfound.html', {'path': Path(path) / "index.html"})
        
-        m = re.search(rb'(.*)<\s*head([^>]*)>(.*)<\s*/head\s*>(.*)<\s*body([^>]*)>(.*)<\s*/body\s*>(.*)', html, re.DOTALL + re.IGNORECASE)
-        if m:
-            preheader, headerattrs, head, postheader, bodyattrs, body, postbody = m.groups()
-        else:
-            return HttpResponse(html + "HTML Parsing failure: Page could not be split into header and body: failed in expopages.views.py")
-        m = re.search(rb"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE)
-        if m:
-            title, = m.groups()
-        else:
-            title = ""
-        m = re.search(rb"<meta([^>]*)noedit", head, re.DOTALL + re.IGNORECASE)
-        if m:
-            editable = False
-        else:
-            editable = True
-        
-        has_menu = False
-        menumatch = re.match(rb'(.*)<div id="menu">', body, re.DOTALL + re.IGNORECASE)
-        if menumatch:
-            has_menu = True
-        menumatch = re.match(rb'(.*)<ul id="links">', body, re.DOTALL + re.IGNORECASE)
-        if menumatch:
-            has_menu = True
-            #body, = menumatch.groups()
-#        if re.search(rb"iso-8859-1", html):
-#            body = str(body, "iso-8859-1")
-#            body.strip
-        return render(request, 'flatpage.html', {'editable': editable, 'path': path, 'title': title, 
-                    'body': body, 'homepage': (path == "index.htm"), 'has_menu': has_menu})
+    if path.endswith("/"):
+        # we already know it is not a directory.
+        # the final / may have been appended by middleware if there was no page without it
+        # do not redirect to a file path without the slash as we may get in a loop. Let the user fix it:
+        return render(request, 'dirnotfound.html', {'path': path, 'subpath': path[0:-1]})
+   
+    if path.startswith('site_media'): # BUT we may have missing files, directories or .html here too?!
+        # print(" - MEDIA_ROOT: {}  ...{}".format(settings.MEDIA_ROOT, path))
+        npath = path.replace("site_media", settings.MEDIA_ROOT)
+        filetobeopened = os.path.normpath(npath)
+    elif path.startswith("static"):
+        # print(" - STATIC_ROOT: {}  ...{}".format(settings.MEDIA_ROOT, path))
+        npath = path.replace("static", settings.MEDIA_ROOT)
+        filetobeopened = os.path.normpath(npath)
    else:
-        # print(" - EXPOPAGES delivering the file: {} as MIME type: {}".format(path,getmimetype(path)))
+        filetobeopened = os.path.normpath(expowebpath / path)
+    
+    try:
        return HttpResponse(content=open(filetobeopened, "rb"), content_type=getmimetype(path))
-        #return HttpResponse(content=open(singlescan.ffile,"rb"), content_type=getmimetype(path))
+    except IOError:
+        return render(request, 'pagenotfound.html', {'path': path})
+ 
+ 

 def getmimetype(path):
    path = str(path)