making flat files delivery more robust

This commit is contained in:
Philip Sargent 2021-03-31 17:57:43 +01:00
parent 3452c2c5d4
commit a6ed0a964e
4 changed files with 119 additions and 93 deletions

View File

@ -9,6 +9,7 @@ from django.http import HttpResponse, HttpResponseRedirect, Http404
from django.urls import reverse, resolve
from django.template import Context, loader
from django.views.decorators.csrf import ensure_csrf_cookie
from django.contrib import admin
import django.forms as forms
@ -55,93 +56,96 @@ def expofilesdir(request, dirpath, filepath):
fileitems.append((Path(urlpath) / f.parts[-1], str(f.parts[-1]), getmimetype(f)))
return render(request, 'dirdisplay.html', { 'filepath': urlpath, 'fileitems':fileitems, 'diritems': diritems,'settings': settings })
def expowebpage(request, expowebpath, path):
'''Adds memnus and serves an HTML page
'''
if not Path(expowebpath / path).is_file():
return render(request, 'pagenotfound.html', {'path': path})
with open(os.path.normpath(expowebpath / path), "rb") as o:
html = o.read()
m = re.search(rb'(.*)<\s*head([^>]*)>(.*)<\s*/head\s*>(.*)<\s*body([^>]*)>(.*)<\s*/body\s*>(.*)', html, re.DOTALL + re.IGNORECASE)
if m:
preheader, headerattrs, head, postheader, bodyattrs, body, postbody = m.groups()
else:
return HttpResponse(html + "HTML Parsing failure: Page could not be split into header and body: failed in expowebpage in views_expo.py")
m = re.search(rb"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE)
if m:
title, = m.groups()
else:
title = ""
m = re.search(rb"<meta([^>]*)noedit", head, re.DOTALL + re.IGNORECASE)
if m:
editable = False
else:
editable = True
has_menu = False
menumatch = re.match(rb'(.*)<div id="menu">', body, re.DOTALL + re.IGNORECASE)
if menumatch:
has_menu = True
menumatch = re.match(rb'(.*)<ul id="links">', body, re.DOTALL + re.IGNORECASE)
if menumatch:
has_menu = True
return render(request, 'flatpage.html', {'editable': editable, 'path': path, 'title': title,
'body': body, 'homepage': (path == "index.htm"), 'has_menu': has_menu})
def expopage(request, path):
'''Either renders an HTML page from expoweb with all the menus,
or serves an unadorned binary file with mime type
This is a horrible mess and some code is redundant and unreachable because of urls.py setup
'''
# print(" - EXPOPAGES delivering the file: {} as MIME type: {}".format(path,getmimetype(path)),flush=True)
#print(" - EXPOPAGES delivering the file: '{}':{} as MIME type: {}".format(request.path, path,getmimetype(path)),flush=True)
if path.startswith("noinfo") and settings.PUBLIC_SITE and not request.user.is_authenticated():
# print((" - EXPOPAGES redirect to logon: flat path noinfo", path))
return HttpResponseRedirect(urljoin(reverse("auth_login"),'?next={}'.format(request.path)))
if path.startswith("admin/"):
# don't even attempt to handle these sorts of mistakes
return HttpResponseRedirect("/admin/")
expowebpath = Path(settings.EXPOWEB)
if path.endswith("/") or path == "":
# print(" - EXPOPAGES the file: {} ENDSWITH ...".format(path))
try:
o = open(os.path.normpath(expowebpath / path / "index.html"), "rb")
path = path + "index.html"
except IOError:
try:
o = open(os.path.normpath(expowebpath / path / "index.htm"), "rb")
path = path + "index.htm"
except IOError:
return render(request, 'pagenotfound.html', {'path': path})
else:
# print(" - EXPOPAGES the file: '{}' ...".format(path))
if path.startswith('site_media'):
# print(" - MEDIA_ROOT: {} ...{}".format(settings.MEDIA_ROOT, path))
path = path.replace("site_media", settings.MEDIA_ROOT)
filetobeopened = os.path.normpath(path)
elif path.startswith("static"):
# print(" - STATIC_ROOT: {} ...{}".format(settings.MEDIA_ROOT, path))
path = path.replace("static", settings.MEDIA_ROOT)
filetobeopened = os.path.normpath(path)
else:
# print(" - NO _ROOT: {} ...".format(expowebpath))
filetobeopened = os.path.normpath(expowebpath / path)
# print(" - EXPOPAGES full path : {} ...".format(filetobeopened))
try:
o = open(filetobeopened, "rb")
#print(" - EXPOPAGES full path no error: {} ...".format(filetobeopened))
except IOError:
#print(" - EXPOPAGES ERROR: {} ...".format(filetobeopened))
#o.close()
return render(request, 'pagenotfound.html', {'path': path})
if path == "":
return expowebpage(request, expowebpath, "index.htm")
if path.endswith(".htm") or path.endswith(".html"):
# add the menus etc.
with open(os.path.normpath(expowebpath / path), "rb") as o:
html = o.read()
return expowebpage(request, expowebpath, path)
m = re.search(rb'(.*)<\s*head([^>]*)>(.*)<\s*/head\s*>(.*)<\s*body([^>]*)>(.*)<\s*/body\s*>(.*)', html, re.DOTALL + re.IGNORECASE)
if m:
preheader, headerattrs, head, postheader, bodyattrs, body, postbody = m.groups()
else:
return HttpResponse(html + "HTML Parsing failure: Page could not be split into header and body: failed in expopages.views.py")
m = re.search(rb"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE)
if m:
title, = m.groups()
else:
title = ""
m = re.search(rb"<meta([^>]*)noedit", head, re.DOTALL + re.IGNORECASE)
if m:
editable = False
else:
editable = True
if Path(expowebpath / path ).is_dir():
for p in ["index.html", "index.htm", "default.html"]:
try:
o = open(os.path.normpath(expowebpath / path / p), "rb")
except IOError:
pass
else: # no exception, so file was found
return expowebpage(request, expowebpath, Path(path) / p)
return render(request, 'pagenotfound.html', {'path': Path(path) / "index.html"})
has_menu = False
menumatch = re.match(rb'(.*)<div id="menu">', body, re.DOTALL + re.IGNORECASE)
if menumatch:
has_menu = True
menumatch = re.match(rb'(.*)<ul id="links">', body, re.DOTALL + re.IGNORECASE)
if menumatch:
has_menu = True
#body, = menumatch.groups()
# if re.search(rb"iso-8859-1", html):
# body = str(body, "iso-8859-1")
# body.strip
return render(request, 'flatpage.html', {'editable': editable, 'path': path, 'title': title,
'body': body, 'homepage': (path == "index.htm"), 'has_menu': has_menu})
if path.endswith("/"):
# we already know it is not a directory.
# the final / may have been appended by middleware if there was no page without it
# do not redirect to a file path without the slash as we may get in a loop. Let the user fix it:
return render(request, 'dirnotfound.html', {'path': path, 'subpath': path[0:-1]})
if path.startswith('site_media'): # BUT we may have missing files, directories or .html here too?!
# print(" - MEDIA_ROOT: {} ...{}".format(settings.MEDIA_ROOT, path))
npath = path.replace("site_media", settings.MEDIA_ROOT)
filetobeopened = os.path.normpath(npath)
elif path.startswith("static"):
# print(" - STATIC_ROOT: {} ...{}".format(settings.MEDIA_ROOT, path))
npath = path.replace("static", settings.MEDIA_ROOT)
filetobeopened = os.path.normpath(npath)
else:
# print(" - EXPOPAGES delivering the file: {} as MIME type: {}".format(path,getmimetype(path)))
filetobeopened = os.path.normpath(expowebpath / path)
try:
return HttpResponse(content=open(filetobeopened, "rb"), content_type=getmimetype(path))
#return HttpResponse(content=open(singlescan.ffile,"rb"), content_type=getmimetype(path))
except IOError:
return render(request, 'pagenotfound.html', {'path': path})
def getmimetype(path):
path = str(path)

View File

@ -1,7 +1,6 @@
from django.conf import settings
from django import http
from django.urls import reverse, resolve
#from django.core.urlresolvers import resolve
from django.urls import reverse, resolve,Resolver404
"""Non-standard django middleware is loaded from this file.
"""
@ -17,19 +16,31 @@ class SmartAppendSlashMiddleware(object):
"""
def process_request(self, request):
"""
Rewrite the URL based on settings.SMART_APPEND_SLASH
"""
'''Called for every url so return as quickly as possible
Append a slash if SMART_APPEND_SLASH is set, the resulting URL resolves and it doesn't without the /
'''
if not settings.SMART_APPEND_SLASH:
return None
if request.path.endswith('/'):
return None
if request.path.endswith('_edit'):
return None
# Check for a redirect based on settings.SMART_APPEND_SLASH
host = http.HttpRequest.get_host(request)
old_url = [host, request.path]
if _resolves(old_url[1]):
return None
# So: it does not resolve according to our criteria, i.e. _edit doesn't count
new_url = old_url[:]
# Append a slash if SMART_APPEND_SLASH is set and the resulting URL
# resolves.
if settings.SMART_APPEND_SLASH and (not old_url[1].endswith('/')) and not _resolves(old_url[1]) and _resolves(old_url[1] + '/'):
new_url[1] = new_url[1] + '/'
new_url[1] = new_url[1] + '/'
if not _resolves(new_url[1]):
return None
else:
if settings.DEBUG and request.method == 'POST':
# replace this exception with a redirect to an error page
raise RuntimeError("You called this URL via POST, but the URL doesn't end in a slash and you have SMART_APPEND_SLASH set. Django can't redirect to the slash URL while maintaining POST data. Change your form to point to %s%s (note the trailing slash), or set SMART_APPEND_SLASH=False in your Django settings." % (new_url[0], new_url[1]))
if new_url != old_url:
# Redirect
@ -45,9 +56,12 @@ class SmartAppendSlashMiddleware(object):
def _resolves(url):
try:
resolve(url)
# If the URL does not resolve, the function raises a Resolver404 exception (a subclass of Http404)
match = resolve(url)
# this will ALWAYS be resolved by expopages because it will produce pagenotfound if not the thing asked for
# so handle this in expopages, not in middleware
return True
except http.Http404:
except Resolver404:
return False
except:
print(url)

View File

@ -104,7 +104,7 @@ LOGBOOK_PARSER_SETTINGS = {
"1982": ("1982/log.htm", "Parseloghtml01"),
}
APPEND_SLASH = False
APPEND_SLASH = False # never relevant because we have urls that match unknown files and produce an 'edit this page' response
SMART_APPEND_SLASH = True
@ -130,7 +130,7 @@ INSTALLED_APPS = (
# See the recommended order of these in https://docs.djangoproject.com/en/2.2/ref/middleware/
MIDDLEWARE_CLASSES = (
#'django.middleware.security.SecurityMiddleware', # SECURE_SSL_REDIRECT and SECURE_SSL_HOST # we don't use this
'django.middleware.gzip.GZipMiddleware', # not needed as expofiles and photos served by apache
'django.middleware.gzip.GZipMiddleware', # not needed when expofiles and photos served by apache
'django.contrib.sessions.middleware.SessionMiddleware', # Manages sessions, if CSRF_USE_SESSIONS then it needs to be early
'django.middleware.common.CommonMiddleware', # DISALLOWED_USER_AGENTS, APPEND_SLASH and PREPEND_WWW
'django.middleware.csrf.CsrfViewMiddleware', # Cross Site Request Forgeries by adding hidden form fields to POST
@ -138,20 +138,20 @@ MIDDLEWARE_CLASSES = (
'django.contrib.admindocs.middleware.XViewMiddleware', # this and docutils needed by admindocs
'django.contrib.messages.middleware.MessageMiddleware', # Cookie-based and session-based message support. Needed by admin system
'django.middleware.clickjacking.XFrameOptionsMiddleware', # clickjacking protection via the X-Frame-Options header
'troggle.middleware.SmartAppendSlashMiddleware' #
'troggle.middleware.SmartAppendSlashMiddleware' # doesn't seem to be working...
)
ROOT_URLCONF = 'troggle.urls'
WSGI_APPLICATION = 'troggle.wsgi.application'
WSGI_APPLICATION = 'troggle.wsgi.application' # change to asgi as soon as we upgrade to Django 3.0
ACCOUNT_ACTIVATION_DAYS=3
AUTH_PROFILE_MODULE = 'core.person'
# AUTH_PROFILE_MODULE = 'core.person' # used by removed profiles app ?
QM_PATTERN="\[\[\s*[Qq][Mm]:([ABC]?)(\d{4})-(\d*)-(\d*)\]\]"
# Re-enable TinyMCE when Dj upgraded to v3. Also templates/editflatpage.html
# Re-enable TinyMCE when Dj upgraded to v3. Also templates/editexpopage.html
# TINYMCE_DEFAULT_CONFIG = {
# 'plugins': "table,spellchecker,paste,searchreplace",
# 'theme': "advanced",

View File

@ -0,0 +1,8 @@
{% extends "expobase.html" %}
{% block title %}Directory not found {{ path }}{% endblock %}
{% block body %}
<h1>Directory not found '{{ path }}'</h1>
<h3>Click here: <a href="/{{ subpath }}">/{{ subpath }}</a> </h3>
<p>i.e. without the final '/'
{% include "menu.html" %}
{% endblock %}