making flat files delivery more robust

This commit is contained in:
Philip Sargent 2021-03-31 17:57:43 +01:00
parent 3452c2c5d4
commit a6ed0a964e
4 changed files with 119 additions and 93 deletions

View File

@ -9,6 +9,7 @@ from django.http import HttpResponse, HttpResponseRedirect, Http404
from django.urls import reverse, resolve from django.urls import reverse, resolve
from django.template import Context, loader from django.template import Context, loader
from django.views.decorators.csrf import ensure_csrf_cookie from django.views.decorators.csrf import ensure_csrf_cookie
from django.contrib import admin
import django.forms as forms import django.forms as forms
@ -55,93 +56,96 @@ def expofilesdir(request, dirpath, filepath):
fileitems.append((Path(urlpath) / f.parts[-1], str(f.parts[-1]), getmimetype(f))) fileitems.append((Path(urlpath) / f.parts[-1], str(f.parts[-1]), getmimetype(f)))
return render(request, 'dirdisplay.html', { 'filepath': urlpath, 'fileitems':fileitems, 'diritems': diritems,'settings': settings }) return render(request, 'dirdisplay.html', { 'filepath': urlpath, 'fileitems':fileitems, 'diritems': diritems,'settings': settings })
def expowebpage(request, expowebpath, path):
    '''Adds menus and serves an HTML page

    request     -- the current request, passed through to render()
    expowebpath -- Path of the expoweb root directory
    path        -- location of the page relative to expowebpath (str or Path)

    Returns the 'pagenotfound.html' response if the file does not exist, the raw
    file contents followed by an error message if the page cannot be split into
    <head> and <body>, and otherwise the page body rendered by 'flatpage.html'.
    '''
    if not Path(expowebpath / path).is_file():
        return render(request, 'pagenotfound.html', {'path': path})

    # The file is read as bytes, so every pattern and literal used on it below is bytes too.
    with open(os.path.normpath(expowebpath / path), "rb") as o:
        html = o.read()

    flags = re.DOTALL | re.IGNORECASE
    m = re.search(rb'(.*)<\s*head([^>]*)>(.*)<\s*/head\s*>(.*)<\s*body([^>]*)>(.*)<\s*/body\s*>(.*)', html, flags)
    if not m:
        # html is bytes: the message has to be bytes as well, bytes + str raises TypeError
        return HttpResponse(html + b"HTML Parsing failure: Page could not be split into header and body: failed in expowebpage in views_expo.py")
    # Only the contents of <head> and <body> are used; the other groups are ignored.
    _preheader, _headerattrs, head, _postheader, _bodyattrs, body, _postbody = m.groups()

    m = re.search(rb"<title>(.*)</title>", head, flags)
    title = m.group(1) if m else ""

    # A <meta ... noedit> tag in the head marks the page as not editable.
    editable = re.search(rb"<meta([^>]*)noedit", head, flags) is None

    # Either of these elements in the body means the page already carries a menu.
    has_menu = any(
        re.match(pattern, body, flags)
        for pattern in (rb'(.*)<div id="menu">', rb'(.*)<ul id="links">')
    )

    return render(request, 'flatpage.html', {'editable': editable, 'path': path, 'title': title,
                                             'body': body, 'homepage': (path == "index.htm"), 'has_menu': has_menu})
def expopage(request, path): def expopage(request, path):
'''Either renders an HTML page from expoweb with all the menus, '''Either renders an HTML page from expoweb with all the menus,
or serves an unadorned binary file with mime type or serves an unadorned binary file with mime type
This is a horrible mess and some code is redundant and unreachable because of urls.py setup
''' '''
# print(" - EXPOPAGES delivering the file: {} as MIME type: {}".format(path,getmimetype(path)),flush=True) #print(" - EXPOPAGES delivering the file: '{}':{} as MIME type: {}".format(request.path, path,getmimetype(path)),flush=True)
if path.startswith("noinfo") and settings.PUBLIC_SITE and not request.user.is_authenticated(): if path.startswith("noinfo") and settings.PUBLIC_SITE and not request.user.is_authenticated():
# print((" - EXPOPAGES redirect to logon: flat path noinfo", path))
return HttpResponseRedirect(urljoin(reverse("auth_login"),'?next={}'.format(request.path))) return HttpResponseRedirect(urljoin(reverse("auth_login"),'?next={}'.format(request.path)))
if path.startswith("admin/"):
# don't even attempt to handle these sorts of mistakes
return HttpResponseRedirect("/admin/")
expowebpath = Path(settings.EXPOWEB) expowebpath = Path(settings.EXPOWEB)
if path.endswith("/") or path == "":
# print(" - EXPOPAGES the file: {} ENDSWITH ...".format(path))
try:
o = open(os.path.normpath(expowebpath / path / "index.html"), "rb")
path = path + "index.html"
except IOError:
try:
o = open(os.path.normpath(expowebpath / path / "index.htm"), "rb")
path = path + "index.htm"
except IOError:
return render(request, 'pagenotfound.html', {'path': path})
else:
# print(" - EXPOPAGES the file: '{}' ...".format(path))
if path.startswith('site_media'):
# print(" - MEDIA_ROOT: {} ...{}".format(settings.MEDIA_ROOT, path))
path = path.replace("site_media", settings.MEDIA_ROOT)
filetobeopened = os.path.normpath(path)
elif path.startswith("static"):
# print(" - STATIC_ROOT: {} ...{}".format(settings.MEDIA_ROOT, path))
path = path.replace("static", settings.MEDIA_ROOT)
filetobeopened = os.path.normpath(path)
else:
# print(" - NO _ROOT: {} ...".format(expowebpath))
filetobeopened = os.path.normpath(expowebpath / path)
# print(" - EXPOPAGES full path : {} ...".format(filetobeopened))
try:
o = open(filetobeopened, "rb")
#print(" - EXPOPAGES full path no error: {} ...".format(filetobeopened))
except IOError:
#print(" - EXPOPAGES ERROR: {} ...".format(filetobeopened))
#o.close()
return render(request, 'pagenotfound.html', {'path': path})
if path == "":
return expowebpage(request, expowebpath, "index.htm")
if path.endswith(".htm") or path.endswith(".html"): if path.endswith(".htm") or path.endswith(".html"):
# add the menus etc. return expowebpage(request, expowebpath, path)
with open(os.path.normpath(expowebpath / path), "rb") as o:
html = o.read()
m = re.search(rb'(.*)<\s*head([^>]*)>(.*)<\s*/head\s*>(.*)<\s*body([^>]*)>(.*)<\s*/body\s*>(.*)', html, re.DOTALL + re.IGNORECASE) if Path(expowebpath / path ).is_dir():
if m: for p in ["index.html", "index.htm", "default.html"]:
preheader, headerattrs, head, postheader, bodyattrs, body, postbody = m.groups() try:
else: o = open(os.path.normpath(expowebpath / path / p), "rb")
return HttpResponse(html + "HTML Parsing failure: Page could not be split into header and body: failed in expopages.views.py") except IOError:
m = re.search(rb"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE) pass
if m: else: # no exception, so file was found
title, = m.groups() return expowebpage(request, expowebpath, Path(path) / p)
else: return render(request, 'pagenotfound.html', {'path': Path(path) / "index.html"})
title = ""
m = re.search(rb"<meta([^>]*)noedit", head, re.DOTALL + re.IGNORECASE)
if m:
editable = False
else:
editable = True
has_menu = False if path.endswith("/"):
menumatch = re.match(rb'(.*)<div id="menu">', body, re.DOTALL + re.IGNORECASE) # we already know it is not a directory.
if menumatch: # the final / may have been appended by middleware if there was no page without it
has_menu = True # do not redirect to a file path without the slash as we may get in a loop. Let the user fix it:
menumatch = re.match(rb'(.*)<ul id="links">', body, re.DOTALL + re.IGNORECASE) return render(request, 'dirnotfound.html', {'path': path, 'subpath': path[0:-1]})
if menumatch:
has_menu = True if path.startswith('site_media'): # BUT we may have missing files, directories or .html here too?!
#body, = menumatch.groups() # print(" - MEDIA_ROOT: {} ...{}".format(settings.MEDIA_ROOT, path))
# if re.search(rb"iso-8859-1", html): npath = path.replace("site_media", settings.MEDIA_ROOT)
# body = str(body, "iso-8859-1") filetobeopened = os.path.normpath(npath)
# body.strip elif path.startswith("static"):
return render(request, 'flatpage.html', {'editable': editable, 'path': path, 'title': title, # print(" - STATIC_ROOT: {} ...{}".format(settings.MEDIA_ROOT, path))
'body': body, 'homepage': (path == "index.htm"), 'has_menu': has_menu}) npath = path.replace("static", settings.MEDIA_ROOT)
filetobeopened = os.path.normpath(npath)
else: else:
# print(" - EXPOPAGES delivering the file: {} as MIME type: {}".format(path,getmimetype(path))) filetobeopened = os.path.normpath(expowebpath / path)
try:
return HttpResponse(content=open(filetobeopened, "rb"), content_type=getmimetype(path)) return HttpResponse(content=open(filetobeopened, "rb"), content_type=getmimetype(path))
#return HttpResponse(content=open(singlescan.ffile,"rb"), content_type=getmimetype(path)) except IOError:
return render(request, 'pagenotfound.html', {'path': path})
def getmimetype(path): def getmimetype(path):
path = str(path) path = str(path)

View File

@ -1,7 +1,6 @@
from django.conf import settings from django.conf import settings
from django import http from django import http
from django.urls import reverse, resolve from django.urls import reverse, resolve,Resolver404
#from django.core.urlresolvers import resolve
"""Non-standard django middleware is loaded from this file. """Non-standard django middleware is loaded from this file.
""" """
@ -17,19 +16,31 @@ class SmartAppendSlashMiddleware(object):
""" """
def process_request(self, request): def process_request(self, request):
""" '''Called for every url so return as quickly as possible
Rewrite the URL based on settings.SMART_APPEND_SLASH Append a slash if SMART_APPEND_SLASH is set, the resulting URL resolves and it doesn't without the /
""" '''
if not settings.SMART_APPEND_SLASH:
return None
if request.path.endswith('/'):
return None
if request.path.endswith('_edit'):
return None
# Check for a redirect based on settings.SMART_APPEND_SLASH
host = http.HttpRequest.get_host(request) host = http.HttpRequest.get_host(request)
old_url = [host, request.path] old_url = [host, request.path]
if _resolves(old_url[1]):
return None
# So: it does not resolve according to our criteria, i.e. _edit doesn't count
new_url = old_url[:] new_url = old_url[:]
# Append a slash if SMART_APPEND_SLASH is set and the resulting URL new_url[1] = new_url[1] + '/'
# resolves. if not _resolves(new_url[1]):
if settings.SMART_APPEND_SLASH and (not old_url[1].endswith('/')) and not _resolves(old_url[1]) and _resolves(old_url[1] + '/'): return None
new_url[1] = new_url[1] + '/' else:
if settings.DEBUG and request.method == 'POST': if settings.DEBUG and request.method == 'POST':
# replace this exception with a redirect to an error page
raise RuntimeError("You called this URL via POST, but the URL doesn't end in a slash and you have SMART_APPEND_SLASH set. Django can't redirect to the slash URL while maintaining POST data. Change your form to point to %s%s (note the trailing slash), or set SMART_APPEND_SLASH=False in your Django settings." % (new_url[0], new_url[1])) raise RuntimeError("You called this URL via POST, but the URL doesn't end in a slash and you have SMART_APPEND_SLASH set. Django can't redirect to the slash URL while maintaining POST data. Change your form to point to %s%s (note the trailing slash), or set SMART_APPEND_SLASH=False in your Django settings." % (new_url[0], new_url[1]))
if new_url != old_url: if new_url != old_url:
# Redirect # Redirect
@ -45,9 +56,12 @@ class SmartAppendSlashMiddleware(object):
def _resolves(url): def _resolves(url):
try: try:
resolve(url) # If the URL does not resolve, the function raises a Resolver404 exception (a subclass of Http404)
match = resolve(url)
# this will ALWAYS be resolved by expopages because it will produce pagenotfound if not the thing asked for
# so handle this in expopages, not in middleware
return True return True
except http.Http404: except Resolver404:
return False return False
except: except:
print(url) print(url)

View File

@ -104,7 +104,7 @@ LOGBOOK_PARSER_SETTINGS = {
"1982": ("1982/log.htm", "Parseloghtml01"), "1982": ("1982/log.htm", "Parseloghtml01"),
} }
APPEND_SLASH = False APPEND_SLASH = False # never relevant because we have urls that match unknown files and produce an 'edit this page' response
SMART_APPEND_SLASH = True SMART_APPEND_SLASH = True
@ -130,7 +130,7 @@ INSTALLED_APPS = (
# See the recommended order of these in https://docs.djangoproject.com/en/2.2/ref/middleware/ # See the recommended order of these in https://docs.djangoproject.com/en/2.2/ref/middleware/
MIDDLEWARE_CLASSES = ( MIDDLEWARE_CLASSES = (
#'django.middleware.security.SecurityMiddleware', # SECURE_SSL_REDIRECT and SECURE_SSL_HOST # we don't use this #'django.middleware.security.SecurityMiddleware', # SECURE_SSL_REDIRECT and SECURE_SSL_HOST # we don't use this
'django.middleware.gzip.GZipMiddleware', # not needed as expofiles and photos served by apache 'django.middleware.gzip.GZipMiddleware', # not needed when expofiles and photos served by apache
'django.contrib.sessions.middleware.SessionMiddleware', # Manages sessions, if CSRF_USE_SESSIONS then it needs to be early 'django.contrib.sessions.middleware.SessionMiddleware', # Manages sessions, if CSRF_USE_SESSIONS then it needs to be early
'django.middleware.common.CommonMiddleware', # DISALLOWED_USER_AGENTS, APPEND_SLASH and PREPEND_WWW 'django.middleware.common.CommonMiddleware', # DISALLOWED_USER_AGENTS, APPEND_SLASH and PREPEND_WWW
'django.middleware.csrf.CsrfViewMiddleware', # Cross Site Request Forgeries by adding hidden form fields to POST 'django.middleware.csrf.CsrfViewMiddleware', # Cross Site Request Forgeries by adding hidden form fields to POST
@ -138,20 +138,20 @@ MIDDLEWARE_CLASSES = (
'django.contrib.admindocs.middleware.XViewMiddleware', # this and docutils needed by admindocs 'django.contrib.admindocs.middleware.XViewMiddleware', # this and docutils needed by admindocs
'django.contrib.messages.middleware.MessageMiddleware', # Cookie-based and session-based message support. Needed by admin system 'django.contrib.messages.middleware.MessageMiddleware', # Cookie-based and session-based message support. Needed by admin system
'django.middleware.clickjacking.XFrameOptionsMiddleware', # clickjacking protection via the X-Frame-Options header 'django.middleware.clickjacking.XFrameOptionsMiddleware', # clickjacking protection via the X-Frame-Options header
'troggle.middleware.SmartAppendSlashMiddleware' # 'troggle.middleware.SmartAppendSlashMiddleware' # doesn't seem to be working...
) )
ROOT_URLCONF = 'troggle.urls' ROOT_URLCONF = 'troggle.urls'
WSGI_APPLICATION = 'troggle.wsgi.application' WSGI_APPLICATION = 'troggle.wsgi.application' # change to asgi as soon as we upgrade to Django 3.0
ACCOUNT_ACTIVATION_DAYS=3 ACCOUNT_ACTIVATION_DAYS=3
AUTH_PROFILE_MODULE = 'core.person' # AUTH_PROFILE_MODULE = 'core.person' # used by removed profiles app ?
QM_PATTERN="\[\[\s*[Qq][Mm]:([ABC]?)(\d{4})-(\d*)-(\d*)\]\]" QM_PATTERN="\[\[\s*[Qq][Mm]:([ABC]?)(\d{4})-(\d*)-(\d*)\]\]"
# Re-enable TinyMCE when Dj upgraded to v3. Also templates/editflatpage.html # Re-enable TinyMCE when Dj upgraded to v3. Also templates/editexpopage.html
# TINYMCE_DEFAULT_CONFIG = { # TINYMCE_DEFAULT_CONFIG = {
# 'plugins': "table,spellchecker,paste,searchreplace", # 'plugins': "table,spellchecker,paste,searchreplace",
# 'theme': "advanced", # 'theme': "advanced",

View File

@ -0,0 +1,8 @@
{% extends "expobase.html" %}
{% block title %}Directory not found {{ path }}{% endblock %}
{% block body %}
<h1>Directory not found '{{ path }}'</h1>
<h3>Click here: <a href="/{{ subpath }}">/{{ subpath }}</a> </h3>
<p>i.e. without the final '/'
{% include "menu.html" %}
{% endblock %}