diff --git a/core/views/expo.py b/core/views/expo.py index 84367f7b..ffa39e24 100644 --- a/core/views/expo.py +++ b/core/views/expo.py @@ -66,10 +66,12 @@ def expofiles_redirect(request, filepath): """ return redirect(urljoin("http://expo.survex.com/expofiles/", filepath)) -def spider(request, _): - # urls ending in "_edit_edit" or .php - return render(request, "pagenotfound.html", {"path": path}, status=404) - # return redirect("/?#") # so that suffixes applied by spider are no longer part of the url +def spider(request, path): + """These are simple filename filters set in urls.py that we know we don't have any of in troggle, + e.g. urls ending in "_edit_edit" or .php + """ + return logger404(request, path) + # return redirect("/?#") # so that suffixes applied by spider are no longer part of the url def map(request): """Serves unadorned the expoweb/map/slippy/map.html file""" @@ -290,12 +292,22 @@ def mediapage(request, subpath=None, doc_root=None): else: return render(request, "pagenotfound.html", {"path": subpath}, status=404) +def logger404(request, path, e=None): + logger = logging.getLogger('troggle') + meta = request.META + # stamp = datetime.now().isoformat(sep=' ', timespec='seconds') + if not e: + logmsg = f"[{meta['REMOTE_ADDR']:>15}] '/{path}' FILE NOT FOUND 404" + logger.warning(logmsg) + else: + logmsg = f"[{meta['REMOTE_ADDR']:>15}] '/{path}' EXCEPTION:'{e}'" + logger.warning(logmsg) + return render(request, "pagenotfound.html", {"path": path}, status=404) def expopage(request, path): """Either renders an HTML page from expoweb with all the menus, or serves an unadorned binary file with mime type """ - logger = logging.getLogger('troggle') # print(" - EXPOPAGES delivering the file: '{}':{} as MIME type: {}".format(request.path, path,getmimetype(path)),flush=True) if path.startswith("noinfo") and settings.PUBLIC_SITE and not request.user.is_authenticated: @@ -336,27 +348,16 @@ def expopage(request, path): + "

UTF-8 Parsing Failure:
Default file encoding on this Troggle installation is not UTF-8:
failure detected in expowebpage in views.expo.py

Please Please reconfigure Debian/Apache/Django to fix this, i.e. contact Wookey. 15} [{stamp}] '/{path}'" - #DataIssue.objects.create(parser="view404", message=message) - logmsg = f"[{meta['REMOTE_ADDR']:>15}] '/{path}' FILE NOT FOUND 404" - logger.warning(logmsg) - return render(request, "pagenotfound.html", {"path": path}, status=404) + return logger404(request, path) except Exception as e: - message = f" ! - 404? REMOTE_ADDR: {meta['REMOTE_ADDR']:>15} [{stamp}] '/{path}' {e}" - #DataIssue.objects.create(parser="view404", message=message) - logmsg = f"[{meta['REMOTE_ADDR']:>15}] '/{path}' FILE NOT FOUND 404" - logger.warning(logmsg) - return render(request, "pagenotfound.html", {"path": path}, status=404) + return logger404(request, path, e) - def getmimetype(path): """Our own version rather than relying on what is provided by the python library. Note that when Apache or nginx is used to deliver /expofiles/ it will use it's own idea of mimetypes and diff --git a/urls.py b/urls.py index 2f2ebfcd..a01230b7 100644 --- a/urls.py +++ b/urls.py @@ -158,12 +158,12 @@ trogglepatterns = [ path('expofiles/', include(expofilesurls)), # intercepted by Apache, if it is running. path('expofiles', include(expofilesurls)), # curious interaction with the include() here, not just a slash problem. - re_path(r'^(.*)_edit_edit$', spider, name="spider"), # web spider or bot. Intercept and manage it. - re_path(r'^(.*)\.php$', spider, name="spider"), # web spider or bot. Intercept and manage it. - re_path(r'^(.*)\.do$', spider, name="spider"), # web spider or bot. Intercept and manage it. - re_path(r'^wp-admin(.*)', spider, name="spider"), # web spider or bot. Intercept and manage it. - re_path(r'^wp-content(.*)', spider, name="spider"), # web spider or bot. Intercept and manage it. - re_path(r'^wp-includes(.*)', spider, name="spider"), # web spider or bot. Intercept and manage it. + re_path(r'^(.*_edit_edit)$', spider, name="spider"), # web spider or bot. Intercept and manage it. 
+ re_path(r'^(.*\.php)$', spider, name="spider"), # web spider or bot. Intercept and manage it. + re_path(r'^(.*\.do)$', spider, name="spider"), # web spider or bot. Intercept and manage it. + re_path(r'^(wp-admin.*)', spider, name="spider"), # web spider or bot. Intercept and manage it. + re_path(r'^(wp-content.*)', spider, name="spider"), # web spider or bot. Intercept and manage it. + re_path(r'^(wp-includes.*)', spider, name="spider"), # web spider or bot. Intercept and manage it. re_path(r'^caves$', cavesall, name="cavesall"), re_path(r'^indxal.htm$', cavesall, name="cavesall"), # ~420 hrefs to this url in expoweb files