2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2026-02-28 17:05:22 +00:00

extend logging to the previously-ignored .php etc paths

This commit is contained in:
2026-02-13 17:46:15 +00:00
parent 55c3eb4e25
commit 4659519791
2 changed files with 25 additions and 24 deletions

View File

@@ -66,9 +66,11 @@ def expofiles_redirect(request, filepath):
""" """
return redirect(urljoin("http://expo.survex.com/expofiles/", filepath)) return redirect(urljoin("http://expo.survex.com/expofiles/", filepath))
def spider(request, _): def spider(request, path):
# urls ending in "_edit_edit" or .php """These are simple filename filters set in urls.py that we know we don't have any of in troggle,
return render(request, "pagenotfound.html", {"path": path}, status=404) e.g. urls ending in "_edit_edit" or .php
"""
return logger404(request, path)
# return redirect("/?#") # so that suffixes applied by spider are no longer part of the url # return redirect("/?#") # so that suffixes applied by spider are no longer part of the url
def map(request): def map(request):
@@ -290,12 +292,22 @@ def mediapage(request, subpath=None, doc_root=None):
else: else:
return render(request, "pagenotfound.html", {"path": subpath}, status=404) return render(request, "pagenotfound.html", {"path": subpath}, status=404)
def logger404(request, path, e=None):
    """Log a failed page request on the 'troggle' logger and return the 404 page.

    Args:
        request: the incoming request; request.META supplies REMOTE_ADDR for the log line.
        path: the requested path, echoed in the log message and passed to the template.
        e: optional exception that caused the failure. When None, the request is logged
            as "FILE NOT FOUND 404"; otherwise the exception text is logged instead.

    Returns:
        The rendered "pagenotfound.html" response with HTTP status 404.
    """
    logger = logging.getLogger('troggle')
    # REMOTE_ADDR may be missing from META; fall back to a placeholder so that
    # writing the log line can never turn a 404 into a 500.
    remote_addr = str(request.META.get('REMOTE_ADDR', '-'))
    if e is None:
        logmsg = f"[{remote_addr:>15}] '/{path}' FILE NOT FOUND 404"
    else:
        logmsg = f"[{remote_addr:>15}] '/{path}' EXCEPTION:'{e}'"
    logger.warning(logmsg)
    return render(request, "pagenotfound.html", {"path": path}, status=404)
def expopage(request, path): def expopage(request, path):
"""Either renders an HTML page from expoweb with all the menus, """Either renders an HTML page from expoweb with all the menus,
or serves an unadorned binary file with mime type or serves an unadorned binary file with mime type
""" """
logger = logging.getLogger('troggle')
# print(" - EXPOPAGES delivering the file: '{}':{} as MIME type: {}".format(request.path, path,getmimetype(path)),flush=True) # print(" - EXPOPAGES delivering the file: '{}':{} as MIME type: {}".format(request.path, path,getmimetype(path)),flush=True)
if path.startswith("noinfo") and settings.PUBLIC_SITE and not request.user.is_authenticated: if path.startswith("noinfo") and settings.PUBLIC_SITE and not request.user.is_authenticated:
@@ -336,25 +348,14 @@ def expopage(request, path):
+ "<h3>UTF-8 Parsing Failure:<br>Default file encoding on this Troggle installation is not UTF-8:<br>failure detected in expowebpage in views.expo.py</h3> Please Please reconfigure Debian/Apache/Django to fix this, i.e. contact Wookey. </body" + "<h3>UTF-8 Parsing Failure:<br>Default file encoding on this Troggle installation is not UTF-8:<br>failure detected in expowebpage in views.expo.py</h3> Please Please reconfigure Debian/Apache/Django to fix this, i.e. contact Wookey. </body"
) )
meta = request.META
stamp = datetime.now().isoformat(sep=' ', timespec='seconds')
try: try:
content = open(filetobeopened, "rb") content = open(filetobeopened, "rb")
content_type = getmimetype(path) content_type = getmimetype(path)
return HttpResponse(content=content, content_type=content_type) return HttpResponse(content=content, content_type=content_type)
except FileNotFoundError as e: except FileNotFoundError as e:
message = f" ! - 404 FileNotFound REMOTE_ADDR: {meta['REMOTE_ADDR']:>15} [{stamp}] '/{path}'" return logger404(request, path)
#DataIssue.objects.create(parser="view404", message=message)
logmsg = f"[{meta['REMOTE_ADDR']:>15}] '/{path}' FILE NOT FOUND 404"
logger.warning(logmsg)
return render(request, "pagenotfound.html", {"path": path}, status=404)
except Exception as e: except Exception as e:
message = f" ! - 404? REMOTE_ADDR: {meta['REMOTE_ADDR']:>15} [{stamp}] '/{path}' {e}" return logger404(request, path, e)
#DataIssue.objects.create(parser="view404", message=message)
logmsg = f"[{meta['REMOTE_ADDR']:>15}] '/{path}' FILE NOT FOUND 404"
logger.warning(logmsg)
return render(request, "pagenotfound.html", {"path": path}, status=404)
def getmimetype(path): def getmimetype(path):

12
urls.py
View File

@@ -158,12 +158,12 @@ trogglepatterns = [
path('expofiles/', include(expofilesurls)), # intercepted by Apache, if it is running. path('expofiles/', include(expofilesurls)), # intercepted by Apache, if it is running.
path('expofiles', include(expofilesurls)), # curious interaction with the include() here, not just a slash problem. path('expofiles', include(expofilesurls)), # curious interaction with the include() here, not just a slash problem.
re_path(r'^(.*)_edit_edit$', spider, name="spider"), # web spider or bot. Intercept and manage it. re_path(r'^(.*_edit_edit)$', spider, name="spider"), # web spider or bot. Intercept and manage it.
re_path(r'^(.*)\.php$', spider, name="spider"), # web spider or bot. Intercept and manage it. re_path(r'^(.*.php)$', spider, name="spider"), # web spider or bot. Intercept and manage it.
re_path(r'^(.*)\.do$', spider, name="spider"), # web spider or bot. Intercept and manage it. re_path(r'^(.*\.do)$', spider, name="spider"), # web spider or bot. Intercept and manage it.
re_path(r'^wp-admin(.*)', spider, name="spider"), # web spider or bot. Intercept and manage it. re_path(r'^(wp-admin.*)', spider, name="spider"), # web spider or bot. Intercept and manage it.
re_path(r'^wp-content(.*)', spider, name="spider"), # web spider or bot. Intercept and manage it. re_path(r'^(wp-content.*)', spider, name="spider"), # web spider or bot. Intercept and manage it.
re_path(r'^wp-includes(.*)', spider, name="spider"), # web spider or bot. Intercept and manage it. re_path(r'^(wp-includes.*)', spider, name="spider"), # web spider or bot. Intercept and manage it.
re_path(r'^caves$', cavesall, name="cavesall"), re_path(r'^caves$', cavesall, name="cavesall"),
re_path(r'^indxal.htm$', cavesall, name="cavesall"), # ~420 hrefs to this url in expoweb files re_path(r'^indxal.htm$', cavesall, name="cavesall"), # ~420 hrefs to this url in expoweb files