2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2026-02-28 15:35:23 +00:00

extend logging to the previously-ignored .php etc paths

This commit is contained in:
2026-02-13 17:46:15 +00:00
parent 55c3eb4e25
commit 4659519791
2 changed files with 25 additions and 24 deletions

View File

@@ -66,9 +66,11 @@ def expofiles_redirect(request, filepath):
"""
return redirect(urljoin("http://expo.survex.com/expofiles/", filepath))
def spider(request, _):
# urls ending in "_edit_edit" or .php
return render(request, "pagenotfound.html", {"path": path}, status=404)
def spider(request, path):
    """Answer requests that match the spider/bot filename filters in urls.py.

    Those filters cover kinds of url that troggle never serves, e.g. urls
    ending in "_edit_edit" or .php, so the request is handed straight to
    logger404 and whatever that returns is sent back.
    """
    response = logger404(request, path)
    # Alternative, not in use: redirect("/?#") so that suffixes applied by
    # the spider are no longer part of the url.
    return response
def map(request):
@@ -290,12 +292,22 @@ def mediapage(request, subpath=None, doc_root=None):
else:
return render(request, "pagenotfound.html", {"path": subpath}, status=404)
def logger404(request, path, e=None):
    """Log a failed page request on the 'troggle' logger and return the 404 page.

    Args:
        request: the incoming request; the client address is taken from
            request.META['REMOTE_ADDR'] when that key is present.
        path: the requested path; it is written to the log prefixed with "/"
            and passed to the template as "path".
        e: optional exception that caused the failure. When None the request
            is logged as a plain missing file, otherwise the exception text
            is included in the log line.

    Returns:
        The rendered "pagenotfound.html" response with HTTP status 404.
    """
    logger = logging.getLogger('troggle')
    # REMOTE_ADDR is not a mandatory WSGI key: fall back to a placeholder so
    # that writing the log line can never turn a 404 into a 500.
    remote_addr = request.META.get('REMOTE_ADDR') or 'unknown'
    if e is None:
        reason = "FILE NOT FOUND 404"
    else:
        reason = f"EXCEPTION:'{e}'"
    logger.warning(f"[{remote_addr:>15}] '/{path}' {reason}")
    return render(request, "pagenotfound.html", {"path": path}, status=404)
def expopage(request, path):
"""Either renders an HTML page from expoweb with all the menus,
or serves an unadorned binary file with mime type
"""
logger = logging.getLogger('troggle')
# print(" - EXPOPAGES delivering the file: '{}':{} as MIME type: {}".format(request.path, path,getmimetype(path)),flush=True)
if path.startswith("noinfo") and settings.PUBLIC_SITE and not request.user.is_authenticated:
@@ -336,25 +348,14 @@ def expopage(request, path):
+ "<h3>UTF-8 Parsing Failure:<br>Default file encoding on this Troggle installation is not UTF-8:<br>failure detected in expowebpage in views.expo.py</h3> Please Please reconfigure Debian/Apache/Django to fix this, i.e. contact Wookey. </body"
)
meta = request.META
stamp = datetime.now().isoformat(sep=' ', timespec='seconds')
try:
content = open(filetobeopened, "rb")
content_type = getmimetype(path)
return HttpResponse(content=content, content_type=content_type)
except FileNotFoundError as e:
message = f" ! - 404 FileNotFound REMOTE_ADDR: {meta['REMOTE_ADDR']:>15} [{stamp}] '/{path}'"
#DataIssue.objects.create(parser="view404", message=message)
logmsg = f"[{meta['REMOTE_ADDR']:>15}] '/{path}' FILE NOT FOUND 404"
logger.warning(logmsg)
return render(request, "pagenotfound.html", {"path": path}, status=404)
return logger404(request, path)
except Exception as e:
message = f" ! - 404? REMOTE_ADDR: {meta['REMOTE_ADDR']:>15} [{stamp}] '/{path}' {e}"
#DataIssue.objects.create(parser="view404", message=message)
logmsg = f"[{meta['REMOTE_ADDR']:>15}] '/{path}' FILE NOT FOUND 404"
logger.warning(logmsg)
return render(request, "pagenotfound.html", {"path": path}, status=404)
return logger404(request, path, e)
def getmimetype(path):

12
urls.py
View File

@@ -158,12 +158,12 @@ trogglepatterns = [
path('expofiles/', include(expofilesurls)), # intercepted by Apache, if it is running.
path('expofiles', include(expofilesurls)), # curious interaction with the include() here, not just a slash problem.
re_path(r'^(.*)_edit_edit$', spider, name="spider"), # web spider or bot. Intercept and manage it.
re_path(r'^(.*)\.php$', spider, name="spider"), # web spider or bot. Intercept and manage it.
re_path(r'^(.*)\.do$', spider, name="spider"), # web spider or bot. Intercept and manage it.
re_path(r'^wp-admin(.*)', spider, name="spider"), # web spider or bot. Intercept and manage it.
re_path(r'^wp-content(.*)', spider, name="spider"), # web spider or bot. Intercept and manage it.
re_path(r'^wp-includes(.*)', spider, name="spider"), # web spider or bot. Intercept and manage it.
re_path(r'^(.*_edit_edit)$', spider, name="spider"), # web spider or bot. Intercept and manage it.
re_path(r'^(.*\.php)$', spider, name="spider"), # web spider or bot. Intercept and manage it. The dot is escaped so only a literal ".php" suffix matches.
re_path(r'^(.*\.do)$', spider, name="spider"), # web spider or bot. Intercept and manage it.
re_path(r'^(wp-admin.*)', spider, name="spider"), # web spider or bot. Intercept and manage it.
re_path(r'^(wp-content.*)', spider, name="spider"), # web spider or bot. Intercept and manage it.
re_path(r'^(wp-includes.*)', spider, name="spider"), # web spider or bot. Intercept and manage it.
re_path(r'^caves$', cavesall, name="cavesall"),
re_path(r'^indxal.htm$', cavesall, name="cavesall"), # ~420 hrefs to this url in expoweb files