spiders are asking for weird urls

This commit is contained in:
Philip Sargent 2023-02-02 11:13:02 +00:00
parent b55bfb8868
commit 3b9695b4f3
3 changed files with 12 additions and 4 deletions

View File

@ -55,10 +55,15 @@ Set up mysql (as root)
mysql -p
CREATE DATABASE troggle;
GRANT ALL PRIVILEGES ON troggle.* TO 'expo'@'localhost' IDENTIFIED BY 'somepassword';
Ctrl-D to exit
somepassword is set in localsettings.py
sudo service mariadb stop
sudo service mariadb start
To delete the database, the command is:
DROP DATABASE troggle;
install django:
sudo apt install python-django python-django-registration python-django-imagekit python-django-tinymce fonts-freefont-ttf libapache2-mod-wsgi

View File

@ -56,6 +56,8 @@ def expofiles_redirect(request, filepath):
"""
return redirect(urljoin("http://expo.survex.com/expofiles/", filepath))
def spider(request, _):
    """Answer a web-spider request with a redirect to a fixed URL.

    The second positional argument is accepted but deliberately ignored.
    """
    # Fixed target, so that suffixes applied by the spider are no longer part of the url.
    target = "/?#"
    return redirect(target)
def map(request):
"""Serves unadorned the expoweb/map/map.html file"""

View File

@ -11,7 +11,7 @@ from troggle.core.views.drawings import dwgallfiles, dwgfilesingle
from troggle.core.views.editor_helpers import image_selector, new_image_form
from troggle.core.views.expo import (editexpopage, expofiles_redirect,
expofilessingle, expopage, map, mapfile,
mediapage)
mediapage, spider)
from troggle.core.views.logbooks import (Expeditions_jsonListView,
Expeditions_tsvListView, expedition,
get_logbook_entries, get_people,
@ -79,6 +79,8 @@ trogglepatterns = [
path('expofiles/', include(expofilesurls)), # intercepted by Apache, if it is running.
path('expofiles', include(expofilesurls)), # curious interaction with the include() here, not just a slash problem.
re_path(r'^(.*)_edit_edit$', spider, name="spider"), # web spider funny
re_path(r'^caves$', caveindex, name="caveindex"),
re_path(r'^indxal.htm$', caveindex, name="caveindex"), # ~420 hrefs to this url in expoweb files
re_path(r'^people/?$', notablepersons, name="notablepersons"),
@ -208,7 +210,6 @@ trogglepatterns = [
re_path(r'^image_selector/(?P<path>.*)', image_selector, name = 'image_selector'),
re_path(r'^new_image_form/(?P<path>.*)', new_image_form, name = 'new_image_form'),
# Final catchall which also serves expoweb handbook pages and imagestiny
re_path(r'^(.*)$', expopage, name="expopage"), # CATCHALL assumed relative to EXPOWEB
]