spiders are asking for weird urls

This commit is contained in:
Philip Sargent 2023-02-02 11:13:02 +00:00
parent b55bfb8868
commit 3b9695b4f3
3 changed files with 12 additions and 4 deletions

View File

@ -55,10 +55,15 @@ Set up mysql (as root)
mysql -p mysql -p
CREATE DATABASE troggle; CREATE DATABASE troggle;
GRANT ALL PRIVILEGES ON troggle.* TO 'expo'@'localhost' IDENTIFIED BY 'somepassword'; GRANT ALL PRIVILEGES ON troggle.* TO 'expo'@'localhost' IDENTIFIED BY 'somepassword';
Ctrl-D to exit
somepassword is set in localsettings.py
sudo service mariadb stop sudo service mariadb stop
sudo service mariadb start sudo service mariadb start
To delete the database, run:
DROP DATABASE troggle;
install django: install django:
sudo apt install python-django python-django-registration python-django-imagekit python-django-tinymce fonts-freefont-ttf libapache2-mod-wsgi sudo apt install python-django python-django-registration python-django-imagekit python-django-tinymce fonts-freefont-ttf libapache2-mod-wsgi

View File

@ -56,6 +56,8 @@ def expofiles_redirect(request, filepath):
""" """
return redirect(urljoin("http://expo.survex.com/expofiles/", filepath)) return redirect(urljoin("http://expo.survex.com/expofiles/", filepath))
def spider(request, _):
    """Redirect a web-spider request to the site root.

    The second positional argument is accepted but never used. The
    redirect target ends with an empty query string and fragment so that
    suffixes applied by the spider are no longer part of the url.
    """
    root_target = "/?#"
    return redirect(root_target)
def map(request): def map(request):
"""Serves unadorned the expoweb/map/map.html file""" """Serves unadorned the expoweb/map/map.html file"""

View File

@ -11,7 +11,7 @@ from troggle.core.views.drawings import dwgallfiles, dwgfilesingle
from troggle.core.views.editor_helpers import image_selector, new_image_form from troggle.core.views.editor_helpers import image_selector, new_image_form
from troggle.core.views.expo import (editexpopage, expofiles_redirect, from troggle.core.views.expo import (editexpopage, expofiles_redirect,
expofilessingle, expopage, map, mapfile, expofilessingle, expopage, map, mapfile,
mediapage) mediapage, spider)
from troggle.core.views.logbooks import (Expeditions_jsonListView, from troggle.core.views.logbooks import (Expeditions_jsonListView,
Expeditions_tsvListView, expedition, Expeditions_tsvListView, expedition,
get_logbook_entries, get_people, get_logbook_entries, get_people,
@ -79,6 +79,8 @@ trogglepatterns = [
path('expofiles/', include(expofilesurls)), # intercepted by Apache, if it is running. path('expofiles/', include(expofilesurls)), # intercepted by Apache, if it is running.
path('expofiles', include(expofilesurls)), # curious interaction with the include() here, not just a slash problem. path('expofiles', include(expofilesurls)), # curious interaction with the include() here, not just a slash problem.
re_path(r'^(.*)_edit_edit$', spider, name="spider"), # web spider funny
re_path(r'^caves$', caveindex, name="caveindex"), re_path(r'^caves$', caveindex, name="caveindex"),
re_path(r'^indxal.htm$', caveindex, name="caveindex"), # ~420 hrefs to this url in expoweb files re_path(r'^indxal.htm$', caveindex, name="caveindex"), # ~420 hrefs to this url in expoweb files
re_path(r'^people/?$', notablepersons, name="notablepersons"), re_path(r'^people/?$', notablepersons, name="notablepersons"),
@ -208,7 +210,6 @@ trogglepatterns = [
re_path(r'^image_selector/(?P<path>.*)', image_selector, name = 'image_selector'), re_path(r'^image_selector/(?P<path>.*)', image_selector, name = 'image_selector'),
re_path(r'^new_image_form/(?P<path>.*)', new_image_form, name = 'new_image_form'), re_path(r'^new_image_form/(?P<path>.*)', new_image_form, name = 'new_image_form'),
# Final catchall which also serves expoweb handbook pages and imagestiny # Final catchall which also serves expoweb handbook pages and imagestiny
re_path(r'^(.*)$', expopage, name="expopage"), # CATCHALL assumed relative to EXPOWEB re_path(r'^(.*)$', expopage, name="expopage"), # CATCHALL assumed relative to EXPOWEB
] ]