2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-21 23:01:52 +00:00

spiders are asking for weird urls

This commit is contained in:
Philip Sargent 2023-02-02 11:13:02 +00:00
parent b55bfb8868
commit 3b9695b4f3
3 changed files with 12 additions and 4 deletions

View File

@ -55,10 +55,15 @@ Set up mysql (as root)
mysql -p mysql -p
CREATE DATABASE troggle; CREATE DATABASE troggle;
GRANT ALL PRIVILEGES ON troggle.* TO 'expo'@'localhost' IDENTIFIED BY 'somepassword'; GRANT ALL PRIVILEGES ON troggle.* TO 'expo'@'localhost' IDENTIFIED BY 'somepassword';
Ctrl-D to exit
somepassword is set in localsettings.py
sudo service mariadb stop sudo service mariadb stop
sudo service mariadb start sudo service mariadb start
To delete the database, run:
DROP DATABASE troggle;
install django: install django:
sudo apt install python-django python-django-registration python-django-imagekit python-django-tinymce fonts-freefont-ttf libapache2-mod-wsgi sudo apt install python-django python-django-registration python-django-imagekit python-django-tinymce fonts-freefont-ttf libapache2-mod-wsgi

View File

@ -56,6 +56,8 @@ def expofiles_redirect(request, filepath):
""" """
return redirect(urljoin("http://expo.survex.com/expofiles/", filepath)) return redirect(urljoin("http://expo.survex.com/expofiles/", filepath))
def spider(request, _):
    """Send a spider-generated request back to the site root.

    The second positional argument (the captured part of the requested
    url) is deliberately ignored; every request handled here gets the
    same redirect response.
    """
    # so that suffixes applied by spider are no longer part of the url
    root_target = "/?#"
    return redirect(root_target)
def map(request): def map(request):
"""Serves unadorned the expoweb/map/map.html file""" """Serves unadorned the expoweb/map/map.html file"""

View File

@ -11,7 +11,7 @@ from troggle.core.views.drawings import dwgallfiles, dwgfilesingle
from troggle.core.views.editor_helpers import image_selector, new_image_form from troggle.core.views.editor_helpers import image_selector, new_image_form
from troggle.core.views.expo import (editexpopage, expofiles_redirect, from troggle.core.views.expo import (editexpopage, expofiles_redirect,
expofilessingle, expopage, map, mapfile, expofilessingle, expopage, map, mapfile,
mediapage) mediapage, spider)
from troggle.core.views.logbooks import (Expeditions_jsonListView, from troggle.core.views.logbooks import (Expeditions_jsonListView,
Expeditions_tsvListView, expedition, Expeditions_tsvListView, expedition,
get_logbook_entries, get_people, get_logbook_entries, get_people,
@ -78,7 +78,9 @@ else:
trogglepatterns = [ trogglepatterns = [
path('expofiles/', include(expofilesurls)), # intercepted by Apache, if it is running. path('expofiles/', include(expofilesurls)), # intercepted by Apache, if it is running.
path('expofiles', include(expofilesurls)), # curious interaction with the include() here, not just a slash problem. path('expofiles', include(expofilesurls)), # curious interaction with the include() here, not just a slash problem.
re_path(r'^(.*)_edit_edit$', spider, name="spider"), # web spider funny
re_path(r'^caves$', caveindex, name="caveindex"), re_path(r'^caves$', caveindex, name="caveindex"),
re_path(r'^indxal.htm$', caveindex, name="caveindex"), # ~420 hrefs to this url in expoweb files re_path(r'^indxal.htm$', caveindex, name="caveindex"), # ~420 hrefs to this url in expoweb files
re_path(r'^people/?$', notablepersons, name="notablepersons"), re_path(r'^people/?$', notablepersons, name="notablepersons"),
@ -133,7 +135,7 @@ trogglepatterns = [
re_path(r'^(?P<path>.*)/(?P<slug>[^/]+)_cave_edit/$', edit_cave, name="edit_cave"), # edit_cave needed by cave.html template for url matching re_path(r'^(?P<path>.*)/(?P<slug>[^/]+)_cave_edit/$', edit_cave, name="edit_cave"), # edit_cave needed by cave.html template for url matching
re_path(r'^(?P<path>.*)/(?P<caveslug>[^/]+):(?P<slug>[^:]+)_entrance_edit', edit_entrance, name = "editentrance"), #edit existing entrance re_path(r'^(?P<path>.*)/(?P<caveslug>[^/]+):(?P<slug>[^:]+)_entrance_edit', edit_entrance, name = "editentrance"), #edit existing entrance
re_path(r'^(?P<path>.*)/(?P<caveslug>[^/]+)_entrance_new$', edit_entrance, name = "newentrance"), # new entrance for a cave re_path(r'^(?P<path>.*)/(?P<caveslug>[^/]+)_entrance_new$', edit_entrance, name = "newentrance"), # new entrance for a cave
re_path(r'^(.*)_edit$', editexpopage, name="editexpopage"), re_path(r'^(.*)_edit$', editexpopage, name="editexpopage"),
re_path(r'^(?P<karea>\d\d\d\d)(?P<subpath>.*)$', cavepage, name="cavepage"), # shorthand /1623/264 or 1623/161/top.htm re_path(r'^(?P<karea>\d\d\d\d)(?P<subpath>.*)$', cavepage, name="cavepage"), # shorthand /1623/264 or 1623/161/top.htm
# Note that urls eg '/1623/161/l/rl89a.htm' are handled by cavepage which redirects them to 'expopage' # Note that _edit$ for a cave description page in a subfolder e.g. /1623/204/204.html_edit gets caught here and breaks with 404 # Note that urls eg '/1623/161/l/rl89a.htm' are handled by cavepage which redirects them to 'expopage' # Note that _edit$ for a cave description page in a subfolder e.g. /1623/204/204.html_edit gets caught here and breaks with 404
@ -207,7 +209,6 @@ trogglepatterns = [
# Helpers to edit HTML # Helpers to edit HTML
re_path(r'^image_selector/(?P<path>.*)', image_selector, name = 'image_selector'), re_path(r'^image_selector/(?P<path>.*)', image_selector, name = 'image_selector'),
re_path(r'^new_image_form/(?P<path>.*)', new_image_form, name = 'new_image_form'), re_path(r'^new_image_form/(?P<path>.*)', new_image_form, name = 'new_image_form'),
# Final catchall which also serves expoweb handbook pages and imagestiny # Final catchall which also serves expoweb handbook pages and imagestiny
re_path(r'^(.*)$', expopage, name="expopage"), # CATCHALL assumed relative to EXPOWEB re_path(r'^(.*)$', expopage, name="expopage"), # CATCHALL assumed relative to EXPOWEB