2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-21 14:51:51 +00:00

spiders are asking for weird urls

This commit is contained in:
Philip Sargent 2023-02-02 11:13:02 +00:00
parent b55bfb8868
commit 3b9695b4f3
3 changed files with 12 additions and 4 deletions

View File

@ -55,10 +55,15 @@ Set up mysql (as root)
mysql -p
CREATE DATABASE troggle;
GRANT ALL PRIVILEGES ON troggle.* TO 'expo'@'localhost' IDENTIFIED BY 'somepassword';
Ctrl-D to exit
somepassword is set in localsettings.py
sudo service mariadb stop
sudo service mariadb start
to delete the database, it is
DROP DATABASE troggle;
install django:
sudo apt install python-django python-django-registration python-django-imagekit python-django-tinymce fonts-freefont-ttf libapache2-mod-wsgi

View File

@ -56,6 +56,8 @@ def expofiles_redirect(request, filepath):
"""
return redirect(urljoin("http://expo.survex.com/expofiles/", filepath))
def spider(request, _):
return redirect("/?#") # so that suffixes applied by spider are no longer part of the url
def map(request):
"""Serves unadorned the expoweb/map/map.html file"""

View File

@ -11,7 +11,7 @@ from troggle.core.views.drawings import dwgallfiles, dwgfilesingle
from troggle.core.views.editor_helpers import image_selector, new_image_form
from troggle.core.views.expo import (editexpopage, expofiles_redirect,
expofilessingle, expopage, map, mapfile,
mediapage)
mediapage, spider)
from troggle.core.views.logbooks import (Expeditions_jsonListView,
Expeditions_tsvListView, expedition,
get_logbook_entries, get_people,
@ -78,7 +78,9 @@ else:
trogglepatterns = [
path('expofiles/', include(expofilesurls)), # intercepted by Apache, if it is running.
path('expofiles', include(expofilesurls)), # curious interaction with the include() here, not just a slash problem.
re_path(r'^(.*)_edit_edit$', spider, name="spider"), # web spider funny
re_path(r'^caves$', caveindex, name="caveindex"),
re_path(r'^indxal.htm$', caveindex, name="caveindex"), # ~420 hrefs to this url in expoweb files
re_path(r'^people/?$', notablepersons, name="notablepersons"),
@ -133,7 +135,7 @@ trogglepatterns = [
re_path(r'^(?P<path>.*)/(?P<slug>[^/]+)_cave_edit/$', edit_cave, name="edit_cave"), # edit_cave needed by cave.html template for url matching
re_path(r'^(?P<path>.*)/(?P<caveslug>[^/]+):(?P<slug>[^:]+)_entrance_edit', edit_entrance, name = "editentrance"), #edit existing entrance
re_path(r'^(?P<path>.*)/(?P<caveslug>[^/]+)_entrance_new$', edit_entrance, name = "newentrance"), # new entrance for a cave
re_path(r'^(.*)_edit$', editexpopage, name="editexpopage"),
re_path(r'^(?P<karea>\d\d\d\d)(?P<subpath>.*)$', cavepage, name="cavepage"), # shorthand /1623/264 or 1623/161/top.htm
# Note that urls eg '/1623/161/l/rl89a.htm' are handled by cavepage which redirects them to 'expopage' # Note that _edit$ for a cave description page in a subfolder e.g. /1623/204/204.html_edit gets caught here and breaks with 404
@ -207,7 +209,6 @@ trogglepatterns = [
# Helpers to edit HTML
re_path(r'^image_selector/(?P<path>.*)', image_selector, name = 'image_selector'),
re_path(r'^new_image_form/(?P<path>.*)', new_image_form, name = 'new_image_form'),
# Final catchall which also serves expoweb handbook pages and imagestiny
re_path(r'^(.*)$', expopage, name="expopage"), # CATCHALL assumed relative to EXPOWEB