2022-04-05 08:37:31 +01:00
import os
import re
2022-06-23 17:01:25 +01:00
from sys import getfilesystemencoding as sys_getfilesystemencoding
2022-04-05 08:37:31 +01:00
from pathlib import Path
from urllib . parse import urljoin , unquote as urlunquote
from urllib . request import urlopen
from django . shortcuts import render , redirect
from django . http import HttpResponse , HttpResponseRedirect , Http404
from django . urls import reverse , resolve
from django . template import Context , loader
from django . views . decorators . csrf import ensure_csrf_cookie
from django . contrib import admin
import django . forms as forms
from . auth import login_required_if_public
from troggle . core . models . caves import Cave
import troggle . core . views . caves
import troggle . settings as settings
2022-07-18 15:37:22 +01:00
from troggle . core . utils import write_and_commit , WriteAndCommitError
2022-06-23 18:48:29 +01:00
2022-06-26 14:16:42 +01:00
from troggle . core . views . editor_helpers import HTMLarea
2022-06-18 23:43:21 +01:00
2022-04-05 08:37:31 +01:00
'''Formerly a separate package called 'flatpages' written by Martin Green 2011.
This was NOT django.contrib.flatpages which stores HTML in the database, so the name was changed to expopages.
Then it was incorporated into troggle directly, rather than being an unnecessary external package.
'''
# Minimal <head> plus page banner and navigation menu, prepended to the
# plain-HTML failure messages that the views in this file return directly.
# It opens <body> but does not close it: callers append their own text and
# the closing tag.
# For most normal pages django uses templates/menu.html instead.
default_head = '''<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
<title>CUCC Expedition - index</title>
<link rel="stylesheet" type="text/css" href="../css/main2.css"/>
<link rel="stylesheet" type="text/css" href="../../css/main2.css"/>
<link rel="stylesheet" type="text/css" href="../../../css/main2.css"/>
</head>
<body>
<h1>Expo</h1>
  <h2 id="tophead">CUCC Expedition</h2>
  <ul id="links">
    <li><a href="/index.htm">Home</a></li>
    <li><a href="/infodx.htm">Main Index</a></li>
    <li><a href="/handbook/index.htm">Handbook</a></li>
    <li><a href="/handbook/computing/onlinesystems.html">Online systems</a></li>
    <li><a href="/pubs.htm">Reports</a></li>
    <li><a href="/areas.htm">Areas</a></li>
    <li><a href="/caves">Caves</a></li>
    <li><a href="/expedition/2019">Troggle</a></li>
    <li><form name=P method=get action="/search" target="_top">
    <input id="omega-autofocus" type=search name=P size=8 autofocus>
    <input type=submit value="Search"></form></li>
  </ul>'''
def expofiles_redirect(request, filepath):
    '''Redirect a request for an /expofiles/ file to the public expo server.

    This is used only when running as a test system without a local copy of
    /expofiles/, i.e. when settings.EXPOFILESREMOTE is True.

    request  -- the incoming HttpRequest (unused)
    filepath -- path of the wanted file relative to /expofiles/
    Returns the redirect response to the same path on expo.survex.com.
    '''
    return redirect(urljoin('http://expo.survex.com/expofiles/', filepath))
def map(request):
    '''Serves unadorned the expoweb/map/map.html file.

    The file is sent as raw bytes so it is never decoded and re-encoded on the
    way through. A missing or unreadable file gives the standard
    'pagenotfound.html' page with status 404 instead of a server error.
    (The name shadows the builtin map(); it is kept because it is this
    view's public name.)
    '''
    fn = Path(settings.EXPOWEB, 'map', 'map.html')
    try:
        content = fn.read_bytes()
    except OSError:
        return render(request, 'pagenotfound.html', {'path': 'map/map.html'}, status=404)
    return HttpResponse(content=content, content_type='text/html')
def mapfile(request, path):
    '''Serves unadorned a file from the expoweb/map/ directory.

    request -- the incoming HttpRequest
    path    -- path of the file relative to expoweb/map/, taken from the URL

    The file is read in binary mode so that images and other non-text files
    are delivered intact. Absolute paths, paths containing '..' and files
    which cannot be read all get the 'pagenotfound.html' page with status 404.
    '''
    requested = Path(path)
    # path comes straight from the URL: never let it climb out of map/
    if requested.is_absolute() or '..' in requested.parts:
        return render(request, 'pagenotfound.html', {'path': path}, status=404)

    fn = Path(settings.EXPOWEB, 'map', path)
    try:
        content = fn.read_bytes()
    except OSError:
        return render(request, 'pagenotfound.html', {'path': path}, status=404)
    return HttpResponse(content=content, content_type=getmimetype(fn))
def expofilessingle(request, filepath):
    '''Sends a single binary file to the user; if not found, shows the parent directory.
    If the path actually is a directory, then shows that.

    request  -- the incoming HttpRequest
    filepath -- path relative to /expofiles/, taken from the URL

    Anything which would resolve outside settings.EXPOFILES (an absolute path
    or one containing '..') is answered with the top-level /expofiles/ listing.
    '''
    root = Path(settings.EXPOFILES)
    if filepath == "" or filepath == "/":
        return expofilesdir(request, root, "")

    relative = Path(filepath)
    # filepath comes from the URL: keep everything below expofiles
    if relative.is_absolute() or '..' in relative.parts:
        return expofilesdir(request, root, "")

    fn = root / relative
    if fn.is_dir():
        return expofilesdir(request, fn, relative)
    if fn.is_file():
        return HttpResponse(content=fn.read_bytes(), content_type=getmimetype(filepath))  # any file

    # not a file, so show the parent directory; with '..' rejected above this
    # can never be higher up than settings.EXPOFILES itself
    return expofilesdir(request, fn.parent, relative.parent)
def expofilesdir(request, dirpath, filepath):
    '''Does a directory display using the 'dirdisplay.html' template.

    - dirpath is the full path of the directory on the local machine,
      including /expofiles/ (a Path or a str)
    - filepath is the same directory relative to /expofiles/, i.e. it does not
      have /expofiles/ in it (a Path, a str, or "" for the top level)

    If dirpath is not an existing directory, the nearest existing parent is
    shown instead. The climb stops at settings.EXPOFILES (or at the filesystem
    root), where a 'pagenotfound.html' page with status 404 is returned.

    An index.html in the directory is NOT served in place of the listing: an
    earlier attempt was disabled because of a css working-directory problem.
    '''
    dirpath = Path(dirpath)
    filepath = Path(filepath) if filepath else Path()
    expofiles_root = Path(settings.EXPOFILES)

    # Walk upwards until we find something we can list, keeping the url path in step
    while not dirpath.is_dir():
        if dirpath == expofiles_root or dirpath.parent == dirpath:
            return render(request, 'pagenotfound.html', {'path': Path('expofiles') / filepath}, status=404)
        dirpath = dirpath.parent
        filepath = filepath.parent

    urlpath = Path('expofiles') / filepath

    fileitems = []  # (url path, name, mime type) for each non-directory entry
    diritems = []   # (url path, name) for each sub-directory
    for f in dirpath.iterdir():
        if f.is_dir():
            diritems.append((urlpath / f.name, f.name))
        else:
            fileitems.append((urlpath / f.name, f.name, getmimetype(f)))

    return render(request, 'dirdisplay.html', {'filepath': urlpath, 'fileitems': fileitems, 'diritems': diritems, 'settings': settings})
def expowebpage(request, expowebpath, path):
    '''Adds menus and serves an HTML page from expoweb.

    request     -- the incoming HttpRequest
    expowebpath -- Path of the expoweb directory on this machine
    path        -- path of the .htm/.html file relative to expowebpath

    The page is split into its <head> and <body>; the title and body are then
    rendered inside the 'expopage.html' template together with flags saying
    whether the page is editable, is the home page, is part of the handbook,
    and which expo year and which caves it appears to relate to.

    Returns 'pagenotfound.html' with status 404 if the file does not exist, and
    a plain failure page if the file cannot be read or cannot be split into
    head and body.
    '''
    pagefile = expowebpath / path
    if not os.path.isfile(pagefile):
        # Should not get here if the path has suffix "_edit"
        print(f' - 404 error in expowebpage() {path}')
        return render(request, 'pagenotfound.html', {'path': path}, status=404)

    if sys_getfilesystemencoding() != "utf-8":
        return HttpResponse(
            default_head
            + '<h3>UTF-8 Parsing Failure:<br>Default file encoding on this Troggle installation is not UTF-8:<br>'
            'failure detected in expowebpage in views.expo.py</h3> '
            'Please reconfigure Debian/Apache/Django to fix this, i.e. contact Wookey. </body>'
        )

    try:
        with open(pagefile, "r", encoding='utf-8') as o:
            html = o.read()
    except (UnicodeDecodeError, OSError):
        # The file is not valid UTF-8 (or could not be read at all). Try again
        # in binary: good characters are decoded normally and every bad byte is
        # shown as a visible \xNN escape so that an editor can find and fix it.
        try:
            with open(pagefile, "rb") as o:
                raw = o.read()
        except OSError:
            return HttpResponse(
                default_head
                + '<h3>UTF-8 Parsing Failure:<br>Page could not be parsed using UTF-8:<br>'
                'failure detected in expowebpage in views.expo.py</h3> '
                'Please edit this <var>:expoweb:</var> page to replace dubious umlauts and £ symbols '
                'with correct HTML entities e.g. <em>&amp;pound;</em>. </body>'
            )
        html = raw.decode('utf-8', errors='backslashreplace')
        html = html.replace("\r", "")
        html = html.replace("<h1>", "<h1>BAD NON-UTF-8 characters here - ")

    m = re.search(r'(.*)<\s*head([^>]*)>(.*)<\s*/head\s*>(.*)<\s*body([^>]*)>(.*)<\s*/body\s*>(.*)', html, re.DOTALL + re.IGNORECASE)
    if not m:
        return HttpResponse(
            default_head + html
            + '<h3>HTML Parsing failure:<br>Page could not be parsed into header and body:<br>'
            'failure detected in expowebpage in views.expo.py</h3> '
            'Please edit this <var>:expoweb:</var> page to be in the expected full HTML format </body>'
        )
    # groups: preheader, headerattrs, head, postheader, bodyattrs, body, postbody
    head = m.group(3)
    body = m.group(6)

    m = re.search(r"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE)
    title = m.group(1) if m else ""

    # A <meta ... noedit ...> tag anywhere in the head switches editing off.
    # (Not anchored with ^: without re.MULTILINE that only matches at the very
    # first character of the head.)
    if re.search(r"<meta([^>]*)noedit", head, re.DOTALL + re.IGNORECASE):
        editable = False
    else:
        editable = os.access(pagefile, os.W_OK)  # are file permissions writeable?

    # NOTE(review): has_menu is passed as False even when the body already
    # contains its own <ul id="links"> menu. Both branches of the earlier test
    # for that menu set False, so that behaviour is kept here - confirm it is
    # intended.
    has_menu = False

    # Determine which caves this page relates to
    m = re.search(r"(162\d\/[^\/]+)[\/\.]", path, re.DOTALL + re.IGNORECASE)
    if m:
        parent_caves = Cave.objects.filter(url__startswith=m.group(1))
    else:
        parent_caves = None

    # Determine if this page relates to a particular year
    m = re.search(r"years\/(\d\d\d\d)\/.*", path, re.DOTALL + re.IGNORECASE)
    year = m.group(1) if m else None

    # Determine if this page is part of the handbook
    handbook = path.startswith("handbook")

    return render(request, 'expopage.html', {'editable': editable, 'path': path, 'title': title,
                                             'body': body, 'homepage': (path == "index.htm"), 'has_menu': has_menu,
                                             'year': year, 'handbook': handbook, 'parent_caves': parent_caves})
def mediapage(request, subpath=None, doc_root=None):
    '''This is for special prefix paths /photos/ /site_media/, /static/ etc.
    as defined in urls.py. If given a directory, gives a failure page.

    request  -- the incoming HttpRequest
    subpath  -- path of the file below doc_root, taken from the URL
    doc_root -- directory on this machine which the URL prefix maps to

    Returns the file as raw bytes with the mime type from getmimetype(),
    'nodirlist.html' for a directory, and 'pagenotfound.html' with status 404
    when doc_root or subpath is missing, when subpath is absolute or contains
    '..', or when the file cannot be read.
    '''
    if doc_root is None or subpath is None:
        return render(request, 'pagenotfound.html', {'path': subpath}, status=404)

    requested = Path(subpath)
    # subpath comes from the URL: never let it climb out of doc_root
    if requested.is_absolute() or '..' in requested.parts:
        return render(request, 'pagenotfound.html', {'path': subpath}, status=404)

    filetobeopened = Path(doc_root, subpath)
    if filetobeopened.is_dir():
        return render(request, 'nodirlist.html', {'path': subpath})
    try:
        content = filetobeopened.read_bytes()
    except OSError:
        return render(request, 'pagenotfound.html', {'path': subpath}, status=404)
    return HttpResponse(content=content, content_type=getmimetype(subpath))
def expopage(request, path):
    '''Either renders an HTML page from expoweb with all the menus,
    or serves an unadorned binary file with mime type.

    request -- the incoming HttpRequest
    path    -- path relative to settings.EXPOWEB, taken from the URL

    In order:
    - an absolute path or one containing '..' gets 'pagenotfound.html' (404)
    - 'noinfo...' paths on a public site redirect anonymous users to the login
    - 'admin/...' paths redirect to /admin/
    - '' and *.htm / *.html are rendered by expowebpage()
    - a directory redirects to its index.html or index.htm if it has one
    - anything else is sent as raw bytes with the mime type from getmimetype()
    '''
    requested = Path(path)
    # path comes from the URL: never let it climb out of expoweb, and do not
    # let 'x/../noinfo/...' slip past the prefix test below
    if requested.is_absolute() or '..' in requested.parts:
        return render(request, 'pagenotfound.html', {'path': path}, status=404)

    if path.startswith("noinfo") and settings.PUBLIC_SITE and not request.user.is_authenticated:
        return HttpResponseRedirect(urljoin(reverse("auth_login"), '?next={}'.format(request.path)))

    if path.startswith("admin/"):
        # don't even attempt to handle these sorts of mistakes
        return HttpResponseRedirect("/admin/")

    expowebpath = Path(settings.EXPOWEB)

    if path == "":
        return expowebpage(request, expowebpath, "index.htm")
    if path.endswith((".htm", ".html")):
        return expowebpage(request, expowebpath, path)

    if (expowebpath / path).is_dir():
        for p in ["index.html", "index.htm"]:
            if (expowebpath / path / p).is_file():
                # This needs to reset the path to the new subdirectory
                return HttpResponseRedirect('/' + str(Path(path) / p))
        return render(request, 'pagenotfound.html', {'path': Path(path) / "index.html"}, status=404)

    if path.endswith("/"):
        # we already know it is not a directory.
        # the final / may have been appended by middleware if there was no page without it
        # do not redirect to a file path without the slash as we may get in a loop. Let the user fix it:
        return render(request, 'dirnotfound.html', {'path': path, 'subpath': path[0:-1]})

    # So it must be a file in /expoweb/ but not .htm or .html probably an image, maybe a txt file
    filetobeopened = expowebpath / path

    if sys_getfilesystemencoding() != "utf-8":
        return HttpResponse(
            default_head
            + '<h3>UTF-8 Parsing Failure:<br>Default file encoding on this Troggle installation is not UTF-8:<br>'
            'failure detected in expopage in views.expo.py</h3> '
            'Please reconfigure Debian/Apache/Django to fix this, i.e. contact Wookey. </body>'
        )

    try:
        content = filetobeopened.read_bytes()
    except OSError:
        return render(request, 'pagenotfound.html', {'path': path}, status=404)
    return HttpResponse(content=content, content_type=getmimetype(path))
def getmimetype(path):
    '''Return the MIME type for a file, judged only by the end of its name.

    Our own version rather than relying on what is provided by the python
    library. Note that when Apache or nginx is used to deliver /expofiles/ it
    will use its own idea of mimetypes and not these.

    path -- a str or Path; only the lower-cased end of str(path) is examined
    Returns the MIME type string, or "" if the suffix is not recognised.
    '''
    suffix_types = (
        (".css", "text/css"),
        (".txt", "text/plain"),
        (".js", "application/javascript"),
        (".json", "application/json"),
        (".ico", "image/vnd.microsoft.icon"),
        (".png", "image/png"),
        (".tif", "image/tiff"),
        (".tiff", "image/tiff"),
        (".gif", "image/gif"),
        (".jpeg", "image/jpeg"),
        (".jpg", "image/jpeg"),
        (".svg", "image/svg+xml"),
        (".xml", "application/xml"),  # we use "text/xhtml" for tunnel files
        (".pdf", "application/pdf"),
        (".ps", "application/postscript"),
        (".svx", "application/x-survex-svx"),
        (".3d", "application/x-survex-3d"),
        (".pos", "application/x-survex-pos"),
        (".err", "application/x-survex-err"),
        (".odt", "application/vnd.oasis.opendocument.text"),
        (".ods", "application/vnd.oasis.opendocument.spreadsheet"),
        (".docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
        (".xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
        # the misspelt suffix was the only one recognised before; kept so that
        # any file actually named that way is still served with the same type
        (".xslx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
        (".gz", "application/gzip"),
        (".7z", "application/x-7z-compressed"),
        (".zip", "application/zip"),
    )
    name = str(path).lower()
    for suffix, mimetype in suffix_types:
        if name.endswith(suffix):
            return mimetype
    return ""
@login_required_if_public
@ensure_csrf_cookie
def editexpopage(request, path):
    '''Manages the 'Edit this Page' capability for expo handbook and other html pages.
    Relies on HTML5 or javascript to provide the in-browser editing environment.

    request -- the incoming HttpRequest
    path    -- path of the page relative to settings.EXPOWEB, taken from the URL

    If path is the url of a Cave, the cave editor is used instead.
    On GET the edit form is shown, pre-filled from the existing file if any.
    On a valid POST the page is rebuilt from the submitted title and html,
    written and committed with write_and_commit() if it has changed, and the
    browser is redirected to the finished page. A file which does not exist
    yet is created as a minimal new page.
    '''
    try:
        # if a cave not a webpage at all.
        r = Cave.objects.get(url=path)
    except Cave.DoesNotExist:
        r = None
    if r is not None:
        # NOTE(review): r is already a Cave, so 'r.cave.slug' looks odd - confirm
        # against the Cave model that this attribute exists.
        return troggle.core.views.caves.editCave(request, r.cave.slug)

    requested = Path(path)
    # path comes from the URL and the file is WRITTEN below: never let it
    # climb out of expoweb
    if requested.is_absolute() or '..' in requested.parts:
        return render(request, 'pagenotfound.html', {'path': path}, status=404)

    print(f' - {sys_getfilesystemencoding()=}')
    if sys_getfilesystemencoding() != "utf-8":
        return HttpResponse(
            default_head
            + '<h3>UTF-8 Parsing Failure:<br>Default file encoding on this Troggle installation is not UTF-8:<br>'
            'failure detected in editexpopage in views.expo.py</h3> '
            'Please reconfigure Debian/Apache/Django to fix this, i.e. contact Wookey. </body>'
        )

    filepath = Path(settings.EXPOWEB) / path
    html = None  # stays None when there is no existing file, so any new page counts as changed
    filefound = False
    try:
        with open(filepath, "r", encoding="utf8") as o:
            html = o.read()
    except IOError:
        print("### File not found ### ", filepath)
    else:
        # Refuse to edit generated pages: just show the warning comment they carry
        autogeneratedmatch = re.search(r"\<\!--\s*(.*?(Do not edit|It is auto-generated).*?)\s*--\>", html, re.DOTALL + re.IGNORECASE)
        if autogeneratedmatch:
            return HttpResponse(autogeneratedmatch.group(1))

        m = re.search(r"(.*)<head([^>]*)>(.*)</head>(.*)<body([^>]*)>(.*)</body>(.*)", html, re.DOTALL + re.IGNORECASE)
        if not m:
            return HttpResponse(
                default_head + html
                + '<h3>HTML Parsing failure:<br>Page could not be parsed into header and body:<br>'
                'failure detected in editexpopage in views.expo.py</h3> '
                'Please edit this <var>:expoweb:</var> page to be in the expected full HTML format .</body>'
            )
        filefound = True
        preheader, headerargs, head, postheader, bodyargs, body, postbody = m.groups()

    if request.method == 'POST':  # If the form has been submitted...
        pageform = ExpoPageForm(request.POST)  # A form bound to the POST data
        if pageform.is_valid():  # Form valid therefore write file
            title_element = "<title>" + pageform.cleaned_data["title"] + "</title>"
            if filefound:
                headmatch = re.match(r"(.*)<title>.*</title>(.*)", head, re.DOTALL + re.IGNORECASE)
                if headmatch:
                    head = headmatch.group(1) + title_element + headmatch.group(2)
                else:
                    # no title yet: add one, keeping the stylesheet links etc.
                    head = head + title_element
            else:
                # brand new page: wrap the submitted content in a minimal document
                head = title_element
                preheader = "<html>"
                headerargs = ""
                postheader = ""
                bodyargs = ""
                postbody = "</html>\n"

            body = pageform.cleaned_data["html"].replace("\r", "")
            result = "%s<head%s>%s</head>%s<body%s>\n%s</body>%s" % (preheader, headerargs, head, postheader, bodyargs, body, postbody)

            if result != html:  # Check if content changed at all
                try:
                    change_message = pageform.cleaned_data["change_message"]
                    write_and_commit([(filepath, result, "utf-8")], f'{change_message} - online edit of {path}')
                except WriteAndCommitError as e:
                    return render(request, 'errors/generic.html', {'message': e.message})

            return HttpResponseRedirect(reverse('expopage', args=[path]))  # Redirect after POST
        # an invalid form falls through and is shown again with its errors
    else:
        if filefound:
            m = re.search(r"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE)
            title = m.group(1) if m else ""
            pageform = ExpoPageForm(initial={"html": body, "title": title})
        else:
            pageform = ExpoPageForm()

    return render(request, 'editexpopage.html', {'path': path, 'form': pageform})
class ExpoPageForm(forms.Form):
    '''The form used by the editexpopage function.

    Three fields, each a CharField: the page title, the page body as HTML
    (edited in an HTMLarea widget with preview switched on) and a message
    describing the change, which editexpopage uses in the commit message.
    '''
    # text for the page's <title> element
    title = forms.CharField(
        widget=forms.TextInput(attrs={'size': '60', 'placeholder': "Enter title (displayed in tab)"})
    )
    # everything that goes between <body> and </body>
    html = forms.CharField(
        widget=HTMLarea(
            attrs={"height": "80%", "rows": 20, 'placeholder': "Enter page content (using HTML)"},
            preview=True,
        )
    )
    # free text recorded with the commit
    change_message = forms.CharField(
        widget=forms.Textarea(
            attrs={"cols": 80, "rows": 3, 'placeholder': "Describe the change made (for version control records)"}
        )
    )