Fix bug in parsing bad HTML pages, e.g. expo 82

This commit is contained in:
Philip Sargent 2021-04-05 14:49:06 +01:00
parent 409037bdf3
commit 9db1a8490c

View File

@@ -23,6 +23,33 @@ This was NOT django.contrib.flatpages which stores HTML in the database, so the
Then it was incorporated into troggle directly, rather than being an unnecessary external package. Then it was incorporated into troggle directly, rather than being an unnecessary external package.
''' '''
# Fallback <head> and navigation menu, prepended to the raw page content when
# a page cannot be split into header and body by the parsing regex in the
# views in this file. <body> is deliberately left open: the caller appends the
# unparsed page content and an error message after this fragment.
# The three stylesheet links differ only in relative depth ("../", "../../",
# "../../../").
# NOTE(review): the "Edit this page" link is hard-coded to
# /years/1983/index.html_edit rather than the path of the failing page --
# confirm whether it should be built per request.
# NOTE(review): the meta tag declares iso-8859-1 while callers append
# html.decode() output -- confirm the encodings agree.
default_head = '''<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<title>CUCC Expedition - index</title>
<link rel="stylesheet" type="text/css" href="../css/main2.css" />
<link rel="stylesheet" type="text/css" href="../../css/main2.css" />
<link rel="stylesheet" type="text/css" href="../../../css/main2.css" />
</head>
<body>
<h1>Expo</h1>
<h2 id="tophead">CUCC Expedition</h2>
<ul id="links">
<li><a href="/index.htm">Home</a></li>
<li><a href="/infodx.htm">Main Index</a></li>
<li><a href="/handbook/index.htm">Handbook</a></li>
<li><a href="/handbook/computing/onlinesystems.html">Online systems</a></li>
<li><a href="/pubs.htm">Reports</a></li>
<li><a href="/areas.htm">Areas</a></li>
<li><a href="/caves">Caves</a></li>
<li><a href="/expedition/2019">Troggle</a></li>
<li><form name=P method=get action="/search" target="_top">
<input id="omega-autofocus" type=search name=P value="testing" size=8 autofocus>
<input type=submit value="Search"></form></li>
<li><a href="/years/1983/index.html_edit" class="editlink"><strong>Edit this page</strong></a></li>
</ul>'''
def expofiles_redirect(request, path): def expofiles_redirect(request, path):
'''This is used only when running as a test system without a local copy of /expofiles/ '''This is used only when running as a test system without a local copy of /expofiles/
''' '''
@@ -57,7 +84,7 @@ def expofilesdir(request, dirpath, filepath):
return render(request, 'dirdisplay.html', { 'filepath': urlpath, 'fileitems':fileitems, 'diritems': diritems,'settings': settings }) return render(request, 'dirdisplay.html', { 'filepath': urlpath, 'fileitems':fileitems, 'diritems': diritems,'settings': settings })
def expowebpage(request, expowebpath, path): def expowebpage(request, expowebpath, path):
'''Adds memnus and serves an HTML page '''Adds menus and serves an HTML page
''' '''
if not Path(expowebpath / path).is_file(): if not Path(expowebpath / path).is_file():
return render(request, 'pagenotfound.html', {'path': path}) return render(request, 'pagenotfound.html', {'path': path})
@@ -69,7 +96,7 @@ def expowebpage(request, expowebpath, path):
if m: if m:
preheader, headerattrs, head, postheader, bodyattrs, body, postbody = m.groups() preheader, headerattrs, head, postheader, bodyattrs, body, postbody = m.groups()
else: else:
return HttpResponse(html + "HTML Parsing failure: Page could not be split into header and body: failed in expowebpage in views.expo.py") return HttpResponse(default_head + html.decode() + '<h3>HTML Parsing failure:<br>Page could not be parsed into header and body:<br>failure detected in expowebpage in views.expo.py</h3> Please edit this <var>:expoweb:</var> page to be in the expected full HTML format by clicking on \'Edit this Page\' in the left hand menu.</body' )
m = re.search(rb"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE) m = re.search(rb"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE)
if m: if m:
title, = m.groups() title, = m.groups()
@@ -212,7 +239,8 @@ def editexpopage(request, path):
# if re.search(r"iso-8859-1", html): # if re.search(r"iso-8859-1", html):
# body = str(body, "iso-8859-1") # body = str(body, "iso-8859-1")
else: else:
return HttpResponse("Page could not be split into header and body") #return HttpResponse("Page could not be split into header and body")
return HttpResponse(default_head + html.decode() + '<h3>HTML Parsing failure:<br>Page could not be parsed into header and body:<br>failure detected in expowebpage in views.expo.py</h3> Please edit this <var>:expoweb:</var> page to be in the expected full HTML format by clicking on \'Edit this Page\' in the left hand menu.</body' )
except IOError: except IOError:
print("### File not found ### ", filepath) print("### File not found ### ", filepath)
filefound = False filefound = False
@@ -242,7 +270,7 @@ def editexpopage(request, path):
f = open(filepath, "w") f = open(filepath, "w")
f.write(result) f.write(result)
f.close() f.close()
return HttpResponseRedirect(reverse('flatpage', args=[path])) # Redirect after POST return HttpResponseRedirect(reverse('expopage', args=[path])) # Redirect after POST
else: else:
if filefound: if filefound:
m = re.search(r"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE) m = re.search(r"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE)