Fix bug in parsing bad HTML pages, e.g. expo 82

This commit is contained in:
Philip Sargent 2021-04-05 14:49:06 +01:00
parent 409037bdf3
commit 9db1a8490c

View File

@ -23,6 +23,33 @@ This was NOT django.contrib.flatpages which stores HTML in the database, so the
Then it was incorporated into troggle directly, rather than being an unnecessary external package.
'''
# Fallback page header and navigation menu. It is prepended to the raw page
# text when a page cannot be split into header and body (see the
# parsing-failure branches of expowebpage and editexpopage).
# The <body> element is deliberately left open here: the callers append the
# page content and an error message after this string.
# The three stylesheet links differ only in relative depth, presumably so that
# one of them resolves whatever the depth of the failing page -- TODO confirm.
# NOTE(review): the "Edit this page" link is hard-coded to
# /years/1983/index.html_edit and the search box is pre-filled with "testing";
# both look like leftovers -- confirm whether they are intended.
default_head = '''<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<title>CUCC Expedition - index</title>
<link rel="stylesheet" type="text/css" href="../css/main2.css" />
<link rel="stylesheet" type="text/css" href="../../css/main2.css" />
<link rel="stylesheet" type="text/css" href="../../../css/main2.css" />
</head>
<body>
<h1>Expo</h1>
<h2 id="tophead">CUCC Expedition</h2>
<ul id="links">
<li><a href="/index.htm">Home</a></li>
<li><a href="/infodx.htm">Main Index</a></li>
<li><a href="/handbook/index.htm">Handbook</a></li>
<li><a href="/handbook/computing/onlinesystems.html">Online systems</a></li>
<li><a href="/pubs.htm">Reports</a></li>
<li><a href="/areas.htm">Areas</a></li>
<li><a href="/caves">Caves</a></li>
<li><a href="/expedition/2019">Troggle</a></li>
<li><form name=P method=get action="/search" target="_top">
<input id="omega-autofocus" type=search name=P value="testing" size=8 autofocus>
<input type=submit value="Search"></form></li>
<li><a href="/years/1983/index.html_edit" class="editlink"><strong>Edit this page</strong></a></li>
</ul>'''
def expofiles_redirect(request, path):
'''This is used only when running as a test system without a local copy of /expofiles/
'''
@ -57,7 +84,7 @@ def expofilesdir(request, dirpath, filepath):
return render(request, 'dirdisplay.html', { 'filepath': urlpath, 'fileitems':fileitems, 'diritems': diritems,'settings': settings })
def expowebpage(request, expowebpath, path):
'''Adds memnus and serves an HTML page
'''Adds menus and serves an HTML page
'''
if not Path(expowebpath / path).is_file():
return render(request, 'pagenotfound.html', {'path': path})
@ -69,7 +96,7 @@ def expowebpage(request, expowebpath, path):
if m:
preheader, headerattrs, head, postheader, bodyattrs, body, postbody = m.groups()
else:
return HttpResponse(html + "HTML Parsing failure: Page could not be split into header and body: failed in expowebpage in views.expo.py")
return HttpResponse(default_head + html.decode() + '<h3>HTML Parsing failure:<br>Page could not be parsed into header and body:<br>failure detected in expowebpage in views.expo.py</h3> Please edit this <var>:expoweb:</var> page to be in the expected full HTML format by clicking on \'Edit this Page\' in the left hand menu.</body' )
m = re.search(rb"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE)
if m:
title, = m.groups()
@ -212,7 +239,8 @@ def editexpopage(request, path):
# if re.search(r"iso-8859-1", html):
# body = str(body, "iso-8859-1")
else:
return HttpResponse("Page could not be split into header and body")
#return HttpResponse("Page could not be split into header and body")
return HttpResponse(default_head + html.decode() + '<h3>HTML Parsing failure:<br>Page could not be parsed into header and body:<br>failure detected in expowebpage in views.expo.py</h3> Please edit this <var>:expoweb:</var> page to be in the expected full HTML format by clicking on \'Edit this Page\' in the left hand menu.</body' )
except IOError:
print("### File not found ### ", filepath)
filefound = False
@ -242,7 +270,7 @@ def editexpopage(request, path):
f = open(filepath, "w")
f.write(result)
f.close()
return HttpResponseRedirect(reverse('flatpage', args=[path])) # Redirect after POST
return HttpResponseRedirect(reverse('expopage', args=[path])) # Redirect after POST
else:
if filefound:
m = re.search(r"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE)