fix bug in parsing bad HTML pages, e.g. expo 82

2021-04-05 14:49:06 +01:00
parent 409037bdf3
commit 9db1a8490c
1 changed files with 32 additions and 4 deletions
@@ -23,6 +23,33 @@ This was NOT django.contrib.flatpages which stores HTML in the database, so the
 Then it was incorporated into troggle directly, rather than being an unnecessary external package.
 '''

+default_head = '''<head>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+<title>CUCC Expedition -  index</title>
+<link rel="stylesheet" type="text/css" href="../css/main2.css" />
+<link rel="stylesheet" type="text/css" href="../../css/main2.css" />
+<link rel="stylesheet" type="text/css" href="../../../css/main2.css" />
+</head>
+<body>
+<h1>Expo</h1>
+<h2 id="tophead">CUCC Expedition</h2>
+
+<ul id="links">
+<li><a href="/index.htm">Home</a></li>
+<li><a href="/infodx.htm">Main Index</a></li>
+<li><a href="/handbook/index.htm">Handbook</a></li>
+<li><a href="/handbook/computing/onlinesystems.html">Online systems</a></li>
+<li><a href="/pubs.htm">Reports</a></li>
+<li><a href="/areas.htm">Areas</a></li>
+<li><a href="/caves">Caves</a></li>
+<li><a href="/expedition/2019">Troggle</a></li>
+<li><form name=P method=get action="/search" target="_top">
+    <input id="omega-autofocus" type=search name=P value="testing" size=8 autofocus>
+    <input type=submit value="Search"></li>
+<li><a href="/years/1983/index.html_edit" class="editlink"><strong>Edit this page</strong></a></li>
+
+</ul>'''
+
 def expofiles_redirect(request, path):
    '''This is used only when running as a test system without a local copy of /expofiles/
    '''
@@ -57,7 +84,7 @@ def expofilesdir(request, dirpath, filepath):
    return render(request, 'dirdisplay.html', { 'filepath': urlpath, 'fileitems':fileitems, 'diritems': diritems,'settings': settings })

 def expowebpage(request, expowebpath, path):
-    '''Adds memnus and serves an HTML page
+    '''Adds menus and serves an HTML page
    '''
    if not Path(expowebpath / path).is_file():
        return render(request, 'pagenotfound.html', {'path': path})
@@ -69,7 +96,7 @@ def expowebpage(request, expowebpath, path):
    if m:
        preheader, headerattrs, head, postheader, bodyattrs, body, postbody = m.groups()
    else:
-        return HttpResponse(html + "HTML Parsing failure: Page could not be split into header and body: failed in expowebpage in views.expo.py")
+        return HttpResponse(default_head +  html.decode() + '<h3>HTML Parsing failure:<br>Page could not be parsed into header and body:<br>failure detected in expowebpage in views.expo.py</h3> Please edit this <var>:expoweb:</var> page to be in the expected full  HTML format by clicking on \'Edit this Page\' in the left hand menu.</body' )
    m = re.search(rb"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE)
    if m:
        title, = m.groups()
@@ -212,7 +239,8 @@ def editexpopage(request, path):
 #            if re.search(r"iso-8859-1", html):
 #                body = str(body, "iso-8859-1")
        else:
-            return HttpResponse("Page could not be split into header and body")
+            #return HttpResponse("Page could not be split into header and body")
+            return HttpResponse(default_head +  html.decode() + '<h3>HTML Parsing failure:<br>Page could not be parsed into header and body:<br>failure detected in expowebpage in views.expo.py</h3> Please edit this <var>:expoweb:</var> page to be in the expected full  HTML format by clicking on \'Edit this Page\' in the left hand menu.</body' )
    except IOError:
        print("### File not found ### ", filepath)
        filefound = False
@@ -242,7 +270,7 @@ def editexpopage(request, path):
            f = open(filepath, "w")
            f.write(result)
            f.close()
-            return HttpResponseRedirect(reverse('flatpage', args=[path])) # Redirect after POST
+            return HttpResponseRedirect(reverse('expopage', args=[path])) # Redirect after POST
    else:
        if filefound:
            m = re.search(r"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE)