Fix bug in parsing bad HTML pages, e.g. expo 82

2021-04-05 14:49:06 +01:00 · 2021-04-05 14:49:06 +01:00 · 9db1a8490c
commit 9db1a8490c
parent 409037bdf3
1 changed files with 32 additions and 4 deletions
--- a/core/views/expo.py
+++ b/core/views/expo.py
@ -23,6 +23,33 @@ This was NOT django.contrib.flatpages which stores HTML in the database, so the
 Then it was incorporated into troggle directly, rather than being an unnecessary external package.
 '''
 # Fallback page head and navigation menu. It is prepended to the raw page text
 # when a page cannot be split into header and body (see the parsing-failure
 # responses in expowebpage and editexpopage).
 # - The stylesheet is linked at three relative depths; presumably so that one of
 #   them resolves whatever directory depth the failing page sits at - TODO confirm.
 # - <body> is opened here and left open on purpose: the caller appends the page
 #   text and the closing tag after this fragment.
 # - The search <form> is closed inside its own <li>, so the "Edit this page"
 #   item that follows is not swallowed by a dangling form element.
 # - NOTE(review): the "Edit this page" link is hard-coded to
 #   /years/1983/index.html_edit rather than the failing page's own path, and the
 #   search box is pre-filled with "testing" - both look like leftovers; confirm.
 default_head = '''<head>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
 <title>CUCC Expedition -  index</title>
 <link rel="stylesheet" type="text/css" href="../css/main2.css" />
 <link rel="stylesheet" type="text/css" href="../../css/main2.css" />
 <link rel="stylesheet" type="text/css" href="../../../css/main2.css" />
 </head>
 <body>
 <h1>Expo</h1>
 <h2 id="tophead">CUCC Expedition</h2>
 <ul id="links">
 <li><a href="/index.htm">Home</a></li>
 <li><a href="/infodx.htm">Main Index</a></li>
 <li><a href="/handbook/index.htm">Handbook</a></li>
 <li><a href="/handbook/computing/onlinesystems.html">Online systems</a></li>
 <li><a href="/pubs.htm">Reports</a></li>
 <li><a href="/areas.htm">Areas</a></li>
 <li><a href="/caves">Caves</a></li>
 <li><a href="/expedition/2019">Troggle</a></li>
 <li><form name=P method=get action="/search" target="_top">
    <input id="omega-autofocus" type=search name=P value="testing" size=8 autofocus>
    <input type=submit value="Search"></form></li>
 <li><a href="/years/1983/index.html_edit" class="editlink"><strong>Edit this page</strong></a></li>
 </ul>'''
 def expofiles_redirect(request, path):
    '''This is used only when running as a test system without a local copy of /expofiles/
    '''
@ -57,7 +84,7 @@ def expofilesdir(request, dirpath, filepath):
    return render(request, 'dirdisplay.html', { 'filepath': urlpath, 'fileitems':fileitems, 'diritems': diritems,'settings': settings })
 def expowebpage(request, expowebpath, path):
-    '''Adds memnus and serves an HTML page
+    '''Adds menus and serves an HTML page
    '''
    if not Path(expowebpath / path).is_file():
        return render(request, 'pagenotfound.html', {'path': path})
@ -69,7 +96,7 @@ def expowebpage(request, expowebpath, path):
    if m:
        preheader, headerattrs, head, postheader, bodyattrs, body, postbody = m.groups()
    else:
-        return HttpResponse(html + "HTML Parsing failure: Page could not be split into header and body: failed in expowebpage in views.expo.py")
+        return HttpResponse(default_head +  html.decode() + '<h3>HTML Parsing failure:<br>Page could not be parsed into header and body:<br>failure detected in expowebpage in views.expo.py</h3> Please edit this <var>:expoweb:</var> page to be in the expected full  HTML format by clicking on \'Edit this Page\' in the left hand menu.</body' )
    m = re.search(rb"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE)
    if m:
        title, = m.groups()
@ -212,7 +239,8 @@ def editexpopage(request, path):
 #            if re.search(r"iso-8859-1", html):
 #                body = str(body, "iso-8859-1")
        else:
-            return HttpResponse("Page could not be split into header and body")
+            #return HttpResponse("Page could not be split into header and body")
            return HttpResponse(default_head +  html.decode() + '<h3>HTML Parsing failure:<br>Page could not be parsed into header and body:<br>failure detected in expowebpage in views.expo.py</h3> Please edit this <var>:expoweb:</var> page to be in the expected full  HTML format by clicking on \'Edit this Page\' in the left hand menu.</body' )
    except IOError:
        print("### File not found ### ", filepath)
        filefound = False
@ -242,7 +270,7 @@ def editexpopage(request, path):
            f = open(filepath, "w")
            f.write(result)
            f.close()
-            return HttpResponseRedirect(reverse('flatpage', args=[path])) # Redirect after POST
+            return HttpResponseRedirect(reverse('expopage', args=[path])) # Redirect after POST
    else:
        if filefound:
            m = re.search(r"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE)