runtests.py: catch HTMLParser errors in normalizer.

author: John MacFarlane <jgm@berkeley.edu> 2014-11-20 08:22:20 -0800
committer: John MacFarlane <jgm@berkeley.edu> 2014-11-20 08:22:20 -0800
commit: c9875cbbbe293e6727a7a25b79e7ea4949ef5670 (patch)
tree: 23d9aaec1026d64d117c3dfd2acdeb5ff63a4085
parent: 698dab76847e5d671cce42a0c0ce2c98c5f07776 (diff)
1 file changed, 9 insertions, 5 deletions
diff --git a/runtests.py b/runtests.py
index b3c8d98..83c331d 100755
--- a/runtests.py
+++ b/runtests.py
@@ -7,7 +7,7 @@ import platform
 from difflib import unified_diff
 from subprocess import *
 import argparse
-from HTMLParser import HTMLParser
+from HTMLParser import HTMLParser, HTMLParseError
 from htmlentitydefs import name2codepoint
 import re
 import cgi
@@ -180,10 +180,14 @@ def normalize_html(html):
     * HTMLParser just swallows CDATA.
     * HTMLParser seems to treat unknown declarations as comments.
     """
-    parser = MyHTMLParser()
-    parser.feed(html.decode(encoding='UTF-8'))
-    parser.close()
-    return parser.output
+    try:
+        parser = MyHTMLParser()
+        parser.feed(html.decode(encoding='UTF-8'))
+        parser.close()
+        return parser.output
+    except HTMLParseError as e:
+        sys.stderr.write("Normalization error: " + e.msg + "\n")
+        return html  # on error, return unnormalized HTML
 
 def print_test_header(headertext, example_number, start_line, end_line):
     print "Example %d (lines %d-%d) %s" % (example_number,start_line,end_line,headertext)
author	John MacFarlane <jgm@berkeley.edu>	2014-11-20 08:22:20 -0800
committer	John MacFarlane <jgm@berkeley.edu>	2014-11-20 08:22:20 -0800
commit	c9875cbbbe293e6727a7a25b79e7ea4949ef5670 (patch)
tree	23d9aaec1026d64d117c3dfd2acdeb5ff63a4085
parent	698dab76847e5d671cce42a0c0ce2c98c5f07776 (diff)