summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu>, 2014-11-20 08:22:20 -0800
committer: John MacFarlane <jgm@berkeley.edu>, 2014-11-20 08:22:20 -0800
commit: c9875cbbbe293e6727a7a25b79e7ea4949ef5670 (patch)
tree: 23d9aaec1026d64d117c3dfd2acdeb5ff63a4085
parent: 698dab76847e5d671cce42a0c0ce2c98c5f07776 (diff)
runtests.py: catch HTMLParser errors in normalizer.
-rwxr-xr-x  runtests.py  14
1 files changed, 9 insertions, 5 deletions
diff --git a/runtests.py b/runtests.py
index b3c8d98..83c331d 100755
--- a/runtests.py
+++ b/runtests.py
@@ -7,7 +7,7 @@ import platform
from difflib import unified_diff
from subprocess import *
import argparse
-from HTMLParser import HTMLParser
+from HTMLParser import HTMLParser, HTMLParseError
from htmlentitydefs import name2codepoint
import re
import cgi
@@ -180,10 +180,14 @@ def normalize_html(html):
* HTMLParser just swallows CDATA.
* HTMLParser seems to treat unknown declarations as comments.
"""
- parser = MyHTMLParser()
- parser.feed(html.decode(encoding='UTF-8'))
- parser.close()
- return parser.output
+ try:
+ parser = MyHTMLParser()
+ parser.feed(html.decode(encoding='UTF-8'))
+ parser.close()
+ return parser.output
+ except HTMLParseError as e:
+ sys.stderr.write("Normalization error: " + e.msg + "\n")
+ return html # on error, return unnormalized HTML
def print_test_header(headertext, example_number, start_line, end_line):
print "Example %d (lines %d-%d) %s" % (example_number,start_line,end_line,headertext)