Don't raise exception on invalid UTF-8 output

This could be reproduced with Kramdown version 1.5.0 by running:

    ./test/spec_tests.py --program kramdown --pattern Enti

The main problem was that the uncaught exception aborted the test run, so all subsequent tests were never carried out.

After this commit, such a case is simply counted as a failure. The error message printed is of the form:

    Unicode error: 'utf8' codec can't decode byte 0xfc in position 8: invalid start byte
    '<p># \xd3\x92 \xcf\xa0 \xef\xbf\xbd</p>\n'
    '<p># \xd3\x92 \xcf\xa0 \xfc\x85\xb8\xb0\xa9\xb8</p>\n'
author: Ciro Santilli <ciro.santilli@gmail.com> 2014-12-21 14:33:31 +0100
committer: Ciro Santilli <ciro.santilli@gmail.com> 2014-12-21 14:37:50 +0100
commit: 46633b1f00204a310e630009f0420218186439e2 (patch)
tree: 5edb31cf540ab825d96601b46c939d1b6b0ca1ef /test/spec_tests.py
parent: b28c97c9b8af266d4f12deb5febcf28807d9f5c6 (diff)
1 files changed, 16 insertions, 6 deletions
diff --git a/test/spec_tests.py b/test/spec_tests.py
index 23ae502..99ac0dd 100755
--- a/test/spec_tests.py
+++ b/test/spec_tests.py
@@ -37,8 +37,13 @@ def do_test(test, normalize):
     [retcode, actual_html, err] = cmark.to_html(test['markdown'])
     if retcode == 0:
         expected_html = test['html']
+        unicode_error = None
         if normalize:
-            passed = normalize_html(actual_html) == normalize_html(expected_html)
+            try:
+                passed = normalize_html(actual_html) == normalize_html(expected_html)
+            except UnicodeDecodeError, e:
+                unicode_error = e
+                passed = False
         else:
             passed = actual_html == expected_html
         if passed:
@@ -46,11 +51,16 @@ def do_test(test, normalize):
         else:
             print_test_header(test['section'], test['example'], test['start_line'], test['end_line'])
             sys.stdout.write(test['markdown'])
-            expected_html_lines = expected_html.splitlines(True)
-            actual_html_lines = actual_html.splitlines(True)
-            for diffline in unified_diff(expected_html_lines, actual_html_lines,
-                            "expected HTML", "actual HTML"):
-                sys.stdout.write(diffline)
+            if unicode_error:
+                print "Unicode error: " + str(unicode_error)
+                print repr(expected_html)
+                print repr(actual_html)
+            else:
+                expected_html_lines = expected_html.splitlines(True)
+                actual_html_lines = actual_html.splitlines(True)
+                for diffline in unified_diff(expected_html_lines, actual_html_lines,
+                                "expected HTML", "actual HTML"):
+                    sys.stdout.write(diffline)
             sys.stdout.write('\n')
             return 'fail'
     else:
author	Ciro Santilli <ciro.santilli@gmail.com>	2014-12-21 14:33:31 +0100
committer	Ciro Santilli <ciro.santilli@gmail.com>	2014-12-21 14:37:50 +0100
commit	46633b1f00204a310e630009f0420218186439e2 (patch)
tree	5edb31cf540ab825d96601b46c939d1b6b0ca1ef /test/spec_tests.py
parent	b28c97c9b8af266d4f12deb5febcf28807d9f5c6 (diff)