summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Yuki Izumi <kivikakk@github.com> 2016-11-05 09:04:48 +1100
committer: John MacFarlane <jgm@berkeley.edu> 2016-11-04 23:04:48 +0100
commit: 64e1394ae76409f02b00c254f119a64a2d1ce11e (patch)
tree: 4ad30178f0a166666dce869ea5efeb3537cbd61e
parent: 14fe768690b3948c7c1f67f463eb4620fc5746c9 (diff)
Fix for non-matching entities (#161)
* Add test to illustrate issue
* Provide some test fixes
* Don't neglect CounterClockwiseContourIntegral
* Fix ~10% of cases not matching

  strncmp returns 0 if the first 'len' bytes of cmark_entities[i].entity match s; we check equal length in the first if by checking if cmark_entities[i].entity[len] == 0, but we neglect the case where cmp == 0 && cmark_entities[i].entity[len] != 0. This should be treated as the same as cmp < 0, because strcmp("abc", "abcd") < 0.

* Don't depend on py3.3 in tests
-rw-r--r-- src/entities.inc | 2
-rw-r--r-- src/houdini_html_u.c | 2
-rwxr-xr-x test/CMakeLists.txt | 4
-rw-r--r-- test/entity_tests.py | 68
-rw-r--r-- tools/make_entities_inc.py | 2
5 files changed, 75 insertions, 3 deletions
diff --git a/src/entities.inc b/src/entities.inc
index ec3d2a9..a7c36e2 100644
--- a/src/entities.inc
+++ b/src/entities.inc
@@ -6,7 +6,7 @@ struct cmark_entity_node {
};
#define CMARK_ENTITY_MIN_LENGTH 2
-#define CMARK_ENTITY_MAX_LENGTH 31
+#define CMARK_ENTITY_MAX_LENGTH 32
#define CMARK_NUM_ENTITIES 2125
static const struct cmark_entity_node cmark_entities[] = {
diff --git a/src/houdini_html_u.c b/src/houdini_html_u.c
index 6e8d620..30d08aa 100644
--- a/src/houdini_html_u.c
+++ b/src/houdini_html_u.c
@@ -16,7 +16,7 @@ static const unsigned char *S_lookup(int i, int low, int hi,
strncmp((const char *)s, (const char *)cmark_entities[i].entity, len);
if (cmp == 0 && cmark_entities[i].entity[len] == 0) {
return (const unsigned char *)cmark_entities[i].bytes;
- } else if (cmp < 0 && i > low) {
+ } else if (cmp <= 0 && i > low) {
j = i - ((i - low) / 2);
if (j == i)
j -= 1;
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 3b23cff..2a597ab 100755
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -60,6 +60,10 @@ IF (PYTHONINTERP_FOUND)
"${CMAKE_CURRENT_BINARY_DIR}/../src/cmark"
)
+ add_test(entity_executable
+ ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/entity_tests.py"
+ "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src"
+ )
ELSE(PYTHONINTERP_FOUND)
diff --git a/test/entity_tests.py b/test/entity_tests.py
new file mode 100644
index 0000000..0e3daad
--- /dev/null
+++ b/test/entity_tests.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import re
+import os
+import argparse
+import sys
+import platform
+import html
+from cmark import CMark
+
+def get_entities():
+ regex = r'^{\(unsigned char\*\)"([^"]+)", \{([^}]+)\}'
+ with open(os.path.join(os.path.dirname(__file__), '..', 'src', 'entities.inc')) as f:
+ code = f.read()
+ entities = []
+ for entity, utf8 in re.findall(regex, code, re.MULTILINE):
+ utf8 = bytes(map(int, utf8.split(", ")[:-1])).decode('utf-8')
+ entities.append((entity, utf8))
+ return entities
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description='Run cmark tests.')
+ parser.add_argument('--program', dest='program', nargs='?', default=None,
+ help='program to test')
+ parser.add_argument('--library-dir', dest='library_dir', nargs='?',
+ default=None, help='directory containing dynamic library')
+ args = parser.parse_args(sys.argv[1:])
+
+cmark = CMark(prog=args.program, library_dir=args.library_dir)
+
+entities = get_entities()
+
+passed = 0
+errored = 0
+failed = 0
+
+exceptions = {
+ 'quot': '&quot;',
+ 'QUOT': '&quot;',
+
+ # These are broken, but I'm not too worried about them.
+ 'nvlt': '&lt;⃒',
+ 'nvgt': '&gt;⃒',
+}
+
+print("Testing entities:")
+for entity, utf8 in entities:
+ [rc, actual, err] = cmark.to_html("&{};".format(entity))
+ check = exceptions.get(entity, utf8)
+
+ if rc != 0:
+ errored += 1
+ print(entity, '[ERRORED (return code {})]'.format(rc))
+ print(err)
+ elif check in actual:
+ print(entity, '[PASSED]')
+ passed += 1
+ else:
+ print(entity, '[FAILED]')
+ print(repr(actual))
+ failed += 1
+
+print("{} passed, {} failed, {} errored".format(passed, failed, errored))
+if failed == 0 and errored == 0:
+ exit(0)
+else:
+ exit(1)
diff --git a/tools/make_entities_inc.py b/tools/make_entities_inc.py
index 7ed307b..7b8ee41 100644
--- a/tools/make_entities_inc.py
+++ b/tools/make_entities_inc.py
@@ -20,7 +20,7 @@ struct cmark_entity_node {
};
#define CMARK_ENTITY_MIN_LENGTH 2
-#define CMARK_ENTITY_MAX_LENGTH 31""")
+#define CMARK_ENTITY_MAX_LENGTH 32""")
print("#define CMARK_NUM_ENTITIES " + str(len(entities)));