From 64e1394ae76409f02b00c254f119a64a2d1ce11e Mon Sep 17 00:00:00 2001 From: Yuki Izumi Date: Sat, 5 Nov 2016 09:04:48 +1100 Subject: Fix for non-matching entities (#161) * Add test to illustrate issue * Provide some test fixes * Don't neglect CounterClockwiseContourIntegral * Fix ~10% of cases not matching strncmp returns 0 if the first 'len' bytes of cmark_entities[i].entity match s; we check equal length in the first if by checking if cmark_entities[i].entity[len] == 0, but we neglect the case where cmp == 0 && cmark_entities[i].entity[len] != 0. This should be treated as the same as cmp < 0, because strcmp("abc", "abcd") < 0. * Don't depend on py3.3 in tests --- src/entities.inc | 2 +- src/houdini_html_u.c | 2 +- test/CMakeLists.txt | 4 +++ test/entity_tests.py | 68 ++++++++++++++++++++++++++++++++++++++++++++++ tools/make_entities_inc.py | 2 +- 5 files changed, 75 insertions(+), 3 deletions(-) create mode 100644 test/entity_tests.py diff --git a/src/entities.inc b/src/entities.inc index ec3d2a9..a7c36e2 100644 --- a/src/entities.inc +++ b/src/entities.inc @@ -6,7 +6,7 @@ struct cmark_entity_node { }; #define CMARK_ENTITY_MIN_LENGTH 2 -#define CMARK_ENTITY_MAX_LENGTH 31 +#define CMARK_ENTITY_MAX_LENGTH 32 #define CMARK_NUM_ENTITIES 2125 static const struct cmark_entity_node cmark_entities[] = { diff --git a/src/houdini_html_u.c b/src/houdini_html_u.c index 6e8d620..30d08aa 100644 --- a/src/houdini_html_u.c +++ b/src/houdini_html_u.c @@ -16,7 +16,7 @@ static const unsigned char *S_lookup(int i, int low, int hi, strncmp((const char *)s, (const char *)cmark_entities[i].entity, len); if (cmp == 0 && cmark_entities[i].entity[len] == 0) { return (const unsigned char *)cmark_entities[i].bytes; - } else if (cmp < 0 && i > low) { + } else if (cmp <= 0 && i > low) { j = i - ((i - low) / 2); if (j == i) j -= 1; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3b23cff..2a597ab 100755 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -60,6 +60,10 @@ IF (PYTHONINTERP_FOUND) "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark" ) + add_test(entity_executable + ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/entity_tests.py" + "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src" + ) ELSE(PYTHONINTERP_FOUND) diff --git a/test/entity_tests.py b/test/entity_tests.py new file mode 100644 index 0000000..0e3daad --- /dev/null +++ b/test/entity_tests.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import re +import os +import argparse +import sys +import platform +import html +from cmark import CMark + +def get_entities(): + regex = r'^{\(unsigned char\*\)"([^"]+)", \{([^}]+)\}' + with open(os.path.join(os.path.dirname(__file__), '..', 'src', 'entities.inc')) as f: + code = f.read() + entities = [] + for entity, utf8 in re.findall(regex, code, re.MULTILINE): + utf8 = bytes(map(int, utf8.split(", ")[:-1])).decode('utf-8') + entities.append((entity, utf8)) + return entities + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Run cmark tests.') + parser.add_argument('--program', dest='program', nargs='?', default=None, + help='program to test') + parser.add_argument('--library-dir', dest='library_dir', nargs='?', + default=None, help='directory containing dynamic library') + args = parser.parse_args(sys.argv[1:]) + +cmark = CMark(prog=args.program, library_dir=args.library_dir) + +entities = get_entities() + +passed = 0 +errored = 0 +failed = 0 + +exceptions = { + 'quot': '"', + 'QUOT': '"', + + # These are broken, but I'm not too worried about them. + 'nvlt': '<⃒', + 'nvgt': '>⃒', +} + +print("Testing entities:") +for entity, utf8 in entities: + [rc, actual, err] = cmark.to_html("&{};".format(entity)) + check = exceptions.get(entity, utf8) + + if rc != 0: + errored += 1 + print(entity, '[ERRORED (return code {})]'.format(rc)) + print(err) + elif check in actual: + print(entity, '[PASSED]') + passed += 1 + else: + print(entity, '[FAILED]') + print(repr(actual)) + failed += 1 + +print("{} passed, {} failed, {} errored".format(passed, failed, errored)) +if failed == 0 and errored == 0: + exit(0) +else: + exit(1) diff --git a/tools/make_entities_inc.py b/tools/make_entities_inc.py index 7ed307b..7b8ee41 100644 --- a/tools/make_entities_inc.py +++ b/tools/make_entities_inc.py @@ -20,7 +20,7 @@ struct cmark_entity_node { }; #define CMARK_ENTITY_MIN_LENGTH 2 -#define CMARK_ENTITY_MAX_LENGTH 31""") +#define CMARK_ENTITY_MAX_LENGTH 32""") print("#define CMARK_NUM_ENTITIES " + str(len(entities))); -- cgit v1.2.3