From cdcb65c5a209a1a3be4a1b9e804f5a4292127509 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 13 Jun 2015 15:49:59 -0700 Subject: Fixed entity lookup table. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old one had many errors. The new one is derived from the list in the npm entities package. Since the sequences can now be longer (multi-code-point), we have bumped the length limit from 4 to 8, which also affects houdini_html_u.c. An example of the kind of error that was fixed is given in jgm/commonmark.js#47: `&ngE;` should be rendered as "≧̸" (U+02267 U+00338), but it's actually rendered as "≧" (which is the same as `&gE;`). --- src/houdini_html_u.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/houdini_html_u.c') diff --git a/src/houdini_html_u.c b/src/houdini_html_u.c index e57894d..2362b04 100644 --- a/src/houdini_html_u.c +++ b/src/houdini_html_u.c @@ -69,7 +69,7 @@ houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) if (entity != NULL) { bufsize_t len = 0; - while (len < 4 && entity->utf8[len] != '\0') { + while (len < 8 && entity->utf8[len] != '\0') { ++len; } cmark_strbuf_put(ob, entity->utf8, len); -- cgit v1.2.3