From 1111672722f0805cec39076d640d9c1acd2da4c8 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Thu, 16 Apr 2015 19:51:12 +0200 Subject: Pass through Unicode non-characters Despite their name, Unicode non-characters are valid code points. They should be passed through by a library like libcmark. --- src/utf8.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/utf8.c b/src/utf8.c index d77c5d1..b83c2a5 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -172,8 +172,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst) case 3: uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) + (str[2] & 0x3F); - if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) || - (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1; + if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000)) uc = -1; break; case 4: uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) @@ -182,7 +181,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst) break; } - if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE)) + if (uc < 0) return -1; *dst = uc; -- cgit v1.2.3