diff options
author | Nick Wellnhofer <wellnhofer@aevum.de> | 2015-04-16 19:51:12 +0200 |
---|---|---|
committer | Nick Wellnhofer <wellnhofer@aevum.de> | 2015-04-16 20:03:47 +0200 |
commit | 1111672722f0805cec39076d640d9c1acd2da4c8 (patch) | |
tree | b1abad8547122dddf7d6665b392b7d968f27c1bb | |
parent | 520d5a6fc6a6cf100d9414d588079f2a076801c5 (diff) |
Pass through Unicode non-characters
Despite their name, Unicode non-characters are valid code points. They
should be passed through by a library like libcmark.
-rw-r--r-- | src/utf8.c | 5 |
1 file changed, 2 insertions, 3 deletions
@@ -172,8 +172,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
     case 3:
         uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6)
             + (str[2] & 0x3F);
-        if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) ||
-            (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1;
+        if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000)) uc = -1;
         break;
     case 4:
         uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12)
@@ -182,7 +181,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
         break;
     }
 
-    if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE))
+    if (uc < 0)
         return -1;
 
     *dst = uc;