summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nick Wellnhofer <wellnhofer@aevum.de> 2015-04-16 19:51:12 +0200
committer: Nick Wellnhofer <wellnhofer@aevum.de> 2015-04-16 20:03:47 +0200
commit: 1111672722f0805cec39076d640d9c1acd2da4c8 (patch)
tree: b1abad8547122dddf7d6665b392b7d968f27c1bb
parent: 520d5a6fc6a6cf100d9414d588079f2a076801c5 (diff)
Pass-through Unicode non-characters
Despite their name, Unicode non-characters are valid code points. They should be passed through by a library like libcmark.
-rw-r--r-- src/utf8.c 5
1 files changed, 2 insertions, 3 deletions
diff --git a/src/utf8.c b/src/utf8.c
index d77c5d1..b83c2a5 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -172,8 +172,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
case 3:
uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6)
+ (str[2] & 0x3F);
- if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) ||
- (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1;
+ if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000)) uc = -1;
break;
case 4:
uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12)
@@ -182,7 +181,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
break;
}
- if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE))
+ if (uc < 0)
return -1;
*dst = uc;