From 1111672722f0805cec39076d640d9c1acd2da4c8 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Thu, 16 Apr 2015 19:51:12 +0200
Subject: Pass through Unicode non-characters

Despite their name, Unicode non-characters are valid code points. They
should be passed through by a library like libcmark.
---
 src/utf8.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'src/utf8.c')

diff --git a/src/utf8.c b/src/utf8.c
index d77c5d1..b83c2a5 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -172,8 +172,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
 	case 3:
 		uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) <<  6)
 		     + (str[2] & 0x3F);
-		if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) ||
-		    (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1;
+		if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000)) uc = -1;
 		break;
 	case 4:
 		uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12)
@@ -182,7 +181,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
 		break;
 	}
 
-	if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE))
+	if (uc < 0)
 		return -1;
 
 	*dst = uc;
-- 
cgit v1.2.3