summary | refs | log | tree | commit | diff
path: root/src/utf8.c
diff options
context:
space:
mode:
author Nick Wellnhofer <wellnhofer@aevum.de> 2015-06-09 18:26:04 +0200
committer Nick Wellnhofer <wellnhofer@aevum.de> 2015-06-09 18:32:54 +0200
commit 8d997c85ee1452480ed3d821ce0642f7e6e5b9e6 (patch)
tree e46c0ec079ff6e62c35bd7de544f4a794caeaa27 /src/utf8.c
parent 8ac509f8bf0fe9f9f0b277cb612f9deb5bd072a5 (diff)
Roll utf8proc_charlen into utf8proc_valid
Speeds up "make bench" by another percent.
Diffstat (limited to 'src/utf8.c')
-rw-r--r-- src/utf8.c 21
1 files changed, 18 insertions, 3 deletions
diff --git a/src/utf8.c b/src/utf8.c
index 248a199..a4449dd 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -56,13 +56,18 @@ static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len)
// Validate a single UTF-8 character according to RFC 3629.
static int utf8proc_valid(const uint8_t *str, bufsize_t str_len)
{
- int length = utf8proc_charlen(str, str_len);
+ int length = utf8proc_utf8class[str[0]];
- if (length <= 0)
- return length;
+ if (!length)
+ return -1;
+
+ if ((bufsize_t)length > str_len)
+ return -str_len;
switch (length) {
case 2:
+ if ((str[1] & 0xC0) != 0x80)
+ return -1;
if (str[0] < 0xC2) {
// Overlong
return -length;
@@ -70,6 +75,10 @@ static int utf8proc_valid(const uint8_t *str, bufsize_t str_len)
break;
case 3:
+ if ((str[1] & 0xC0) != 0x80)
+ return -1;
+ if ((str[2] & 0xC0) != 0x80)
+ return -2;
if (str[0] == 0xE0) {
if (str[1] < 0xA0) {
// Overlong
@@ -84,6 +93,12 @@ static int utf8proc_valid(const uint8_t *str, bufsize_t str_len)
break;
case 4:
+ if ((str[1] & 0xC0) != 0x80)
+ return -1;
+ if ((str[2] & 0xC0) != 0x80)
+ return -2;
+ if ((str[3] & 0xC0) != 0x80)
+ return -3;
if (str[0] == 0xF0) {
if (str[1] < 0x90) {
// Overlong