summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Vicent Marti <tanoku@gmail.com> 2014-09-10 19:39:03 +0200
committer: Vicent Marti <tanoku@gmail.com> 2014-09-10 19:39:03 +0200
commit: c47e3a34adac00a262f72c6d17a1c87deefa33c4 (patch)
tree: 050271490a6d3c22e643fac0392193cd9ff19210
parent: c04e1e7aef06ce0836984b17e48a1d09bb83ce04 (diff)
Fix infinite loop when case folding invalid UTF8 chars
-rw-r--r-- src/utf8.c 24
-rw-r--r-- src/utf8.h 4
2 files changed, 14 insertions, 14 deletions
diff --git a/src/utf8.c b/src/utf8.c
index c65aec6..1b0224b 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -29,9 +29,9 @@ static void encode_unknown(strbuf *buf)
strbuf_put(buf, repl, 3);
}
-ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len)
+int utf8proc_charlen(const uint8_t *str, int str_len)
{
- ssize_t length, i;
+ int length, i;
if (!str_len)
return 0;
@@ -42,11 +42,11 @@ ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len)
return -1;
if (str_len >= 0 && length > str_len)
- return -1;
+ return -str_len;
for (i = 1; i < length; i++) {
if ((str[i] & 0xC0) != 0x80)
- return -1;
+ return -i;
}
return length;
@@ -77,7 +77,7 @@ void utf8proc_detab(strbuf *ob, const uint8_t *line, size_t size)
i += 1;
tab += numspaces;
} else {
- ssize_t charlen = utf8proc_charlen(line + i, size - i);
+ int charlen = utf8proc_charlen(line + i, size - i);
if (charlen < 0) {
encode_unknown(ob);
@@ -92,9 +92,9 @@ void utf8proc_detab(strbuf *ob, const uint8_t *line, size_t size)
}
}
-ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst)
+int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
{
- ssize_t length;
+ int length;
int32_t uc = -1;
*dst = -1;
@@ -177,15 +177,15 @@ void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len)
utf8proc_encode_char(x, dest)
while (len > 0) {
- ssize_t char_len = utf8proc_iterate(str, len, &c);
+ int char_len = utf8proc_iterate(str, len, &c);
- if (char_len < 0) {
+ if (char_len >= 0) {
+#include "case_fold_switch.inc"
+ } else {
encode_unknown(dest);
- continue;
+ char_len = -char_len;
}
-#include "case_fold_switch.inc"
-
str += char_len;
len -= char_len;
}
diff --git a/src/utf8.h b/src/utf8.h
index 9506b75..c971250 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -6,8 +6,8 @@
void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len);
void utf8proc_encode_char(int32_t uc, strbuf *buf);
-ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst);
-ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len);
+int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst);
+int utf8proc_charlen(const uint8_t *str, int str_len);
void utf8proc_detab(strbuf *dest, const uint8_t *line, size_t size);
#endif