summary | refs | log | tree | commit | diff
path: root/src/utf8.c
diff options
context:
space:
mode:
author: Vicent Marti <tanoku@gmail.com> 2014-09-10 19:40:40 +0200
committer: Vicent Marti <tanoku@gmail.com> 2014-09-10 19:40:40 +0200
commit: 79e7a4bbf7055e33b346564db769f03e85f98988 (patch)
tree: bc5879b6f8c27c83bb82c1b639d7ff825f190f56 /src/utf8.c
parent: c47e3a34adac00a262f72c6d17a1c87deefa33c4 (diff)
Improve invalid UTF8 codepoint skipping
Diffstat (limited to 'src/utf8.c')
-rw-r--r-- src/utf8.c 10
1 files changed, 5 insertions, 5 deletions
diff --git a/src/utf8.c b/src/utf8.c
index 1b0224b..6b34831 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -79,14 +79,14 @@ void utf8proc_detab(strbuf *ob, const uint8_t *line, size_t size)
} else {
int charlen = utf8proc_charlen(line + i, size - i);
- if (charlen < 0) {
- encode_unknown(ob);
- i++;
- } else {
+ if (charlen >= 0) {
strbuf_put(ob, line + i, charlen);
- i += charlen;
+ } else {
+ encode_unknown(ob);
+ charlen = -charlen;
}
+ i += charlen;
tab += 1;
}
}