diff options
-rw-r--r-- | src/blocks.c | 2 | ||||
-rw-r--r-- | src/utf8.c | 31 | ||||
-rw-r--r-- | src/utf8.h | 2 |
3 files changed, 12 insertions, 23 deletions
diff --git a/src/blocks.c b/src/blocks.c index 06f6dcb..08f2e63 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -619,7 +619,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte cmark_chunk input; bool maybe_lazy; - cmark_strbuf_put(parser->curline, buffer, bytes); + utf8proc_check(parser->curline, buffer, bytes); parser->offset = 0; parser->column = 0; parser->blank = false; @@ -116,53 +116,42 @@ static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) return length; } -void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, bufsize_t size) +void utf8proc_check(cmark_strbuf *ob, const uint8_t *line, bufsize_t size) { - static const uint8_t whitespace[] = " "; - - bufsize_t i = 0, tab = 0; + bufsize_t i = 0; while (i < size) { bufsize_t org = i; int charlen = 0; - while (i < size && line[i] != '\t') { - if (line[i] >= 0x80) { + while (i < size) { + if (line[i] < 0x80 && line[i] != 0) { + i++; + } else if (line[i] >= 0x80) { charlen = utf8proc_valid(line + i, size - i); if (charlen < 0) { charlen = -charlen; break; } i += charlen; - } else if (line[i] == '\0') { + } else if (line[i] == 0) { // ASCII NUL is technically valid but rejected // for security reasons. charlen = 1; break; - } else { - i++; } - - tab++; } - if (i > org) + if (i > org) { cmark_strbuf_put(ob, line + org, i - org); + } - if (i >= size) + if (i >= size) { break; - - if (line[i] == '\t') { - int numspaces = 4 - (tab % 4); - cmark_strbuf_put(ob, whitespace, numspaces); - i += 1; - tab += numspaces; } else { // Invalid UTF-8 encode_unknown(ob); - i += charlen; - tab += 1; } } } @@ -11,7 +11,7 @@ extern "C" { void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len); void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf); int utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst); -void utf8proc_detab(cmark_strbuf *dest, const uint8_t *line, bufsize_t size); +void utf8proc_check(cmark_strbuf *dest, const uint8_t *line, bufsize_t size); int utf8proc_is_space(int32_t uc); int utf8proc_is_punctuation(int32_t uc); |