From 33d0a4bc9b70b53cc7e107b77f582233b36ca1b2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 12 Jun 2015 13:38:24 -0700 Subject: Removed utf8 validation. We now replace null characters in the line splitting code. --- src/blocks.c | 63 ++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index 08f2e63..4de5924 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -497,44 +497,53 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, bool eof) { const unsigned char *end = buffer + len; + static const uint8_t repl[] = {239, 191, 189}; while (buffer < end) { const unsigned char *eol; - size_t line_len; - bufsize_t bufsize; + bufsize_t chunk_len; + bool process = false; for (eol = buffer; eol < end; ++eol) { - if (S_is_line_end_char(*eol)) + if (S_is_line_end_char(*eol)) { + if (eol < end && *eol == '\r') + eol++; + if (eol < end && *eol == '\n') + eol++; + process = true; break; + } + if (*eol == '\0' && eol < end - 1) { + break; + } } - if (eol >= end) - eol = NULL; - - if (eol) { - if (eol < end && *eol == '\r') - eol++; - if (eol < end && *eol == '\n') - eol++; - line_len = eol - buffer; - } else if (eof) { - line_len = end - buffer; - } else { - bufsize = cmark_strbuf_check_bufsize(end - buffer); - cmark_strbuf_put(parser->linebuf, buffer, bufsize); - break; + if (eol >= end && eof) { + process = true; } - bufsize = cmark_strbuf_check_bufsize(line_len); - if (parser->linebuf->size > 0) { - cmark_strbuf_put(parser->linebuf, buffer, bufsize); - S_process_line(parser, parser->linebuf->ptr, - parser->linebuf->size); - cmark_strbuf_clear(parser->linebuf); + chunk_len = cmark_strbuf_check_bufsize(eol - buffer); + if (process) { + if (parser->linebuf->size > 0) { + cmark_strbuf_put(parser->linebuf, buffer, chunk_len); + S_process_line(parser, parser->linebuf->ptr, + parser->linebuf->size); + cmark_strbuf_clear(parser->linebuf); + } else { + S_process_line(parser, buffer, chunk_len); + } } else { - S_process_line(parser, buffer, bufsize); + if (eol < end && *eol == '\0') { + // omit NULL byte + cmark_strbuf_put(parser->linebuf, buffer, chunk_len); + // add replacement character + cmark_strbuf_put(parser->linebuf, repl, 3); + chunk_len += 1; // so we advance the buffer past NULL + } else { + cmark_strbuf_put(parser->linebuf, buffer, chunk_len); + } } - buffer += line_len; + buffer += chunk_len; } } @@ -619,7 +628,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte cmark_chunk input; bool maybe_lazy; - utf8proc_check(parser->curline, buffer, bytes); + cmark_strbuf_put(parser->curline, buffer, bytes); parser->offset = 0; parser->column = 0; parser->blank = false; -- cgit v1.2.3