From 5a3241c1cec67bbdee20c18b95c5fc0695df5edf Mon Sep 17 00:00:00 2001 From: Ben Trask Date: Fri, 20 Mar 2015 20:30:20 -0400 Subject: Support for CRLF and CR line endings. --- src/blocks.c | 95 +++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 71 insertions(+), 24 deletions(-) (limited to 'src/blocks.c') diff --git a/src/blocks.c b/src/blocks.c index a15f819..0222c0f 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -89,6 +89,10 @@ static bool is_blank(cmark_strbuf *s, int offset) { while (offset < s->size) { switch (s->ptr[offset]) { + case '\r': + if (s->ptr[offset + 1] == '\n') + offset++; + return true; case '\n': return true; case ' ': @@ -126,9 +130,10 @@ static void add_line(cmark_node* node, cmark_chunk *ch, int offset) static void remove_trailing_blank_lines(cmark_strbuf *ln) { int i; + unsigned char c; for (i = ln->size - 1; i >= 0; --i) { - unsigned char c = ln->ptr[i]; + c = ln->ptr[i]; if (c != ' ' && c != '\t' && c != '\r' && c != '\n') break; @@ -139,9 +144,20 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) return; } - i = cmark_strbuf_strchr(ln, '\n', i); - if (i >= 0) + + for(i = 0; i < ln->size; ++i) { + c = ln->ptr[i]; + + if (c != '\r' && c != '\n') + continue; + + // Don't cut a CRLF in half + if (c == '\r' && i+1 < ln->size && ln->ptr[i+1] == '\n') + ++i; + cmark_strbuf_truncate(ln, i); + break; + } } // Check to see if a node ends with a blank line, descending @@ -185,7 +201,6 @@ static int break_out_of_lists(cmark_parser *parser, cmark_node ** bptr) static cmark_node* finalize(cmark_parser *parser, cmark_node* b) { - int firstlinelen; int pos; cmark_node* item; cmark_node* subitem; @@ -204,9 +219,11 @@ finalize(cmark_parser *parser, cmark_node* b) (b->type == NODE_CODE_BLOCK && b->as.code.fenced) || (b->type == NODE_HEADER && b->as.header.setext)) { b->end_line = parser->line_number; - b->end_column = parser->curline->size - - (parser->curline->ptr[parser->curline->size - 1] == '\n' ? - 1 : 0); + b->end_column = parser->curline->size; + if (b->end_column && parser->curline->ptr[b->end_column-1] == '\n') + b->end_column -= 1; + if (b->end_column && parser->curline->ptr[b->end_column-1] == '\r') + b->end_column -= 1; } else { b->end_line = parser->line_number - 1; b->end_column = parser->last_line_length; @@ -232,19 +249,28 @@ finalize(cmark_parser *parser, cmark_node* b) } else { // first line of contents becomes info - firstlinelen = cmark_strbuf_strchr(&b->string_content, '\n', 0); + for (pos = 0; pos < b->string_content.size; ++pos) { + if (b->string_content.ptr[pos] == '\r' || + b->string_content.ptr[pos] == '\n') + break; + } + assert(pos < b->string_content.size); cmark_strbuf tmp = GH_BUF_INIT; houdini_unescape_html_f( &tmp, b->string_content.ptr, - firstlinelen + pos ); cmark_strbuf_trim(&tmp); cmark_strbuf_unescape(&tmp); b->as.code.info = cmark_chunk_buf_detach(&tmp); - cmark_strbuf_drop(&b->string_content, firstlinelen + 1); + if (b->string_content.ptr[pos] == '\r') + pos += 1; + if (b->string_content.ptr[pos] == '\n') + pos += 1; + cmark_strbuf_drop(&b->string_content, pos); } b->as.code.literal = cmark_chunk_buf_detach(&b->string_content); break; @@ -467,13 +493,22 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, const unsigned char *end = buffer + len; while (buffer < end) { - const unsigned char *eol - = (const unsigned char *)memchr(buffer, '\n', - end - buffer); + const unsigned char *eol; size_t line_len; + for (eol = buffer; eol < end; ++eol) { + if (*eol == '\r' || *eol == '\n') + break; + } + if (eol >= end) + eol = NULL; + if (eol) { - line_len = eol + 1 - buffer; + if (eol < end && *eol == '\r') + eol++; + if (eol < end && *eol == '\n') + eol++; + line_len = eol - buffer; } else if (eof) { line_len = end - buffer; } else { @@ -533,6 +568,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) // Add a newline to the end if not present: // TODO this breaks abstraction: + // Note: we assume output is LF-only if (parser->curline->ptr[parser->curline->size - 1] != '\n') { cmark_strbuf_putc(parser->curline, '\n'); } @@ -556,7 +592,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) } indent = first_nonspace - offset; - blank = peek_at(&input, first_nonspace) == '\n'; + blank = peek_at(&input, first_nonspace) == '\n' || + peek_at(&input, first_nonspace) == '\r'; if (container->type == NODE_BLOCK_QUOTE) { matched = indent <= 3 && peek_at(&input, first_nonspace) == '>'; @@ -657,7 +694,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) first_nonspace++; indent = first_nonspace - offset; - blank = peek_at(&input, first_nonspace) == '\n'; + blank = peek_at(&input, first_nonspace) == '\n' || + peek_at(&input, first_nonspace) == '\r'; if (indent >= CODE_INDENT) { if (!maybe_lazy && !blank) { @@ -713,8 +751,10 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) } else if (container->type == NODE_PARAGRAPH && (lev = scan_setext_header_line(&input, first_nonspace)) && // check that there is only one line in the paragraph: - cmark_strbuf_strrchr(&container->string_content, '\n', - cmark_strbuf_len(&container->string_content) - 2) < 0) { + (cmark_strbuf_strrchr(&container->string_content, '\n', + cmark_strbuf_len(&container->string_content) - 2) < 0 && + cmark_strbuf_strrchr(&container->string_content, '\r', + cmark_strbuf_len(&container->string_content) - 2) < 0)) { container->type = NODE_HEADER; container->as.header.level = lev; @@ -738,7 +778,9 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) i++; } // i = number of spaces after marker, up to 5 - if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') { + if (i >= 5 || i < 1 || + peek_at(&input, offset) == '\n' || + peek_at(&input, offset) == '\r') { data->padding = matched + 1; if (i > 0) { offset += 1; @@ -786,7 +828,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) first_nonspace++; indent = first_nonspace - offset; - blank = peek_at(&input, first_nonspace) == '\n'; + blank = peek_at(&input, first_nonspace) == '\n' || + peek_at(&input, first_nonspace) == '\r'; if (blank && container->last_child) { container->last_child->last_line_blank = true; @@ -854,10 +897,14 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) parser->current = container; } finished: - parser->last_line_length = parser->curline->size - - (parser->curline->ptr[parser->curline->size - 1] == '\n' ? - 1 : 0); - ; + parser->last_line_length = parser->curline->size; + if (parser->last_line_length && + parser->curline->ptr[parser->last_line_length-1] == '\n') + parser->last_line_length -= 1; + if (parser->last_line_length && + parser->curline->ptr[parser->last_line_length-1] == '\r') + parser->last_line_length -= 1; + cmark_strbuf_clear(parser->curline); } -- cgit v1.2.3 From fbb0836feb25f3f5f3e3373c7c20eab62f30e036 Mon Sep 17 00:00:00 2001 From: Ben Trask Date: Fri, 20 Mar 2015 21:12:10 -0400 Subject: Fix regression in remove_trailing_blank_lines(). --- src/blocks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/blocks.c') diff --git a/src/blocks.c b/src/blocks.c index 0222c0f..338d4e9 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -145,7 +145,7 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) } - for(i = 0; i < ln->size; ++i) { + for(; i < ln->size; ++i) { c = ln->ptr[i]; if (c != '\r' && c != '\n') -- cgit v1.2.3 From b8aa64967e328d2d9fa0bc1f21c0970ec32d259f Mon Sep 17 00:00:00 2001 From: Ben Trask Date: Tue, 7 Apr 2015 05:27:45 -0400 Subject: Bug fixes for CRLF support. --- src/blocks.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'src/blocks.c') diff --git a/src/blocks.c b/src/blocks.c index 338d4e9..06521d1 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -90,9 +90,6 @@ static bool is_blank(cmark_strbuf *s, int offset) while (offset < s->size) { switch (s->ptr[offset]) { case '\r': - if (s->ptr[offset + 1] == '\n') - offset++; - return true; case '\n': return true; case ' ': @@ -151,10 +148,6 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) if (c != '\r' && c != '\n') continue; - // Don't cut a CRLF in half - if (c == '\r' && i+1 < ln->size && ln->ptr[i+1] == '\n') - ++i; - cmark_strbuf_truncate(ln, i); break; } @@ -568,10 +561,13 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) // Add a newline to the end if not present: // TODO this breaks abstraction: - // Note: we assume output is LF-only - if (parser->curline->ptr[parser->curline->size - 1] != '\n') { - cmark_strbuf_putc(parser->curline, '\n'); + if (parser->curline->ptr[parser->curline->size - 1] == '\n') { + cmark_strbuf_truncate(parser->curline, parser->curline->size - 1); + } + if (parser->curline->ptr[parser->curline->size - 1] == '\r') { + cmark_strbuf_truncate(parser->curline, parser->curline->size - 1); } + cmark_strbuf_putc(parser->curline, '\n'); input.data = parser->curline->ptr; input.len = parser->curline->size; -- cgit v1.2.3 From 271d8a16e0fa62aa8d6eace9b1931cc4beb27e06 Mon Sep 17 00:00:00 2001 From: Ben Trask Date: Tue, 7 Apr 2015 05:32:01 -0400 Subject: Try to match existing style better. --- src/blocks.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/blocks.c') diff --git a/src/blocks.c b/src/blocks.c index 06521d1..4acd899 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -213,10 +213,10 @@ finalize(cmark_parser *parser, cmark_node* b) (b->type == NODE_HEADER && b->as.header.setext)) { b->end_line = parser->line_number; b->end_column = parser->curline->size; - if (b->end_column && parser->curline->ptr[b->end_column-1] == '\n') - b->end_column -= 1; - if (b->end_column && parser->curline->ptr[b->end_column-1] == '\r') - b->end_column -= 1; + if (b->end_column && parser->curline->ptr[b->end_column - 1] == '\n') + b->end_column--; + if (b->end_column && parser->curline->ptr[b->end_column - 1] == '\r') + b->end_column--; } else { b->end_line = parser->line_number - 1; b->end_column = parser->last_line_length; @@ -895,11 +895,11 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) finished: parser->last_line_length = parser->curline->size; if (parser->last_line_length && - parser->curline->ptr[parser->last_line_length-1] == '\n') - parser->last_line_length -= 1; + parser->curline->ptr[parser->last_line_length - 1] == '\n') + parser->last_line_length--; if (parser->last_line_length && - parser->curline->ptr[parser->last_line_length-1] == '\r') - parser->last_line_length -= 1; + parser->curline->ptr[parser->last_line_length - 1] == '\r') + parser->last_line_length--; cmark_strbuf_clear(parser->curline); -- cgit v1.2.3 From 60d8ded6a6623ddceec76ea348879638b599ee1f Mon Sep 17 00:00:00 2001 From: Ben Trask Date: Tue, 7 Apr 2015 09:41:05 -0400 Subject: Check length before reading. --- src/blocks.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/blocks.c') diff --git a/src/blocks.c b/src/blocks.c index 4acd899..777356a 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -561,10 +561,10 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) // Add a newline to the end if not present: // TODO this breaks abstraction: - if (parser->curline->ptr[parser->curline->size - 1] == '\n') { + if (parser->curline->size && parser->curline->ptr[parser->curline->size - 1] == '\n') { cmark_strbuf_truncate(parser->curline, parser->curline->size - 1); } - if (parser->curline->ptr[parser->curline->size - 1] == '\r') { + if (parser->curline->size && parser->curline->ptr[parser->curline->size - 1] == '\r') { cmark_strbuf_truncate(parser->curline, parser->curline->size - 1); } cmark_strbuf_putc(parser->curline, '\n'); -- cgit v1.2.3