From e22d1b42ce7f860c51c261ea9d42c0b77245fde5 Mon Sep 17 00:00:00 2001 From: Yuki Izumi Date: Wed, 9 Aug 2017 17:13:16 +1000 Subject: Fix inlines spanning newlines, text in non-para --- api_test/main.c | 14 +++++----- src/blocks.c | 1 + src/inlines.c | 83 ++++++++++++++++++++++++++++++++++++++++++--------------- src/node.h | 1 + 4 files changed, 71 insertions(+), 28 deletions(-) diff --git a/api_test/main.c b/api_test/main.c index 08f3c98..c30dc71 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -885,7 +885,7 @@ static void test_feed_across_line_ending(test_batch_runner *runner) { static void source_pos(test_batch_runner *runner) { static const char markdown[] = - "Hi *there*.\n" + "# Hi *there*.\n" "\n" "Hello “ \n" "there `hi` -- [okay](www.google.com (ok)).\n" @@ -901,13 +901,13 @@ static void source_pos(test_batch_runner *runner) { STR_EQ(runner, xml, "\n" "\n" "\n" - " \n" - " Hi \n" - " \n" - " there\n" + " \n" + " Hi \n" + " \n" + " there\n" " \n" - " .\n" - " \n" + " .\n" + " \n" " \n" " Hello “ \n" " \n" diff --git a/src/blocks.c b/src/blocks.c index 5a293b2..acdbb34 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -900,6 +900,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, (*container)->as.heading.level = level; (*container)->as.heading.setext = false; + (*container)->internal_offset = matched; } else if (!indented && (matched = scan_open_code_fence( input, parser->first_nonspace))) { diff --git a/src/inlines.c b/src/inlines.c index 3f1b9ed..07f3709 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -239,6 +239,43 @@ static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) { return cmark_chunk_dup(&subj->input, startpos, len); } +// Return the number of newlines in a given span of text in a subject. If +// the number is greater than zero, also return the number of characters +// between the last newline and the end of the span in `since_newline`. 
+static int count_newlines(subject *subj, bufsize_t from, bufsize_t len, int *since_newline) {
+  int nls = 0;      // newlines seen so far
+  int since_nl = 0; // characters seen since the most recent newline (or since `from`)
+
+  while (len--) { // examines `len` characters of the input starting at index `from`
+    if (subj->input.data[from++] == '\n') {
+      ++nls;
+      since_nl = 0;
+    } else {
+      ++since_nl;
+    }
+  }
+
+  if (!nls)
+    return 0; // no newline found: `*since_newline` is left unwritten
+
+  *since_newline = since_nl;
+  return nls;
+}
+
+// Correct source positions after a just-matched span of `subj` that may cross
+// line breaks: scans `matchlen` characters starting at `subj->pos - matchlen -
+// extra` and, only if a newline is found, updates `node` and `subj` as below.
+static void adjust_subj_node_newlines(subject *subj, cmark_node *node, int matchlen, int extra) {
+  int since_newline; // written by count_newlines() only when it returns non-zero
+  int newlines = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &since_newline);
+  if (newlines) {
+    subj->line += newlines;
+    node->end_line += newlines;
+    node->end_column = since_newline; // characters between the last newline and the end of the scanned span
+    subj->column_offset = -subj->pos + since_newline + extra; // so that pos + column_offset == since_newline + extra
+  }
+}
+
 // Try to process a backtick code span that began with a
 // span of ticks of length openticklength length (already
 // parsed).
Return 0 if you don't find matching closing @@ -302,7 +339,9 @@ static cmark_node *handle_backticks(subject *subj) { cmark_strbuf_trim(&buf); cmark_strbuf_normalize_whitespace(&buf); - return make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf)); + cmark_node *node = make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf)); + adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len); + return node; } } @@ -727,7 +766,7 @@ cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) { return result; } - houdini_unescape_html_f(&buf, url->data, url->len); + houdini_unescape_html_f(&buf, url->data, url->len); cmark_strbuf_unescape(&buf); return cmark_chunk_buf_detach(&buf); @@ -788,7 +827,9 @@ static cmark_node *handle_pointy_brace(subject *subj) { if (matchlen > 0) { contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1); subj->pos += matchlen; - return make_raw_html(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents); + cmark_node *node = make_raw_html(subj, subj->pos - matchlen - 1, subj->pos - 1, contents); + adjust_subj_node_newlines(subj, node, matchlen, 1); + return node; } // if nothing matches, just return the opening <: @@ -846,24 +887,24 @@ static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset, bufsize_t i = offset; size_t nb_p = 0; - while (i < input->len) { - if (input->data[i] == '\\' && - i + 1 < input-> len && - cmark_ispunct(input->data[i+1])) - i += 2; - else if (input->data[i] == '(') { - ++nb_p; - ++i; - } else if (input->data[i] == ')') { - if (nb_p == 0) + while (i < input->len) { + if (input->data[i] == '\\' && + i + 1 < input-> len && + cmark_ispunct(input->data[i+1])) + i += 2; + else if (input->data[i] == '(') { + ++nb_p; + ++i; + } else if (input->data[i] == ')') { + if (nb_p == 0) + break; + --nb_p; + ++i; + } else if (cmark_isspace(input->data[i])) break; - --nb_p; - ++i; - } else if (cmark_isspace(input->data[i])) - break; - 
else - ++i; - } + else + ++i; + } if (i >= input->len) return -1; @@ -1203,7 +1244,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, cmark_reference_map *refmap, int options) { subject subj; - subject_from_buf(mem, parent->start_line, parent->start_column - 1, &subj, &parent->content, refmap); + subject_from_buf(mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &parent->content, refmap); cmark_chunk_rtrim(&subj.input); while (!is_eof(&subj) && parse_inline(&subj, parent, options)) diff --git a/src/node.h b/src/node.h index 65d857f..13901ba 100644 --- a/src/node.h +++ b/src/node.h @@ -66,6 +66,7 @@ struct cmark_node { int start_column; int end_line; int end_column; + int internal_offset; uint16_t type; uint16_t flags; -- cgit v1.2.3