diff options
author | Yuki Izumi <ashe@kivikakk.ee> | 2017-08-09 17:13:16 +1000 |
---|---|---|
committer | Yuki Izumi <ashe@kivikakk.ee> | 2017-08-10 13:29:40 +1000 |
commit | e22d1b42ce7f860c51c261ea9d42c0b77245fde5 (patch) | |
tree | d0cd5bee6b826fb6b09db44beddb48a96fff023e | |
parent | 919cdb2c583163411b3b15b2eae0ce72cf2d7981 (diff) |
Fix source positions for inlines spanning newlines and for text in non-paragraph blocks
-rw-r--r-- | api_test/main.c | 14 |
-rw-r--r-- | src/blocks.c | 1 |
-rw-r--r-- | src/inlines.c | 83 |
-rw-r--r-- | src/node.h | 1 |
4 files changed, 71 insertions, 28 deletions
diff --git a/api_test/main.c b/api_test/main.c index 08f3c98..c30dc71 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -885,7 +885,7 @@ static void test_feed_across_line_ending(test_batch_runner *runner) { static void source_pos(test_batch_runner *runner) { static const char markdown[] = - "Hi *there*.\n" + "# Hi *there*.\n" "\n" "Hello “ <http://www.google.com>\n" "there `hi` -- [okay](www.google.com (ok)).\n" @@ -901,13 +901,13 @@ static void source_pos(test_batch_runner *runner) { STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n" "<document sourcepos=\"1:1-10:20\" xmlns=\"http://commonmark.org/xml/1.0\">\n" - " <paragraph sourcepos=\"1:1-1:11\">\n" - " <text sourcepos=\"1:1-1:3\">Hi </text>\n" - " <emph sourcepos=\"1:4-1:10\">\n" - " <text sourcepos=\"1:5-1:9\">there</text>\n" + " <heading sourcepos=\"1:1-1:13\" level=\"1\">\n" + " <text sourcepos=\"1:3-1:5\">Hi </text>\n" + " <emph sourcepos=\"1:6-1:12\">\n" + " <text sourcepos=\"1:7-1:11\">there</text>\n" " </emph>\n" - " <text sourcepos=\"1:11-1:11\">.</text>\n" - " </paragraph>\n" + " <text sourcepos=\"1:13-1:13\">.</text>\n" + " </heading>\n" " <paragraph sourcepos=\"3:1-4:42\">\n" " <text sourcepos=\"3:1-3:14\">Hello “ </text>\n" " <link sourcepos=\"3:15-3:37\" destination=\"http://www.google.com\" title=\"\">\n" diff --git a/src/blocks.c b/src/blocks.c index 5a293b2..acdbb34 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -900,6 +900,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, (*container)->as.heading.level = level; (*container)->as.heading.setext = false; + (*container)->internal_offset = matched; } else if (!indented && (matched = scan_open_code_fence( input, parser->first_nonspace))) { diff --git a/src/inlines.c b/src/inlines.c index 3f1b9ed..07f3709 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -239,6 +239,43 @@ static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) { return 
cmark_chunk_dup(&subj->input, startpos, len); } +// Return the number of newlines in a given span of text in a subject. If +// the number is greater than zero, also return the number of characters +// between the last newline and the end of the span in `since_newline`. +static int count_newlines(subject *subj, bufsize_t from, bufsize_t len, int *since_newline) { + int nls = 0; + int since_nl = 0; + + while (len--) { + if (subj->input.data[from++] == '\n') { + ++nls; + since_nl = 0; + } else { + ++since_nl; + } + } + + if (!nls) + return 0; + + *since_newline = since_nl; + return nls; +} + +// Adjust `node`'s `end_line`, `end_column`, and `subj`'s `line` and +// `column_offset` according to the number of newlines in a just-matched span +// of text in `subj`. +static void adjust_subj_node_newlines(subject *subj, cmark_node *node, int matchlen, int extra) { + int since_newline; + int newlines = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &since_newline); + if (newlines) { + subj->line += newlines; + node->end_line += newlines; + node->end_column = since_newline; + subj->column_offset = -subj->pos + since_newline + extra; + } +} + // Try to process a backtick code span that began with a // span of ticks of length openticklength length (already // parsed). 
Return 0 if you don't find matching closing @@ -302,7 +339,9 @@ static cmark_node *handle_backticks(subject *subj) { cmark_strbuf_trim(&buf); cmark_strbuf_normalize_whitespace(&buf); - return make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf)); + cmark_node *node = make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf)); + adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len); + return node; } } @@ -727,7 +766,7 @@ cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) { return result; } - houdini_unescape_html_f(&buf, url->data, url->len); + houdini_unescape_html_f(&buf, url->data, url->len); cmark_strbuf_unescape(&buf); return cmark_chunk_buf_detach(&buf); @@ -788,7 +827,9 @@ static cmark_node *handle_pointy_brace(subject *subj) { if (matchlen > 0) { contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1); subj->pos += matchlen; - return make_raw_html(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents); + cmark_node *node = make_raw_html(subj, subj->pos - matchlen - 1, subj->pos - 1, contents); + adjust_subj_node_newlines(subj, node, matchlen, 1); + return node; } // if nothing matches, just return the opening <: @@ -846,24 +887,24 @@ static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset, bufsize_t i = offset; size_t nb_p = 0; - while (i < input->len) { - if (input->data[i] == '\\' && - i + 1 < input-> len && - cmark_ispunct(input->data[i+1])) - i += 2; - else if (input->data[i] == '(') { - ++nb_p; - ++i; - } else if (input->data[i] == ')') { - if (nb_p == 0) + while (i < input->len) { + if (input->data[i] == '\\' && + i + 1 < input-> len && + cmark_ispunct(input->data[i+1])) + i += 2; + else if (input->data[i] == '(') { + ++nb_p; + ++i; + } else if (input->data[i] == ')') { + if (nb_p == 0) + break; + --nb_p; + ++i; + } else if (cmark_isspace(input->data[i])) break; - --nb_p; - ++i; - } else if (cmark_isspace(input->data[i])) - break; - 
else - ++i; - } + else + ++i; + } if (i >= input->len) return -1; @@ -1203,7 +1244,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, cmark_reference_map *refmap, int options) { subject subj; - subject_from_buf(mem, parent->start_line, parent->start_column - 1, &subj, &parent->content, refmap); + subject_from_buf(mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &parent->content, refmap); cmark_chunk_rtrim(&subj.input); while (!is_eof(&subj) && parse_inline(&subj, parent, options)) diff --git a/src/node.h b/src/node.h --- a/src/node.h +++ b/src/node.h @@ -66,6 +66,7 @@ struct cmark_node { int start_column; int end_line; int end_column; + int internal_offset; uint16_t type; uint16_t flags; |