summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Yuki Izumi <ashe@kivikakk.ee> 2017-08-09 17:13:16 +1000
committer Yuki Izumi <ashe@kivikakk.ee> 2017-08-10 13:29:40 +1000
commit e22d1b42ce7f860c51c261ea9d42c0b77245fde5 (patch)
tree d0cd5bee6b826fb6b09db44beddb48a96fff023e
parent 919cdb2c583163411b3b15b2eae0ce72cf2d7981 (diff)
Fix inlines spanning newlines, text in non-para
-rw-r--r-- api_test/main.c 14
-rw-r--r-- src/blocks.c 1
-rw-r--r-- src/inlines.c 83
-rw-r--r-- src/node.h 1
4 files changed, 71 insertions, 28 deletions
diff --git a/api_test/main.c b/api_test/main.c
index 08f3c98..c30dc71 100644
--- a/api_test/main.c
+++ b/api_test/main.c
@@ -885,7 +885,7 @@ static void test_feed_across_line_ending(test_batch_runner *runner) {
static void source_pos(test_batch_runner *runner) {
static const char markdown[] =
- "Hi *there*.\n"
+ "# Hi *there*.\n"
"\n"
"Hello &ldquo; <http://www.google.com>\n"
"there `hi` -- [okay](www.google.com (ok)).\n"
@@ -901,13 +901,13 @@ static void source_pos(test_batch_runner *runner) {
STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
"<document sourcepos=\"1:1-10:20\" xmlns=\"http://commonmark.org/xml/1.0\">\n"
- " <paragraph sourcepos=\"1:1-1:11\">\n"
- " <text sourcepos=\"1:1-1:3\">Hi </text>\n"
- " <emph sourcepos=\"1:4-1:10\">\n"
- " <text sourcepos=\"1:5-1:9\">there</text>\n"
+ " <heading sourcepos=\"1:1-1:13\" level=\"1\">\n"
+ " <text sourcepos=\"1:3-1:5\">Hi </text>\n"
+ " <emph sourcepos=\"1:6-1:12\">\n"
+ " <text sourcepos=\"1:7-1:11\">there</text>\n"
" </emph>\n"
- " <text sourcepos=\"1:11-1:11\">.</text>\n"
- " </paragraph>\n"
+ " <text sourcepos=\"1:13-1:13\">.</text>\n"
+ " </heading>\n"
" <paragraph sourcepos=\"3:1-4:42\">\n"
" <text sourcepos=\"3:1-3:14\">Hello “ </text>\n"
" <link sourcepos=\"3:15-3:37\" destination=\"http://www.google.com\" title=\"\">\n"
diff --git a/src/blocks.c b/src/blocks.c
index 5a293b2..acdbb34 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -900,6 +900,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
(*container)->as.heading.level = level;
(*container)->as.heading.setext = false;
+ (*container)->internal_offset = matched;
} else if (!indented && (matched = scan_open_code_fence(
input, parser->first_nonspace))) {
diff --git a/src/inlines.c b/src/inlines.c
index 3f1b9ed..07f3709 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -239,6 +239,43 @@ static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) {
return cmark_chunk_dup(&subj->input, startpos, len);
}
+// Return the number of newline bytes found in the `len` bytes of
+// `subj->input.data` that start at index `from`. If that number is greater
+// than zero, also store in `*since_newline` the number of bytes between the
+// last newline and the end of the span. If it is zero, `*since_newline` is
+// not written, so callers must check the return value before reading it.
+static int count_newlines(subject *subj, bufsize_t from, bufsize_t len, int *since_newline) {
+ int nls = 0; // newlines seen so far
+ int since_nl = 0; // bytes seen since the most recent newline (or since `from`)
+
+ while (len--) {
+ if (subj->input.data[from++] == '\n') {
+ ++nls;
+ since_nl = 0; // restart the count after every newline
+ } else {
+ ++since_nl;
+ }
+ }
+
+ if (!nls)
+ return 0; // no newline: leave the out-parameter untouched
+
+ *since_newline = since_nl;
+ return nls;
+}
+
+// Adjust `node`'s `end_line`, `end_column`, and `subj`'s `line` and
+// `column_offset` according to the number of newlines in a just-matched span
+// of text in `subj`. The span inspected is the `matchlen` bytes that start at
+// `subj->pos - matchlen - extra`, so `extra` is the number of bytes between
+// the end of that span and the current `subj->pos`. When the span holds no
+// newline, neither `node` nor `subj` is modified.
+static void adjust_subj_node_newlines(subject *subj, cmark_node *node, int matchlen, int extra) {
+ int since_newline; // only written by count_newlines when it returns non-zero
+ int newlines = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &since_newline);
+ if (newlines) {
+ subj->line += newlines;
+ node->end_line += newlines;
+ node->end_column = since_newline; // bytes after the last newline in the span
+ subj->column_offset = -subj->pos + since_newline + extra; // NOTE(review): presumably rebases later columns on the last newline; confirm where column_offset is consumed
+ }
+}
+
// Try to process a backtick code span that began with a
// span of ticks of length openticklength (already
// parsed). Return 0 if you don't find matching closing
@@ -302,7 +339,9 @@ static cmark_node *handle_backticks(subject *subj) {
cmark_strbuf_trim(&buf);
cmark_strbuf_normalize_whitespace(&buf);
- return make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf));
+ cmark_node *node = make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf));
+ adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len);
+ return node;
}
}
@@ -727,7 +766,7 @@ cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
return result;
}
- houdini_unescape_html_f(&buf, url->data, url->len);
+ houdini_unescape_html_f(&buf, url->data, url->len);
cmark_strbuf_unescape(&buf);
return cmark_chunk_buf_detach(&buf);
@@ -788,7 +827,9 @@ static cmark_node *handle_pointy_brace(subject *subj) {
if (matchlen > 0) {
contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
subj->pos += matchlen;
- return make_raw_html(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents);
+ cmark_node *node = make_raw_html(subj, subj->pos - matchlen - 1, subj->pos - 1, contents);
+ adjust_subj_node_newlines(subj, node, matchlen, 1);
+ return node;
}
// if nothing matches, just return the opening <:
@@ -846,24 +887,24 @@ static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset,
bufsize_t i = offset;
size_t nb_p = 0;
- while (i < input->len) {
- if (input->data[i] == '\\' &&
- i + 1 < input-> len &&
- cmark_ispunct(input->data[i+1]))
- i += 2;
- else if (input->data[i] == '(') {
- ++nb_p;
- ++i;
- } else if (input->data[i] == ')') {
- if (nb_p == 0)
+ while (i < input->len) {
+ if (input->data[i] == '\\' &&
+ i + 1 < input-> len &&
+ cmark_ispunct(input->data[i+1]))
+ i += 2;
+ else if (input->data[i] == '(') {
+ ++nb_p;
+ ++i;
+ } else if (input->data[i] == ')') {
+ if (nb_p == 0)
+ break;
+ --nb_p;
+ ++i;
+ } else if (cmark_isspace(input->data[i]))
break;
- --nb_p;
- ++i;
- } else if (cmark_isspace(input->data[i]))
- break;
- else
- ++i;
- }
+ else
+ ++i;
+ }
if (i >= input->len)
return -1;
@@ -1203,7 +1244,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
cmark_reference_map *refmap, int options) {
subject subj;
- subject_from_buf(mem, parent->start_line, parent->start_column - 1, &subj, &parent->content, refmap);
+ subject_from_buf(mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &parent->content, refmap);
cmark_chunk_rtrim(&subj.input);
while (!is_eof(&subj) && parse_inline(&subj, parent, options))
diff --git a/src/node.h b/src/node.h
index 65d857f..13901ba 100644
--- a/src/node.h
+++ b/src/node.h
@@ -66,6 +66,7 @@ struct cmark_node {
int start_column;
int end_line;
int end_column;
+ int internal_offset;
uint16_t type;
uint16_t flags;