Fix inlines spanning newlines, text in non-para

author: Yuki Izumi <ashe@kivikakk.ee> 2017-08-09 17:13:16 +1000
committer: Yuki Izumi <ashe@kivikakk.ee> 2017-08-10 13:29:40 +1000
commit: e22d1b42ce7f860c51c261ea9d42c0b77245fde5 (patch)
tree: d0cd5bee6b826fb6b09db44beddb48a96fff023e
parent: 919cdb2c583163411b3b15b2eae0ce72cf2d7981 (diff)
4 files changed, 71 insertions, 28 deletions
diff --git a/api_test/main.c b/api_test/main.c
index 08f3c98..c30dc71 100644
--- a/api_test/main.c
+++ b/api_test/main.c
@@ -885,7 +885,7 @@ static void test_feed_across_line_ending(test_batch_runner *runner) {
 
 static void source_pos(test_batch_runner *runner) {
   static const char markdown[] =
-    "Hi *there*.\n"
+    "# Hi *there*.\n"
     "\n"
     "Hello &ldquo; <http://www.google.com>\n"
     "there `hi` -- [okay](www.google.com (ok)).\n"
@@ -901,13 +901,13 @@ static void source_pos(test_batch_runner *runner) {
   STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
                       "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
                       "<document sourcepos=\"1:1-10:20\" xmlns=\"http://commonmark.org/xml/1.0\">\n"
-                      "  <paragraph sourcepos=\"1:1-1:11\">\n"
-                      "    <text sourcepos=\"1:1-1:3\">Hi </text>\n"
-                      "    <emph sourcepos=\"1:4-1:10\">\n"
-                      "      <text sourcepos=\"1:5-1:9\">there</text>\n"
+                      "  <heading sourcepos=\"1:1-1:13\" level=\"1\">\n"
+                      "    <text sourcepos=\"1:3-1:5\">Hi </text>\n"
+                      "    <emph sourcepos=\"1:6-1:12\">\n"
+                      "      <text sourcepos=\"1:7-1:11\">there</text>\n"
                       "    </emph>\n"
-                      "    <text sourcepos=\"1:11-1:11\">.</text>\n"
-                      "  </paragraph>\n"
+                      "    <text sourcepos=\"1:13-1:13\">.</text>\n"
+                      "  </heading>\n"
                       "  <paragraph sourcepos=\"3:1-4:42\">\n"
                       "    <text sourcepos=\"3:1-3:14\">Hello “ </text>\n"
                       "    <link sourcepos=\"3:15-3:37\" destination=\"http://www.google.com\" title=\"\">\n"
diff --git a/src/blocks.c b/src/blocks.c
index 5a293b2..acdbb34 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -900,6 +900,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
 
       (*container)->as.heading.level = level;
       (*container)->as.heading.setext = false;
+      (*container)->internal_offset = matched;
 
     } else if (!indented && (matched = scan_open_code_fence(
                                  input, parser->first_nonspace))) {
diff --git a/src/inlines.c b/src/inlines.c
index 3f1b9ed..07f3709 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -239,6 +239,43 @@ static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) {
   return cmark_chunk_dup(&subj->input, startpos, len);
 }
 
+// Count the newlines among the `len` characters of `subj->input` starting at
+// index `from` and return that count.  If it is greater than zero, also store
+// the number of characters after the last newline in `*since_newline`.
+static int count_newlines(subject *subj, bufsize_t from, bufsize_t len, int *since_newline) {
+  int nls = 0;
+  int since_nl = 0;
+
+  while (len--) {
+    if (subj->input.data[from++] == '\n') {
+      ++nls;
+      since_nl = 0;  // restart the count after every newline
+    } else {
+      ++since_nl;
+    }
+  }
+
+  if (!nls)
+    return 0;  // no newline seen: `*since_newline` is left unwritten
+
+  *since_newline = since_nl;
+  return nls;
+}
+
+// Scan the `matchlen` characters of `subj` that end `extra` characters before
+// `subj->pos`.  If they contain newlines, advance `subj->line` and `node`'s
+// `end_line` by that count and recompute `end_column` and `column_offset`.
+static void adjust_subj_node_newlines(subject *subj, cmark_node *node, int matchlen, int extra) {
+  int since_newline;  // written by count_newlines only when it returns > 0
+  int newlines = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &since_newline);
+  if (newlines) {
+    subj->line += newlines;
+    node->end_line += newlines;
+    node->end_column = since_newline;
+    subj->column_offset = -subj->pos + since_newline + extra;  // pos + column_offset == since_newline + extra
+  }
+}
+
 // Try to process a backtick code span that began with a
 // span of ticks of length openticklength length (already
 // parsed).  Return 0 if you don't find matching closing
@@ -302,7 +339,9 @@ static cmark_node *handle_backticks(subject *subj) {
     cmark_strbuf_trim(&buf);
     cmark_strbuf_normalize_whitespace(&buf);
 
-    return make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf));
+    cmark_node *node = make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf));
+    adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len);
+    return node;
   }
 }
 
@@ -727,7 +766,7 @@ cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
     return result;
   }
 
-  houdini_unescape_html_f(&buf, url->data, url->len);
+    houdini_unescape_html_f(&buf, url->data, url->len);
 
   cmark_strbuf_unescape(&buf);
   return cmark_chunk_buf_detach(&buf);
@@ -788,7 +827,9 @@ static cmark_node *handle_pointy_brace(subject *subj) {
   if (matchlen > 0) {
     contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
     subj->pos += matchlen;
-    return make_raw_html(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents);
+    cmark_node *node = make_raw_html(subj, subj->pos - matchlen - 1, subj->pos - 1, contents);
+    adjust_subj_node_newlines(subj, node, matchlen, 1);
+    return node;
   }
 
   // if nothing matches, just return the opening <:
@@ -846,24 +887,24 @@ static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset,
   bufsize_t i = offset;
   size_t nb_p = 0;
 
-  while (i < input->len) {
-    if (input->data[i] == '\\' &&
-        i + 1 < input-> len &&
-        cmark_ispunct(input->data[i+1]))
-      i += 2;
-    else if (input->data[i] == '(') {
-      ++nb_p;
-      ++i;
-    } else if (input->data[i] == ')') {
-      if (nb_p == 0)
+    while (i < input->len) {
+      if (input->data[i] == '\\' &&
+	  i + 1 < input-> len &&
+          cmark_ispunct(input->data[i+1]))
+        i += 2;
+      else if (input->data[i] == '(') {
+        ++nb_p;
+        ++i;
+      } else if (input->data[i] == ')') {
+        if (nb_p == 0)
+          break;
+        --nb_p;
+        ++i;
+      } else if (cmark_isspace(input->data[i]))
         break;
-      --nb_p;
-      ++i;
-    } else if (cmark_isspace(input->data[i]))
-      break;
-    else
-      ++i;
-  }
+      else
+        ++i;
+    }
 
   if (i >= input->len)
     return -1;
@@ -1203,7 +1244,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
 extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
                                 cmark_reference_map *refmap, int options) {
   subject subj;
-  subject_from_buf(mem, parent->start_line, parent->start_column - 1, &subj, &parent->content, refmap);
+  subject_from_buf(mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &parent->content, refmap);
   cmark_chunk_rtrim(&subj.input);
 
   while (!is_eof(&subj) && parse_inline(&subj, parent, options))
diff --git a/src/node.h b/src/node.h
index 65d857f..13901ba 100644
--- a/src/node.h
+++ b/src/node.h
@@ -66,6 +66,7 @@ struct cmark_node {
   int start_column;
   int end_line;
   int end_column;
+  int internal_offset;
   uint16_t type;
   uint16_t flags;
author	Yuki Izumi <ashe@kivikakk.ee>	2017-08-09 17:13:16 +1000
committer	Yuki Izumi <ashe@kivikakk.ee>	2017-08-10 13:29:40 +1000
commit	e22d1b42ce7f860c51c261ea9d42c0b77245fde5 (patch)
tree	d0cd5bee6b826fb6b09db44beddb48a96fff023e
parent	919cdb2c583163411b3b15b2eae0ce72cf2d7981 (diff)