summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu> 2014-12-12 22:12:09 -0800
committer: John MacFarlane <jgm@berkeley.edu> 2014-12-12 22:55:11 -0800
commit: c41bf11bb38ef513fa53f88b2c80afd1504aaeaf (patch)
tree: 87447007f83e40b627037e15108576113dc020c9
parent: 5c20df20af6be9444f27a8c1bbfa5b027a1fa8d8 (diff)
Rewrote HTML renderer using cmark_walk.
This version is shorter, more readable, and more regular. It should serve as a template for creating new writers. Performance is the same. All tests pass.
-rw-r--r-- src/html.c 418
-rw-r--r-- src/node.c 10
2 files changed, 180 insertions, 248 deletions
diff --git a/src/html.c b/src/html.c
index 60229cc..e6ae63e 100644
--- a/src/html.c
+++ b/src/html.c
@@ -11,9 +11,6 @@
// Functions to convert cmark_nodes to HTML strings.
-static bool
-finish_node(strbuf *html, cmark_node *node, bool tight);
-
static void escape_html(strbuf *dest, const unsigned char *source, int length)
{
if (length < 0)
@@ -36,322 +33,251 @@ static inline void cr(strbuf *html)
strbuf_putc(html, '\n');
}
-// Convert the inline children of a node to a plain string.
-static void inlines_to_plain_html(strbuf *html, cmark_node* node)
+struct render_state {
+ strbuf* html;
+ cmark_node *plain;
+};
+
+static int
+S_render_node(cmark_node *node, int entering, void *vstate)
{
- cmark_node* cur = node->first_child;
+ struct render_state *state = vstate;
+ cmark_node *parent;
+ cmark_node *grandparent;
+ strbuf *html = state->html;
+ char start_header[] = "<h0>";
+ char end_header[] = "</h0>";
+ strbuf *info;
+ bool tight;
- if (cur == NULL) {
- return;
+ if (state->plain == node) { // back at original node
+ state->plain = NULL;
}
- while (true) {
- switch(cur->type) {
- case NODE_TEXT:
- case NODE_INLINE_CODE:
- case NODE_INLINE_HTML:
- escape_html(html, cur->as.literal.data, cur->as.literal.len);
+ if (state->plain != NULL) {
+ switch(node->type) {
+ case CMARK_NODE_TEXT:
+ case CMARK_NODE_INLINE_CODE:
+ case CMARK_NODE_INLINE_HTML:
+ escape_html(html, node->as.literal.data,
+ node->as.literal.len);
break;
- case NODE_LINEBREAK:
- case NODE_SOFTBREAK:
+ case CMARK_NODE_LINEBREAK:
+ case CMARK_NODE_SOFTBREAK:
strbuf_putc(html, ' ');
break;
default:
break;
}
-
- if (cur->first_child) {
- cur = cur->first_child;
- continue;
- }
-
- next_sibling:
- if (cur->next) {
- cur = cur->next;
- continue;
- }
- cur = cur->parent;
- if (cur == node) {
- break;
- }
- goto next_sibling;
- }
-}
-
-
-// Convert a cmark_node to HTML.
-static void node_to_html(strbuf *html, cmark_node *node)
-{
- cmark_node *cur;
- char start_header[] = "<h0>";
- bool tight = false;
- bool visit_children;
- strbuf *info;
-
- if (node == NULL) {
- return;
+ return 1;
}
- cur = node;
- while (true) {
- // Only NODE_IMAGE wants to skip its children.
- visit_children = true;
-
- switch(cur->type) {
- case NODE_DOCUMENT:
- break;
-
- case NODE_PARAGRAPH:
- if (!tight) {
- cr(html);
- strbuf_puts(html, "<p>");
- }
- break;
-
- case NODE_BLOCK_QUOTE:
+ switch (node->type) {
+ case CMARK_NODE_BLOCK_QUOTE:
+ if (entering) {
cr(html);
strbuf_puts(html, "<blockquote>\n");
- // BLOCK_QUOTE doesn't use any of the 'as' structs,
- // so the 'list' member can be used to store the
- // current value of 'tight'.
- cur->as.list.tight = tight;
- tight = false;
- break;
-
- case NODE_LIST_ITEM:
+ } else {
cr(html);
- strbuf_puts(html, "<li>");
- break;
+ strbuf_puts(html, "</blockquote>\n");
+ }
+ break;
- case NODE_LIST: {
- cmark_list *list = &cur->as.list;
- bool tmp;
+ case CMARK_NODE_LIST: {
+ cmark_list_type list_type = node->as.list.list_type;
+ int start = node->as.list.start;
- // make sure a list starts at the beginning of the line:
+ if (entering) {
cr(html);
-
- if (list->list_type == CMARK_BULLET_LIST) {
+ if (list_type == CMARK_BULLET_LIST) {
strbuf_puts(html, "<ul>\n");
}
- else if (list->start == 1) {
+ else if (start == 1) {
strbuf_puts(html, "<ol>\n");
}
else {
strbuf_printf(html, "<ol start=\"%d\">\n",
- list->start);
+ start);
}
-
- // Store the current value of 'tight' by swapping.
- tmp = list->tight;
- list->tight = tight;
- tight = tmp;
- break;
+ } else {
+ strbuf_puts(html,
+ list_type == CMARK_BULLET_LIST ?
+ "</ul>\n" : "</ol>\n");
}
+ break;
+ }
- case NODE_HEADER:
- cr(html);
- start_header[2] = '0' + cur->as.header.level;
- strbuf_puts(html, start_header);
- break;
-
- case NODE_CODE_BLOCK:
- info = &cur->as.code.info;
- cr(html);
-
- if (&cur->as.code.fence_length == 0
- || strbuf_len(info) == 0) {
- strbuf_puts(html, "<pre><code>");
- }
- else {
- int first_tag = strbuf_strchr(info, ' ', 0);
- if (first_tag < 0)
- first_tag = strbuf_len(info);
-
- strbuf_puts(html,
- "<pre><code class=\"language-");
- escape_html(html, info->ptr, first_tag);
- strbuf_puts(html, "\">");
- }
-
- escape_html(html, cur->string_content.ptr, cur->string_content.size);
- break;
-
- case NODE_HTML:
+ case CMARK_NODE_LIST_ITEM:
+ if (entering) {
cr(html);
- strbuf_put(html, cur->string_content.ptr, cur->string_content.size);
- break;
+ strbuf_puts(html, "<li>");
+ } else {
+ strbuf_puts(html, "</li>\n");
+ }
+ break;
- case NODE_HRULE:
+ case CMARK_NODE_HEADER:
+ if (entering) {
cr(html);
- strbuf_puts(html, "<hr />\n");
- break;
-
- case NODE_REFERENCE_DEF:
- break;
-
- case NODE_TEXT:
- escape_html(html, cur->as.literal.data, cur->as.literal.len);
- break;
-
- case NODE_LINEBREAK:
- strbuf_puts(html, "<br />\n");
- break;
-
- case NODE_SOFTBREAK:
+ start_header[2] = '0' + node->as.header.level;
+ strbuf_puts(html, start_header);
+ } else {
+ end_header[3] = '0' + node->as.header.level;
+ strbuf_puts(html, end_header);
strbuf_putc(html, '\n');
- break;
-
- case NODE_INLINE_CODE:
- strbuf_puts(html, "<code>");
- escape_html(html, cur->as.literal.data, cur->as.literal.len);
- break;
-
- case NODE_INLINE_HTML:
- strbuf_put(html,
- cur->as.literal.data,
- cur->as.literal.len);
- break;
+ }
+ break;
- case NODE_LINK:
- strbuf_puts(html, "<a href=\"");
- if (cur->as.link.url)
- escape_href(html, cur->as.link.url, -1);
+ case CMARK_NODE_CODE_BLOCK:
+ info = &node->as.code.info;
+ cr(html);
- if (cur->as.link.title) {
- strbuf_puts(html, "\" title=\"");
- escape_html(html, cur->as.link.title, -1);
- }
+ if (&node->as.code.fence_length == 0
+ || strbuf_len(info) == 0) {
+ strbuf_puts(html, "<pre><code>");
+ }
+ else {
+ int first_tag = strbuf_strchr(info, ' ', 0);
+ if (first_tag < 0)
+ first_tag = strbuf_len(info);
+ strbuf_puts(html, "<pre><code class=\"language-");
+ escape_html(html, info->ptr, first_tag);
strbuf_puts(html, "\">");
- break;
-
- case NODE_IMAGE:
- strbuf_puts(html, "<img src=\"");
- if (cur->as.link.url)
- escape_href(html, cur->as.link.url, -1);
-
- strbuf_puts(html, "\" alt=\"");
- inlines_to_plain_html(html, cur);
-
- if (cur->as.link.title) {
- strbuf_puts(html, "\" title=\"");
- escape_html(html, cur->as.link.title, -1);
- }
-
- strbuf_puts(html, "\" />");
- visit_children = false;
- break;
+ }
- case NODE_STRONG:
- strbuf_puts(html, "<strong>");
- break;
+ escape_html(html, node->string_content.ptr, node->string_content.size);
+ strbuf_puts(html, "</code></pre>\n");
+ break;
- case NODE_EMPH:
- strbuf_puts(html, "<em>");
- break;
+ case CMARK_NODE_HTML:
+ cr(html);
+ strbuf_put(html, node->string_content.ptr,
+ node->string_content.size);
+ break;
- default:
- assert(false);
- }
+ case CMARK_NODE_HRULE:
+ cr(html);
+ strbuf_puts(html, "<hr />\n");
+ break;
- if (visit_children && cur->first_child) {
- cur = cur->first_child;
- continue;
- }
+ case CMARK_NODE_REFERENCE_DEF:
+ break;
- next_sibling:
- tight = finish_node(html, cur, tight);
- if (cur == node) {
- break;
- }
- if (cur->next) {
- cur = cur->next;
- continue;
+ case CMARK_NODE_PARAGRAPH:
+ parent = cmark_node_parent(node);
+ grandparent = cmark_node_parent(parent);
+ if (grandparent != NULL &&
+ grandparent->type == CMARK_NODE_LIST) {
+ tight = grandparent->as.list.tight;
+ } else {
+ tight = false;
}
- cur = cur->parent;
- goto next_sibling;
- }
-}
-
-// Returns the restored value of 'tight'.
-static bool
-finish_node(strbuf *html, cmark_node *node, bool tight)
-{
- char end_header[] = "</h0>\n";
-
- switch (node->type) {
- case NODE_PARAGRAPH:
if (!tight) {
- strbuf_puts(html, "</p>\n");
+ if (entering) {
+ cr(html);
+ strbuf_puts(html, "<p>");
+ } else {
+ strbuf_puts(html, "</p>\n");
+ }
}
break;
- case NODE_BLOCK_QUOTE: {
- cmark_list *list = &node->as.list;
- strbuf_puts(html, "</blockquote>\n");
- // Restore old 'tight' value.
- tight = list->tight;
- list->tight = false;
+ case CMARK_NODE_TEXT:
+ escape_html(html, node->as.literal.data,
+ node->as.literal.len);
break;
- }
- case NODE_LIST_ITEM:
- strbuf_puts(html, "</li>\n");
+ case CMARK_NODE_LINEBREAK:
+ strbuf_puts(html, "<br />\n");
break;
- case NODE_LIST: {
- cmark_list *list = &node->as.list;
- bool tmp;
- strbuf_puts(html,
- list->list_type == CMARK_BULLET_LIST ?
- "</ul>\n" : "</ol>\n");
- // Restore old 'tight' value.
- tmp = tight;
- tight = list->tight;
- list->tight = tmp;
+ case CMARK_NODE_SOFTBREAK:
+ strbuf_putc(html, '\n');
break;
- }
- case NODE_HEADER:
- end_header[3] = '0' + node->as.header.level;
- strbuf_puts(html, end_header);
+ case CMARK_NODE_INLINE_CODE:
+ strbuf_puts(html, "<code>");
+ escape_html(html, node->as.literal.data, node->as.literal.len);
+ strbuf_puts(html, "</code>");
break;
- case NODE_CODE_BLOCK:
- strbuf_puts(html, "</code></pre>\n");
+ case CMARK_NODE_INLINE_HTML:
+ strbuf_put(html, node->as.literal.data, node->as.literal.len);
break;
- case NODE_INLINE_CODE:
- strbuf_puts(html, "</code>");
+ case CMARK_NODE_STRONG:
+ if (entering) {
+ strbuf_puts(html, "<strong>");
+ } else {
+ strbuf_puts(html, "</strong>");
+ }
break;
- case NODE_LINK:
- strbuf_puts(html, "</a>");
+ case CMARK_NODE_EMPH:
+ if (entering) {
+ strbuf_puts(html, "<em>");
+ } else {
+ strbuf_puts(html, "</em>");
+ }
break;
- case NODE_STRONG:
- strbuf_puts(html, "</strong>");
+ case CMARK_NODE_LINK:
+ if (entering) {
+ strbuf_puts(html, "<a href=\"");
+ if (node->as.link.url)
+ escape_href(html, node->as.link.url, -1);
+
+ if (node->as.link.title) {
+ strbuf_puts(html, "\" title=\"");
+ escape_html(html, node->as.link.title, -1);
+ }
+
+ strbuf_puts(html, "\">");
+ } else {
+ strbuf_puts(html, "</a>");
+ }
break;
- case NODE_EMPH:
- strbuf_puts(html, "</em>");
+ case CMARK_NODE_IMAGE:
+ if (entering) {
+ strbuf_puts(html, "<img src=\"");
+ if (node->as.link.url)
+ escape_href(html, node->as.link.url, -1);
+
+ strbuf_puts(html, "\" alt=\"");
+ state->plain = node;
+ } else {
+ if (node->as.link.title) {
+ strbuf_puts(html, "\" title=\"");
+ escape_html(html, node->as.link.title, -1);
+ }
+
+ strbuf_puts(html, "\" />");
+ }
break;
default:
+ assert(false);
break;
}
- return tight;
+ // strbuf_putc(html, 'x');
+ return 1;
}
char *cmark_render_html(cmark_node *root)
{
char *result;
strbuf html = GH_BUF_INIT;
- node_to_html(&html, root);
- result = (char *)strbuf_detach(&html);
- strbuf_free(&html);
- return result;
+ struct render_state state = { &html, NULL };
+ if (cmark_walk(root, S_render_node, &state)) {
+ result = (char *)strbuf_detach(&html);
+ strbuf_free(&html);
+ return result;
+ } else {
+ return NULL;
+ }
}
diff --git a/src/node.c b/src/node.c
index 040aeda..980229e 100644
--- a/src/node.c
+++ b/src/node.c
@@ -773,6 +773,7 @@ int S_is_leaf_node(cmark_node *current_node)
switch (cmark_node_get_type(current_node)) {
case CMARK_NODE_HTML:
case CMARK_NODE_HRULE:
+ case CMARK_NODE_CODE_BLOCK:
case CMARK_NODE_REFERENCE_DEF:
case CMARK_NODE_TEXT:
case CMARK_NODE_SOFTBREAK:
@@ -815,8 +816,13 @@ int cmark_walk(cmark_node *root, cmark_node_handler handler, void *state)
parent = current_node->parent;
}
if (next) {
- begin = 1;
- current_node = next;
+ // don't go past root:
+ if (current_node == root) {
+ return 1;
+ } else {
+ begin = 1;
+ current_node = next;
+ }
} else {
begin = 0;
depth -= 1;