From c41bf11bb38ef513fa53f88b2c80afd1504aaeaf Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 12 Dec 2014 22:12:09 -0800 Subject: Rewrote HTML renderer using cmark_walk. This version is shorter, more readable, and more regular. It should serve as a template for creating new writers. Performance is the same. All tests pass. --- src/html.c | 418 +++++++++++++++++++++++++------------------------------------ 1 file changed, 172 insertions(+), 246 deletions(-) (limited to 'src/html.c') diff --git a/src/html.c b/src/html.c index 60229cc..e6ae63e 100644 --- a/src/html.c +++ b/src/html.c @@ -11,9 +11,6 @@ // Functions to convert cmark_nodes to HTML strings. -static bool -finish_node(strbuf *html, cmark_node *node, bool tight); - static void escape_html(strbuf *dest, const unsigned char *source, int length) { if (length < 0) @@ -36,322 +33,251 @@ static inline void cr(strbuf *html) strbuf_putc(html, '\n'); } -// Convert the inline children of a node to a plain string. -static void inlines_to_plain_html(strbuf *html, cmark_node* node) +struct render_state { + strbuf* html; + cmark_node *plain; +}; + +static int +S_render_node(cmark_node *node, int entering, void *vstate) { - cmark_node* cur = node->first_child; + struct render_state *state = vstate; + cmark_node *parent; + cmark_node *grandparent; + strbuf *html = state->html; + char start_header[] = ""; + char end_header[] = ""; + strbuf *info; + bool tight; - if (cur == NULL) { - return; + if (state->plain == node) { // back at original node + state->plain = NULL; } - while (true) { - switch(cur->type) { - case NODE_TEXT: - case NODE_INLINE_CODE: - case NODE_INLINE_HTML: - escape_html(html, cur->as.literal.data, cur->as.literal.len); + if (state->plain != NULL) { + switch(node->type) { + case CMARK_NODE_TEXT: + case CMARK_NODE_INLINE_CODE: + case CMARK_NODE_INLINE_HTML: + escape_html(html, node->as.literal.data, + node->as.literal.len); break; - case NODE_LINEBREAK: - case NODE_SOFTBREAK: + case CMARK_NODE_LINEBREAK: + case CMARK_NODE_SOFTBREAK: strbuf_putc(html, ' '); break; default: break; } - - if (cur->first_child) { - cur = cur->first_child; - continue; - } - - next_sibling: - if (cur->next) { - cur = cur->next; - continue; - } - cur = cur->parent; - if (cur == node) { - break; - } - goto next_sibling; - } -} - - -// Convert a cmark_node to HTML. -static void node_to_html(strbuf *html, cmark_node *node) -{ - cmark_node *cur; - char start_header[] = ""; - bool tight = false; - bool visit_children; - strbuf *info; - - if (node == NULL) { - return; + return 1; } - cur = node; - while (true) { - // Only NODE_IMAGE wants to skip its children. - visit_children = true; - - switch(cur->type) { - case NODE_DOCUMENT: - break; - - case NODE_PARAGRAPH: - if (!tight) { - cr(html); - strbuf_puts(html, "

"); - } - break; - - case NODE_BLOCK_QUOTE: + switch (node->type) { + case CMARK_NODE_BLOCK_QUOTE: + if (entering) { cr(html); strbuf_puts(html, "

\n"); - // BLOCK_QUOTE doesn't use any of the 'as' structs, - // so the 'list' member can be used to store the - // current value of 'tight'. - cur->as.list.tight = tight; - tight = false; - break; - - case NODE_LIST_ITEM: + } else { cr(html); - strbuf_puts(html, "
  • "); - break; + strbuf_puts(html, "
  • \n"); + } + break; - case NODE_LIST: { - cmark_list *list = &cur->as.list; - bool tmp; + case CMARK_NODE_LIST: { + cmark_list_type list_type = node->as.list.list_type; + int start = node->as.list.start; - // make sure a list starts at the beginning of the line: + if (entering) { cr(html); - - if (list->list_type == CMARK_BULLET_LIST) { + if (list_type == CMARK_BULLET_LIST) { strbuf_puts(html, "\n" : "\n"); } + break; + } - case NODE_HEADER: - cr(html); - start_header[2] = '0' + cur->as.header.level; - strbuf_puts(html, start_header); - break; - - case NODE_CODE_BLOCK: - info = &cur->as.code.info; - cr(html); - - if (&cur->as.code.fence_length == 0 - || strbuf_len(info) == 0) { - strbuf_puts(html, "
    ");
    -			}
    -			else {
    -				int first_tag = strbuf_strchr(info, ' ', 0);
    -				if (first_tag < 0)
    -					first_tag = strbuf_len(info);
    -
    -				strbuf_puts(html,
    -					    "
    ptr, first_tag);
    -				strbuf_puts(html, "\">");
    -			}
    -
    -			escape_html(html, cur->string_content.ptr, cur->string_content.size);
    -			break;
    -
    -		case NODE_HTML:
    +	case CMARK_NODE_LIST_ITEM:
    +		if (entering) {
     			cr(html);
    -			strbuf_put(html, cur->string_content.ptr, cur->string_content.size);
    -			break;
    +			strbuf_puts(html, "
  • "); + } else { + strbuf_puts(html, "
  • \n"); + } + break; - case NODE_HRULE: + case CMARK_NODE_HEADER: + if (entering) { cr(html); - strbuf_puts(html, "
    \n"); - break; - - case NODE_REFERENCE_DEF: - break; - - case NODE_TEXT: - escape_html(html, cur->as.literal.data, cur->as.literal.len); - break; - - case NODE_LINEBREAK: - strbuf_puts(html, "
    \n"); - break; - - case NODE_SOFTBREAK: + start_header[2] = '0' + node->as.header.level; + strbuf_puts(html, start_header); + } else { + end_header[3] = '0' + node->as.header.level; + strbuf_puts(html, end_header); strbuf_putc(html, '\n'); - break; - - case NODE_INLINE_CODE: - strbuf_puts(html, ""); - escape_html(html, cur->as.literal.data, cur->as.literal.len); - break; - - case NODE_INLINE_HTML: - strbuf_put(html, - cur->as.literal.data, - cur->as.literal.len); - break; + } + break; - case NODE_LINK: - strbuf_puts(html, "as.link.url) - escape_href(html, cur->as.link.url, -1); + case CMARK_NODE_CODE_BLOCK: + info = &node->as.code.info; + cr(html); - if (cur->as.link.title) { - strbuf_puts(html, "\" title=\""); - escape_html(html, cur->as.link.title, -1); - } + if (&node->as.code.fence_length == 0 + || strbuf_len(info) == 0) { + strbuf_puts(html, "
    ");
    +		}
    +		else {
    +			int first_tag = strbuf_strchr(info, ' ', 0);
    +			if (first_tag < 0)
    +				first_tag = strbuf_len(info);
     
    +			strbuf_puts(html, "
    ptr, first_tag);
     			strbuf_puts(html, "\">");
    -			break;
    -
    -		case NODE_IMAGE:
    -			strbuf_puts(html, "as.link.url)
    -				escape_href(html, cur->as.link.url, -1);
    -
    -			strbuf_puts(html, "\" alt=\"");
    -			inlines_to_plain_html(html, cur);
    -
    -			if (cur->as.link.title) {
    -				strbuf_puts(html, "\" title=\"");
    -				escape_html(html, cur->as.link.title, -1);
    -			}
    -
    -			strbuf_puts(html, "\" />");
    -			visit_children = false;
    -			break;
    +		}
     
    -		case NODE_STRONG:
    -			strbuf_puts(html, "");
    -			break;
    +		escape_html(html, node->string_content.ptr, node->string_content.size);
    +		strbuf_puts(html, "
    \n"); + break; - case NODE_EMPH: - strbuf_puts(html, ""); - break; + case CMARK_NODE_HTML: + cr(html); + strbuf_put(html, node->string_content.ptr, + node->string_content.size); + break; - default: - assert(false); - } + case CMARK_NODE_HRULE: + cr(html); + strbuf_puts(html, "
    \n"); + break; - if (visit_children && cur->first_child) { - cur = cur->first_child; - continue; - } + case CMARK_NODE_REFERENCE_DEF: + break; - next_sibling: - tight = finish_node(html, cur, tight); - if (cur == node) { - break; - } - if (cur->next) { - cur = cur->next; - continue; + case CMARK_NODE_PARAGRAPH: + parent = cmark_node_parent(node); + grandparent = cmark_node_parent(parent); + if (grandparent != NULL && + grandparent->type == CMARK_NODE_LIST) { + tight = grandparent->as.list.tight; + } else { + tight = false; } - cur = cur->parent; - goto next_sibling; - } -} - -// Returns the restored value of 'tight'. -static bool -finish_node(strbuf *html, cmark_node *node, bool tight) -{ - char end_header[] = "\n"; - - switch (node->type) { - case NODE_PARAGRAPH: if (!tight) { - strbuf_puts(html, "

    \n"); + if (entering) { + cr(html); + strbuf_puts(html, "

    "); + } else { + strbuf_puts(html, "

    \n"); + } } break; - case NODE_BLOCK_QUOTE: { - cmark_list *list = &node->as.list; - strbuf_puts(html, "\n"); - // Restore old 'tight' value. - tight = list->tight; - list->tight = false; + case CMARK_NODE_TEXT: + escape_html(html, node->as.literal.data, + node->as.literal.len); break; - } - case NODE_LIST_ITEM: - strbuf_puts(html, "\n"); + case CMARK_NODE_LINEBREAK: + strbuf_puts(html, "
    \n"); break; - case NODE_LIST: { - cmark_list *list = &node->as.list; - bool tmp; - strbuf_puts(html, - list->list_type == CMARK_BULLET_LIST ? - "\n" : "\n"); - // Restore old 'tight' value. - tmp = tight; - tight = list->tight; - list->tight = tmp; + case CMARK_NODE_SOFTBREAK: + strbuf_putc(html, '\n'); break; - } - case NODE_HEADER: - end_header[3] = '0' + node->as.header.level; - strbuf_puts(html, end_header); + case CMARK_NODE_INLINE_CODE: + strbuf_puts(html, ""); + escape_html(html, node->as.literal.data, node->as.literal.len); + strbuf_puts(html, ""); break; - case NODE_CODE_BLOCK: - strbuf_puts(html, "
    \n"); + case CMARK_NODE_INLINE_HTML: + strbuf_put(html, node->as.literal.data, node->as.literal.len); break; - case NODE_INLINE_CODE: - strbuf_puts(html, "
    "); + case CMARK_NODE_STRONG: + if (entering) { + strbuf_puts(html, ""); + } else { + strbuf_puts(html, ""); + } break; - case NODE_LINK: - strbuf_puts(html, ""); + case CMARK_NODE_EMPH: + if (entering) { + strbuf_puts(html, ""); + } else { + strbuf_puts(html, ""); + } break; - case NODE_STRONG: - strbuf_puts(html, ""); + case CMARK_NODE_LINK: + if (entering) { + strbuf_puts(html, "as.link.url) + escape_href(html, node->as.link.url, -1); + + if (node->as.link.title) { + strbuf_puts(html, "\" title=\""); + escape_html(html, node->as.link.title, -1); + } + + strbuf_puts(html, "\">"); + } else { + strbuf_puts(html, ""); + } break; - case NODE_EMPH: - strbuf_puts(html, ""); + case CMARK_NODE_IMAGE: + if (entering) { + strbuf_puts(html, "as.link.url) + escape_href(html, node->as.link.url, -1); + + strbuf_puts(html, "\" alt=\""); + state->plain = node; + } else { + if (node->as.link.title) { + strbuf_puts(html, "\" title=\""); + escape_html(html, node->as.link.title, -1); + } + + strbuf_puts(html, "\" />"); + } break; default: + assert(false); break; } - return tight; + // strbuf_putc(html, 'x'); + return 1; } char *cmark_render_html(cmark_node *root) { char *result; strbuf html = GH_BUF_INIT; - node_to_html(&html, root); - result = (char *)strbuf_detach(&html); - strbuf_free(&html); - return result; + struct render_state state = { &html, NULL }; + if (cmark_walk(root, S_render_node, &state)) { + result = (char *)strbuf_detach(&html); + strbuf_free(&html); + return result; + } else { + return NULL; + } } -- cgit v1.2.3