From f4afff1ce6c59a9a6ad7a5d370aab902a8cdb4c9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 25 Feb 2015 22:43:31 -0800 Subject: Added commonmark renderer. This is still incomplete. (See TODOs in the source.) --- man/man3/cmark.3 | 8 +- src/CMakeLists.txt | 1 + src/cmark.h | 5 + src/commonmark.c | 336 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.c | 8 +- 5 files changed, 356 insertions(+), 2 deletions(-) create mode 100644 src/commonmark.c diff --git a/man/man3/cmark.3 b/man/man3/cmark.3 index 2a55774..9ebdaf9 100644 --- a/man/man3/cmark.3 +++ b/man/man3/cmark.3 @@ -1,4 +1,4 @@ -.TH cmark 3 "March 15, 2015" "LOCAL" "Library Functions Manual" +.TH cmark 3 "March 18, 2015" "LOCAL" "Library Functions Manual" .SH NAME .PP @@ -473,6 +473,12 @@ to add an appropriate header and footer. .PP Render a \f[I]node\f[] tree as a groff man page, without the header. +.PP +\fIchar *\f[] \fBcmark_render_commonmark\f[](\fIcmark_node *root\f[], \fIint options\f[]) + +.PP +Render a \f[I]node\f[] tree as a commonmark document. + .PP .nf \fC diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cf9e17e..14ed306 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -32,6 +32,7 @@ set(LIBRARY_SOURCES man.c xml.c html.c + commonmark.c html_unescape.gperf houdini_href_e.c houdini_html_e.c diff --git a/src/cmark.h b/src/cmark.h index 12e1f14..1c06125 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -481,6 +481,11 @@ char *cmark_render_html(cmark_node *root, int options); CMARK_EXPORT char *cmark_render_man(cmark_node *root, int options); +/** Render a 'node' tree as a commonmark document. + */ +CMARK_EXPORT +char *cmark_render_commonmark(cmark_node *root, int options); + /** Default writer options. */ #define CMARK_OPT_DEFAULT 0 diff --git a/src/commonmark.c b/src/commonmark.c new file mode 100644 index 0000000..6c0de88 --- /dev/null +++ b/src/commonmark.c @@ -0,0 +1,336 @@ +#include +#include +#include +#include +#include + +#include "config.h" +#include "cmark.h" +#include "node.h" +#include "buffer.h" +#include "utf8.h" + +// Functions to convert cmark_nodes to commonmark strings. + +struct render_state { + cmark_strbuf* buffer; + cmark_strbuf* prefix; + int column; + int width; + int need_cr; + int last_breakable; + bool begin_line; +}; + +static inline void cr(struct render_state *state) +{ + if (state->need_cr < 1) { + state->need_cr = 1; + } +} + +static inline void blankline(struct render_state *state) +{ + if (state->need_cr < 2) { + state->need_cr = 2; + } +} + +static inline bool needs_escaping(int32_t c, unsigned char d) +{ + // TODO escape potential list markers at beginning of line + // (add param) + return (c == '*' || c == '_' || c == '[' || c == ']' || + c == '<' || c == '>' || c == '\\' || + (c == '&' && isalpha(d)) || + (c == '!' && d == '[')); +} + +static inline void out(struct render_state *state, + cmark_chunk str, + bool wrap, + bool escape) +{ + unsigned char* source = str.data; + int length = str.len; + unsigned char nextc; + int32_t c; + int i = 0; + int len; + cmark_chunk remainder = cmark_chunk_literal(""); + int k = state->buffer->size - 1; + + while (state->need_cr) { + if (k < 0 || state->buffer->ptr[k] == '\n') { + k -= 1; + } else { + cmark_strbuf_putc(state->buffer, '\n'); + if (state->need_cr > 1) { + cmark_strbuf_put(state->buffer, state->prefix->ptr, + state->prefix->size); + } + } + state->column = 0; + state->begin_line = true; + state->need_cr -= 1; + } + + while (i < length) { + if (state->begin_line) { + cmark_strbuf_put(state->buffer, state->prefix->ptr, + state->prefix->size); + // note: this assumes prefix is ascii: + state->column = state->prefix->size; + } + + len = utf8proc_iterate(source + i, length - i, &c); + nextc = source[i + len]; + if (c == 32 && wrap) { + if (!state->begin_line) { + cmark_strbuf_putc(state->buffer, ' '); + state->column += 1; + state->begin_line = false; + state->last_breakable = state->buffer->size - + 1; + // skip following spaces + while (source[i + 1] == ' ') { + i++; + } + } + + } else if (c == 10) { + cmark_strbuf_putc(state->buffer, '\n'); + state->column = 0; + state->begin_line = true; + state->last_breakable = 0; + } else if (escape && needs_escaping(c, nextc)) { + cmark_strbuf_putc(state->buffer, '\\'); + utf8proc_encode_char(c, state->buffer); + state->column += 2; + state->begin_line = false; + } else { + utf8proc_encode_char(c, state->buffer); + state->column += 1; + state->begin_line = false; + } + + // If adding the character went beyond width, look for an + // earlier place where the line could be broken: + if (state->width > 0 && + state->column > state->width && + !state->begin_line && + state->last_breakable > 0) { + + // copy from last_breakable to remainder + cmark_chunk_set_cstr(&remainder, (char *) state->buffer->ptr + state->last_breakable + 1); + // truncate at last_breakable + cmark_strbuf_truncate(state->buffer, state->last_breakable); + // add newline, prefix, and remainder + cmark_strbuf_putc(state->buffer, '\n'); + cmark_strbuf_put(state->buffer, state->prefix->ptr, + state->prefix->size); + cmark_strbuf_put(state->buffer, remainder.data, remainder.len); + state->column = state->prefix->size + remainder.len; + cmark_chunk_free(&remainder); + state->last_breakable = 0; + state->begin_line = false; + } + + i += len; + } +} + +static void lit(struct render_state *state, char *s, bool wrap) +{ + cmark_chunk str = cmark_chunk_literal(s); + out(state, str, wrap, false); +} + + +static int +S_render_node(cmark_node *node, cmark_event_type ev_type, + struct render_state *state) +{ + cmark_node *tmp; + int list_number; + bool entering = (ev_type == CMARK_EVENT_ENTER); + + switch (node->type) { + case CMARK_NODE_DOCUMENT: + if (!entering) { + cmark_strbuf_putc(state->buffer, '\n'); + } + break; + + case CMARK_NODE_BLOCK_QUOTE: + if (entering) { + lit(state, "> ", false); + cmark_strbuf_puts(state->prefix, "> "); + } else { + cmark_strbuf_truncate(state->prefix, state->prefix->size - 2); + blankline(state); + } + break; + + case CMARK_NODE_LIST: + break; + + case CMARK_NODE_ITEM: + if (entering) { + if (cmark_node_get_list_type(node->parent) == + CMARK_BULLET_LIST) { + lit(state, "- ", false); + cmark_strbuf_puts(state->prefix, " "); + } else { + list_number = cmark_node_get_list_start(node->parent); + tmp = node; + while (tmp->prev) { + tmp = tmp->prev; + list_number += 1; + } + lit(state, "1. ", false); + cmark_strbuf_puts(state->prefix, " "); + } + } else { + cmark_strbuf_truncate(state->prefix, state->prefix->size - + (cmark_node_get_list_type(node->parent) == + CMARK_BULLET_LIST ? 2 : 4)); + cr(state); + } + break; + + case CMARK_NODE_HEADER: + if (entering) { + for (int i = cmark_node_get_header_level(node); i > 0; i--) { + lit(state, "#", false); + } + lit(state, " ", false); + // TODO set a "nowrap" variable in state, and refer to this in out() + } else { + blankline(state); + } + break; + + case CMARK_NODE_CODE_BLOCK: + blankline(state); + // TODO variable number of ticks + lit(state, "```", false); + cr(state); + // TODO info string + // TODO use indented form if no info string? + out(state, node->as.code.literal, false, true); + cr(state); + lit(state, "```", false); + blankline(state); + break; + + case CMARK_NODE_HTML: + blankline(state); + out(state, node->as.code.literal, false, false); + blankline(state); + break; + + case CMARK_NODE_HRULE: + blankline(state); + lit(state, "-----", false); + blankline(state); + break; + + case CMARK_NODE_PARAGRAPH: + if (!entering) { + blankline(state); + } + break; + + case CMARK_NODE_TEXT: + out(state, node->as.literal, true, true); + break; + + case CMARK_NODE_LINEBREAK: + lit(state, "\\", false); + cr(state); + break; + + case CMARK_NODE_SOFTBREAK: + lit(state, " ", true); + break; + + case CMARK_NODE_CODE: + // TODO variable number of ticks + lit(state, "`", false); + out(state, node->as.literal, true, false); + lit(state, "`", false); + break; + + case CMARK_NODE_INLINE_HTML: + out(state, node->as.literal, true, false); + break; + + case CMARK_NODE_STRONG: + if (entering) { + lit(state, "**", false); + } else { + lit(state, "**", false); + } + break; + + case CMARK_NODE_EMPH: + if (entering) { + lit(state, "*", false); + } else { + lit(state, "*", false); + } + break; + + case CMARK_NODE_LINK: + if (entering) { + lit(state, "[", false); + } else { + lit(state, "](", false); + out(state, cmark_chunk_literal(cmark_node_get_url(node)), false, true); + // TODO title + lit(state, ")", false); + } + break; + + case CMARK_NODE_IMAGE: + if (entering) { + lit(state, "![", false); + } else { + lit(state, "](", false); + out(state, cmark_chunk_literal(cmark_node_get_url(node)), false, true); + // TODO title + lit(state, ")", false); + } + break; + + default: + assert(false); + break; + } + + return 1; +} + +char *cmark_render_commonmark(cmark_node *root, int options) +{ + char *result; + cmark_strbuf commonmark = GH_BUF_INIT; + cmark_strbuf prefix = GH_BUF_INIT; + struct render_state state = { &commonmark, &prefix, 0, 65, 0, 0, true }; + cmark_node *cur; + cmark_event_type ev_type; + cmark_iter *iter = cmark_iter_new(root); + + if (options == 0) options = 0; // avoid warning about unused parameters + + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cur = cmark_iter_get_node(iter); + S_render_node(cur, ev_type, &state); + } + result = (char *)cmark_strbuf_detach(&commonmark); + + cmark_strbuf_free(&prefix); + cmark_iter_free(iter); + return result; +} diff --git a/src/main.c b/src/main.c index 9a8fd98..c9b9013 100644 --- a/src/main.c +++ b/src/main.c @@ -17,13 +17,14 @@ typedef enum { FORMAT_HTML, FORMAT_XML, FORMAT_MAN, + FORMAT_COMMONMARK } writer_format; void print_usage() { printf("Usage: cmark [FILE*]\n"); printf("Options:\n"); - printf(" --to, -t FORMAT Specify output format (html, xml, man)\n"); + printf(" --to, -t FORMAT Specify output format (html, xml, man, commonmark)\n"); printf(" --sourcepos Include source position attribute\n"); printf(" --hardbreaks Treat newlines as hard line breaks\n"); printf(" --smart Use smart punctuation\n"); @@ -46,6 +47,9 @@ static void print_document(cmark_node *document, writer_format writer, case FORMAT_MAN: result = cmark_render_man(document, options); break; + case FORMAT_COMMONMARK: + result = cmark_render_commonmark(document, options); + break; default: fprintf(stderr, "Unknown format %d\n", writer); exit(1); @@ -98,6 +102,8 @@ int main(int argc, char *argv[]) writer = FORMAT_HTML; } else if (strcmp(argv[i], "xml") == 0) { writer = FORMAT_XML; + } else if (strcmp(argv[i], "commonmark") == 0) { + writer = FORMAT_COMMONMARK; } else { fprintf(stderr, "Unknown format %s\n", argv[i]); -- cgit v1.2.3