From e79cc4d68e6333e13d53e5262572e99c6877cbc6 Mon Sep 17 00:00:00 2001 From: KatolaZ Date: Wed, 2 Sep 2020 09:11:08 +0100 Subject: add support for groff mom filter --- src/CMakeLists.txt | 1 + src/cmark.h | 6 ++ src/main.c | 10 +- src/mom.c | 275 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 290 insertions(+), 2 deletions(-) create mode 100644 src/mom.c diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 48ddd01..d4e2a57 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -40,6 +40,7 @@ set(LIBRARY_SOURCES houdini_html_e.c houdini_html_u.c cmark_ctype.c + mom.c ${HEADERS} ) diff --git a/src/cmark.h b/src/cmark.h index a37c185..afd90e9 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -536,6 +536,12 @@ char *cmark_render_commonmark(cmark_node *root, int options, int width); CMARK_EXPORT char *cmark_render_latex(cmark_node *root, int options, int width); +/** Render a 'node' tree as a groff mom document. + * It is the caller's responsibility to free the returned buffer. 
+ */ +CMARK_EXPORT +char *cmark_render_mom(cmark_node *root, int options, int width); + /** * ## Options */ diff --git a/src/main.c b/src/main.c index 0cc2b3d..17bfcfa 100644 --- a/src/main.c +++ b/src/main.c @@ -25,14 +25,15 @@ typedef enum { FORMAT_XML, FORMAT_MAN, FORMAT_COMMONMARK, - FORMAT_LATEX + FORMAT_LATEX, + FORMAT_MOM } writer_format; void print_usage() { printf("Usage: cmark [FILE*]\n"); printf("Options:\n"); printf(" --to, -t FORMAT Specify output format (html, xml, man, " - "commonmark, latex)\n"); + "commonmark, latex, mom)\n"); printf(" --width WIDTH Specify wrap width (default 0 = nowrap)\n"); printf(" --sourcepos Include source position attribute\n"); printf(" --hardbreaks Treat newlines as hard line breaks\n"); @@ -65,6 +66,9 @@ static void print_document(cmark_node *document, writer_format writer, case FORMAT_LATEX: result = cmark_render_latex(document, options, width); break; + case FORMAT_MOM: + result = cmark_render_mom(document, options, width); + break; default: fprintf(stderr, "Unknown format %d\n", writer); exit(1); @@ -148,6 +152,8 @@ int main(int argc, char *argv[]) { writer = FORMAT_COMMONMARK; } else if (strcmp(argv[i], "latex") == 0) { writer = FORMAT_LATEX; + } else if (strcmp(argv[i], "mom") == 0) { + writer = FORMAT_MOM; } else { fprintf(stderr, "Unknown format %s\n", argv[i]); exit(1); diff --git a/src/mom.c b/src/mom.c new file mode 100644 index 0000000..aa866f0 --- /dev/null +++ b/src/mom.c @@ -0,0 +1,275 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#include "config.h" +#include "cmark.h" +#include "node.h" +#include "buffer.h" +#include "utf8.h" +#include "render.h" + +#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) +#define LIT(s) renderer->out(renderer, s, false, LITERAL) +#define CR() renderer->cr(renderer) +#define BLANKLINE() renderer->blankline(renderer) +#define LIST_NUMBER_SIZE 20 + +// Functions to convert cmark_nodes to groff mom strings.
+/* Per-code-point output callback passed to cmark_render().
+ *
+ * With LITERAL escaping the code point is written unchanged. Otherwise the
+ * characters that groff would misread are replaced by escapes:
+ *   - '.' and '\'' get a leading "\&" when they are the first thing on an
+ *     output line (renderer->begin_line);
+ *   - '-' becomes "\-" and '\\' becomes "\e";
+ *   - typographic quotes and dashes become named glyphs (\[oq], \[em], ...).
+ *
+ * `nextc` is unused.
+ *
+ * NOTE(review): '"' is not escaped, so a double quote inside a heading would
+ * end the quoted ".HEADING" argument early -- confirm whether that matters.
+ */
+static void S_outc(cmark_renderer *renderer, cmark_escaping escape, int32_t c,
+                   unsigned char nextc) {
+  (void)(nextc);
+
+  if (escape == LITERAL) {
+    cmark_render_code_point(renderer, c);
+    return;
+  }
+
+  switch (c) {
+  case '.':
+  case '\'':
+    // Only a problem in the first column of an output line.
+    if (renderer->begin_line) {
+      cmark_render_ascii(renderer, "\\&");
+    }
+    cmark_render_code_point(renderer, c);
+    break;
+  case '-':
+    cmark_render_ascii(renderer, "\\-");
+    break;
+  case '\\':
+    cmark_render_ascii(renderer, "\\e");
+    break;
+  case 8216: // left single quote
+    cmark_render_ascii(renderer, "\\[oq]");
+    break;
+  case 8217: // right single quote
+    cmark_render_ascii(renderer, "\\[cq]");
+    break;
+  case 8220: // left double quote
+    cmark_render_ascii(renderer, "\\[lq]");
+    break;
+  case 8221: // right double quote
+    cmark_render_ascii(renderer, "\\[rq]");
+    break;
+  case 8212: // em dash
+    cmark_render_ascii(renderer, "\\[em]");
+    break;
+  case 8211: // en dash
+    cmark_render_ascii(renderer, "\\[en]");
+    break;
+  default:
+    cmark_render_code_point(renderer, c);
+  }
+}
+
+/* Emit the mom markup for one node event.
+ *
+ * renderer: output sink; text goes through the OUT/LIT/CR macros.
+ * node:     node being visited.
+ * ev_type:  CMARK_EVENT_ENTER selects the opening markup, anything else the
+ *           closing markup.
+ * options:  CMARK_OPT_* flags; NOBREAKS and HARDBREAKS are consulted here.
+ *
+ * Always returns 1. HTML blocks and inline HTML are dropped. An unknown node
+ * type trips assert(false).
+ */
+static int S_render_node(cmark_renderer *renderer, cmark_node *node,
+                         cmark_event_type ev_type, int options) {
+  bool entering = (ev_type == CMARK_EVENT_ENTER);
+  bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options);
+  char s_tmp[128];
+
+  switch (node->type) {
+  case CMARK_NODE_DOCUMENT:
+    if (entering) {
+      // Fixed preamble; nothing is emitted when leaving the document.
+      LIT(".DOCTYPE DEFAULT");
+      CR();
+      LIT(".PRINTSTYLE TYPESET");
+      CR();
+      LIT(".JUSTIFY");
+      CR();
+      LIT(".START");
+      CR();
+      CR();
+    }
+    break;
+
+  case CMARK_NODE_BLOCK_QUOTE:
+    if (entering) {
+      CR();
+      LIT(".BLOCKQUOTE");
+      CR();
+    } else {
+      CR();
+      LIT(".BLOCKQUOTE END");
+      CR();
+    }
+    break;
+
+  case CMARK_NODE_LIST:
+    if (entering) {
+      CR();
+      // The list type is the argument of .LIST, so it has to be written on
+      // the same line, before .SHIFT_LIST.
+      if (cmark_node_get_list_type(node) == CMARK_BULLET_LIST) {
+        LIT(".LIST BULLET");
+      } else {
+        LIT(".LIST DIGIT");
+      }
+      CR();
+      LIT(".SHIFT_LIST 20p");
+      CR();
+    } else {
+      CR();
+      LIT(".LIST OFF");
+      CR();
+    }
+    break;
+
+  case CMARK_NODE_ITEM:
+    if (entering) {
+      CR();
+      LIT(".ITEM");
+      CR();
+    } else {
+      CR();
+    }
+    break;
+
+  case CMARK_NODE_HEADING:
+    if (entering) {
+      CR();
+      // Opens the quoted heading argument; the closing quote is written when
+      // the node is left, so the heading's inline content lands in between.
+      snprintf(s_tmp, sizeof(s_tmp), ".HEADING %d \"",
+               cmark_node_get_heading_level(node));
+      LIT(s_tmp);
+    } else {
+      LIT("\"");
+      CR();
+    }
+    break;
+
+  case CMARK_NODE_CODE_BLOCK:
+    CR();
+    LIT(".QUOTE");
+    CR();
+    LIT(".CODE BR");
+    CR();
+    OUT(cmark_node_get_literal(node), false, NORMAL);
+    CR();
+    LIT(".QUOTE OFF");
+    CR();
+    break;
+
+  case CMARK_NODE_HTML_BLOCK:
+    break;
+
+  case CMARK_NODE_CUSTOM_BLOCK:
+    CR();
+    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
+        false, LITERAL);
+    CR();
+    break;
+
+  case CMARK_NODE_THEMATIC_BREAK:
+    CR();
+    LIT(".PP\n * * * * *");
+    CR();
+    break;
+
+  case CMARK_NODE_PARAGRAPH:
+    if (entering) {
+      CR();
+      LIT(".PP");
+      CR();
+    } else {
+      CR();
+    }
+    break;
+
+  case CMARK_NODE_TEXT:
+    OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
+    break;
+
+  case CMARK_NODE_LINEBREAK:
+    CR();
+    LIT(".BR");
+    CR();
+    break;
+
+  case CMARK_NODE_SOFTBREAK:
+    if (options & CMARK_OPT_HARDBREAKS) {
+      // Same break as CMARK_NODE_LINEBREAK. The man-page sequence
+      // ".PD 0/.P/.PD" is not part of mom, and a macro must start its own
+      // line, hence the leading CR().
+      CR();
+      LIT(".BR");
+      CR();
+    } else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) {
+      CR();
+    } else {
+      OUT(" ", allow_wrap, LITERAL);
+    }
+    break;
+
+  case CMARK_NODE_CODE:
+    LIT("\\f[C]");
+    OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
+    LIT("\\f[]");
+    break;
+
+  case CMARK_NODE_HTML_INLINE:
+    break;
+
+  case CMARK_NODE_CUSTOM_INLINE:
+    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
+        false, LITERAL);
+    break;
+
+  case CMARK_NODE_STRONG:
+    // Inline toggles instead of the .SETBOLDER macro: the macro only sets the
+    // emboldening amount, and putting it on its own line would split heading
+    // arguments and detach following punctuation.
+    if (entering) {
+      LIT("\\*[BOLDER]");
+    } else {
+      LIT("\\*[BOLDERX]");
+    }
+    break;
+
+  case CMARK_NODE_EMPH:
+    // Inline toggles, for the same reasons as CMARK_NODE_STRONG.
+    if (entering) {
+      LIT("\\*[SLANT]");
+    } else {
+      LIT("\\*[SLANTX]");
+    }
+    break;
+
+  case CMARK_NODE_LINK:
+    // The link text is rendered by the child nodes; the URL is appended in
+    // parentheses once they are done.
+    if (!entering) {
+      LIT(" (");
+      OUT(cmark_node_get_url(node), allow_wrap, URL);
+      LIT(")");
+    }
+    break;
+
+  case CMARK_NODE_IMAGE:
+    if (entering) {
+      LIT("[IMAGE: ");
+    } else {
+      LIT("]");
+    }
+    break;
+
+  default:
+    assert(false);
+    break;
+  }
+
+  return 1;
+}
+
+/* Render the tree rooted at `root` as a groff mom document by running
+ * cmark_render() with the mom callbacks above. `options` and `width` are
+ * forwarded unchanged. The caller is responsible for freeing the returned
+ * buffer.
+ */
+char *cmark_render_mom(cmark_node *root, int options, int width) {
+  return cmark_render(root, options, width, S_outc, S_render_node);
+}
-- 
cgit v1.2.3