From bb26b18173df983c57459809e8b1691b89907a58 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 14 Feb 2015 16:36:43 -0800 Subject: Added CMARK_OPT_SMARTPUNCT and --smart option. So far this is only implemented for the HTML renderer. Ultimately some of this should be factored out into a form that can be used in other renderers. --- src/cmark.h | 4 +++ src/html.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- src/main.c | 3 +++ 3 files changed, 93 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/cmark.h b/src/cmark.h index 9f312bc..f106371 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -496,6 +496,10 @@ char *cmark_render_man(cmark_node *root, long options); */ #define CMARK_OPT_NORMALIZE 4 +/** Convert straight quotes to curly, --- to em dashes, -- to en dashes. + */ +#define CMARK_OPT_SMARTPUNCT 8 + /** * ## Version information */ diff --git a/src/html.c b/src/html.c index 8ccb495..c8cc9fb 100644 --- a/src/html.c +++ b/src/html.c @@ -6,6 +6,7 @@ #include "config.h" #include "cmark.h" #include "node.h" +#include "utf8.h" #include "buffer.h" #include "houdini.h" @@ -60,6 +61,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, char start_header[] = "as.literal.data, - node->as.literal.len); + if (options & CMARK_OPT_SMARTPUNCT) { + lastout = 0; + i = 0; + lit = node->as.literal; + while (i < lit.len) { + c = lit.data[i]; + // replace with efficient lookup table: + if (c != '"' && c != '-' && c != '\'' && c != '.') { + i++; + continue; + } + escape_html(html, lit.data + lastout, + i - lastout); + if (c == '\'' || c == '"') { + before_char = i == 0 ? ',' : lit.data[i - 1]; + after_char = i == lit.len - 1 ? ',' : lit.data[i + 1]; + left_flanking = !utf8proc_is_space(after_char) && + !(utf8proc_is_punctuation(after_char) && + !utf8proc_is_space(before_char) && + !utf8proc_is_punctuation(before_char)); + right_flanking = !utf8proc_is_space(before_char) && + !(utf8proc_is_punctuation(before_char) && + !utf8proc_is_space(after_char) && + !utf8proc_is_punctuation(after_char)); + } + switch (lit.data[i]) { + case '"': + if (right_flanking) { + cmark_strbuf_puts(html, "”"); + } else { + cmark_strbuf_puts(html, "“"); + } + i += 1; + break; + case '\'': + if (left_flanking && !right_flanking) { + cmark_strbuf_puts(html, "‘"); + } else { + cmark_strbuf_puts(html, "’"); + } + i += 1; + break; + case '-': + if (i < lit.len - 1 && lit.data[i + 1] == '-') { + if (lit.data[i + 2] == '-') { + cmark_strbuf_puts(html, + "—"); + i += 3; + } else { + cmark_strbuf_puts(html, "–"); + i += 2; + } + } else { + cmark_strbuf_putc(html, c); + i += 1; + } + break; + case '.': + if (i < lit.len - 2 && lit.data[i + 1] == '.' && + lit.data[i + 2] == '.') { + cmark_strbuf_puts(html, + "…"); + i += 3; + } else { + cmark_strbuf_putc(html, c); + i += 1; + } + break; + default: + cmark_strbuf_putc(html, c); + i++; + } + lastout = i; + } + escape_html(html, node->as.literal.data + lastout, + i - lastout); + + } else { + escape_html(html, node->as.literal.data, + node->as.literal.len); + } break; case CMARK_NODE_LINEBREAK: diff --git a/src/main.c b/src/main.c index ef40a88..3834c1f 100644 --- a/src/main.c +++ b/src/main.c @@ -26,6 +26,7 @@ void print_usage() printf(" --to, -t FORMAT Specify output format (html, xml, man)\n"); printf(" --sourcepos Include source position attribute\n"); printf(" --hardbreaks Treat newlines as hard line breaks\n"); + printf(" --smart Use smart punctuation\n"); printf(" --normalize Consolidate adjacent text nodes\n"); printf(" --help, -h Print usage information\n"); printf(" --version Print version\n"); @@ -80,6 +81,8 @@ int main(int argc, char *argv[]) options |= CMARK_OPT_SOURCEPOS; } else if (strcmp(argv[i], "--hardbreaks") == 0) { options |= CMARK_OPT_HARDBREAKS; + } else if (strcmp(argv[i], "--smart") == 0) { + options |= CMARK_OPT_SMARTPUNCT; } else if (strcmp(argv[i], "--normalize") == 0) { options |= CMARK_OPT_NORMALIZE; } else if ((strcmp(argv[i], "--help") == 0) || -- cgit v1.2.3