From bb26b18173df983c57459809e8b1691b89907a58 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 14 Feb 2015 16:36:43 -0800 Subject: Added CMARK_OPT_SMARTPUNCT and --smart option. So far this is only implemented for the HTML renderer. Ultimately some of this should be factored out into a form that can be used in other renderers. --- src/html.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 2 deletions(-) (limited to 'src/html.c') diff --git a/src/html.c b/src/html.c index 8ccb495..c8cc9fb 100644 --- a/src/html.c +++ b/src/html.c @@ -6,6 +6,7 @@ #include "config.h" #include "cmark.h" #include "node.h" +#include "utf8.h" #include "buffer.h" #include "houdini.h" @@ -60,6 +61,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, char start_header[] = "as.literal.data, - node->as.literal.len); + if (options & CMARK_OPT_SMARTPUNCT) { + lastout = 0; + i = 0; + lit = node->as.literal; + while (i < lit.len) { + c = lit.data[i]; + // replace with efficient lookup table: + if (c != '"' && c != '-' && c != '\'' && c != '.') { + i++; + continue; + } + escape_html(html, lit.data + lastout, + i - lastout); + if (c == '\'' || c == '"') { + before_char = i == 0 ? ',' : lit.data[i - 1]; + after_char = i == lit.len - 1 ? ',' : lit.data[i + 1]; + left_flanking = !utf8proc_is_space(after_char) && + !(utf8proc_is_punctuation(after_char) && + !utf8proc_is_space(before_char) && + !utf8proc_is_punctuation(before_char)); + right_flanking = !utf8proc_is_space(before_char) && + !(utf8proc_is_punctuation(before_char) && + !utf8proc_is_space(after_char) && + !utf8proc_is_punctuation(after_char)); + } + switch (lit.data[i]) { + case '"': + if (right_flanking) { + cmark_strbuf_puts(html, "”"); + } else { + cmark_strbuf_puts(html, "“"); + } + i += 1; + break; + case '\'': + if (left_flanking && !right_flanking) { + cmark_strbuf_puts(html, "‘"); + } else { + cmark_strbuf_puts(html, "’"); + } + i += 1; + break; + case '-': + if (i < lit.len - 1 && lit.data[i + 1] == '-') { + if (lit.data[i + 2] == '-') { + cmark_strbuf_puts(html, + "—"); + i += 3; + } else { + cmark_strbuf_puts(html, "–"); + i += 2; + } + } else { + cmark_strbuf_putc(html, c); + i += 1; + } + break; + case '.': + if (i < lit.len - 2 && lit.data[i + 1] == '.' && + lit.data[i + 2] == '.') { + cmark_strbuf_puts(html, + "…"); + i += 3; + } else { + cmark_strbuf_putc(html, c); + i += 1; + } + break; + default: + cmark_strbuf_putc(html, c); + i++; + } + lastout = i; + } + escape_html(html, node->as.literal.data + lastout, + i - lastout); + + } else { + escape_html(html, node->as.literal.data, + node->as.literal.len); + } break; case CMARK_NODE_LINEBREAK: -- cgit v1.2.3