summary refs log tree commit diff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2015-02-14 16:36:43 -0800
committer John MacFarlane <jgm@berkeley.edu> 2015-02-14 17:52:55 -0800
commit bb26b18173df983c57459809e8b1691b89907a58 (patch)
tree a7878c0960755c79bf79a310363f20a91c2c87d4
parent c6417fc0b9cd240eb175501d44f68ea9d4406ec4 (diff)
Added CMARK_OPT_SMARTPUNCT and --smart option.
So far this is only implemented for the HTML renderer. Ultimately some of this should be factored out into a form that can be used in other renderers.
-rw-r--r-- man/man3/cmark.3 14
-rw-r--r-- src/cmark.h 4
-rw-r--r-- src/html.c 88
-rw-r--r-- src/main.c 3
4 files changed, 106 insertions, 3 deletions
diff --git a/man/man3/cmark.3 b/man/man3/cmark.3
index 2c4dd14..c8fc4c7 100644
--- a/man/man3/cmark.3
+++ b/man/man3/cmark.3
@@ -1,4 +1,4 @@
-.TH cmark 3 "January 28, 2015" "LOCAL" "Library Functions Manual"
+.TH cmark 3 "February 14, 2015" "LOCAL" "Library Functions Manual"
.SH
NAME
.PP
@@ -520,6 +520,18 @@ Render \f[C]softbreak\f[] elements as hard line breaks.
.PP
Normalize tree by consolidating adjacent text nodes.
+.PP
+.nf
+\fC
+.RS 0n
+#define CMARK_OPT_SMARTPUNCT 8
+.RE
+\f[]
+.fi
+
+.PP
+Convert straight quotes to curly, \-\-\- to em dashes, \-\- to en dashes.
+
.SS
Version information
diff --git a/src/cmark.h b/src/cmark.h
index 9f312bc..f106371 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -496,6 +496,10 @@ char *cmark_render_man(cmark_node *root, long options);
*/
#define CMARK_OPT_NORMALIZE 4
+/** Convert straight quotes to curly, --- to em dashes, -- to en dashes.
+ */
+#define CMARK_OPT_SMARTPUNCT 8
+
/**
* ## Version information
*/
diff --git a/src/html.c b/src/html.c
index 8ccb495..c8cc9fb 100644
--- a/src/html.c
+++ b/src/html.c
@@ -6,6 +6,7 @@
#include "config.h"
#include "cmark.h"
#include "node.h"
+#include "utf8.h"
#include "buffer.h"
#include "houdini.h"
@@ -60,6 +61,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
char start_header[] = "<h0";
char end_header[] = "</h0";
bool tight;
+ int lastout, i;
+ cmark_chunk lit;
+ char before_char, after_char, c;
+ bool left_flanking, right_flanking;
bool entering = (ev_type == CMARK_EVENT_ENTER);
@@ -217,8 +222,87 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
break;
case CMARK_NODE_TEXT:
- escape_html(html, node->as.literal.data,
- node->as.literal.len);
+ if (options & CMARK_OPT_SMARTPUNCT) {
+ lastout = 0;
+ i = 0;
+ lit = node->as.literal;
+ while (i < lit.len) {
+ c = lit.data[i];
+ // replace with efficient lookup table:
+ if (c != '"' && c != '-' && c != '\'' && c != '.') {
+ i++;
+ continue;
+ }
+ escape_html(html, lit.data + lastout,
+ i - lastout);
+ if (c == '\'' || c == '"') {
+ before_char = i == 0 ? ',' : lit.data[i - 1];
+ after_char = i == lit.len - 1 ? ',' : lit.data[i + 1];
+ left_flanking = !utf8proc_is_space(after_char) &&
+ !(utf8proc_is_punctuation(after_char) &&
+ !utf8proc_is_space(before_char) &&
+ !utf8proc_is_punctuation(before_char));
+ right_flanking = !utf8proc_is_space(before_char) &&
+ !(utf8proc_is_punctuation(before_char) &&
+ !utf8proc_is_space(after_char) &&
+ !utf8proc_is_punctuation(after_char));
+ }
+ switch (lit.data[i]) {
+ case '"':
+ if (right_flanking) {
+ cmark_strbuf_puts(html, "&rdquo;");
+ } else {
+ cmark_strbuf_puts(html, "&ldquo;");
+ }
+ i += 1;
+ break;
+ case '\'':
+ if (left_flanking && !right_flanking) {
+ cmark_strbuf_puts(html, "&lsquo;");
+ } else {
+ cmark_strbuf_puts(html, "&rsquo;");
+ }
+ i += 1;
+ break;
+ case '-':
+ if (i < lit.len - 1 && lit.data[i + 1] == '-') {
+ if (lit.data[i + 2] == '-') {
+ cmark_strbuf_puts(html,
+ "&mdash;");
+ i += 3;
+ } else {
+ cmark_strbuf_puts(html, "&ndash;");
+ i += 2;
+ }
+ } else {
+ cmark_strbuf_putc(html, c);
+ i += 1;
+ }
+ break;
+ case '.':
+ if (i < lit.len - 2 && lit.data[i + 1] == '.' &&
+ lit.data[i + 2] == '.') {
+ cmark_strbuf_puts(html,
+ "&hellip;");
+ i += 3;
+ } else {
+ cmark_strbuf_putc(html, c);
+ i += 1;
+ }
+ break;
+ default:
+ cmark_strbuf_putc(html, c);
+ i++;
+ }
+ lastout = i;
+ }
+ escape_html(html, node->as.literal.data + lastout,
+ i - lastout);
+
+ } else {
+ escape_html(html, node->as.literal.data,
+ node->as.literal.len);
+ }
break;
case CMARK_NODE_LINEBREAK:
diff --git a/src/main.c b/src/main.c
index ef40a88..3834c1f 100644
--- a/src/main.c
+++ b/src/main.c
@@ -26,6 +26,7 @@ void print_usage()
printf(" --to, -t FORMAT Specify output format (html, xml, man)\n");
printf(" --sourcepos Include source position attribute\n");
printf(" --hardbreaks Treat newlines as hard line breaks\n");
+ printf(" --smart Use smart punctuation\n");
printf(" --normalize Consolidate adjacent text nodes\n");
printf(" --help, -h Print usage information\n");
printf(" --version Print version\n");
@@ -80,6 +81,8 @@ int main(int argc, char *argv[])
options |= CMARK_OPT_SOURCEPOS;
} else if (strcmp(argv[i], "--hardbreaks") == 0) {
options |= CMARK_OPT_HARDBREAKS;
+ } else if (strcmp(argv[i], "--smart") == 0) {
+ options |= CMARK_OPT_SMARTPUNCT;
} else if (strcmp(argv[i], "--normalize") == 0) {
options |= CMARK_OPT_NORMALIZE;
} else if ((strcmp(argv[i], "--help") == 0) ||