diff options
| -rw-r--r-- | man/man3/cmark.3 | 14 | ||||
| -rw-r--r-- | src/cmark.h | 4 | ||||
| -rw-r--r-- | src/html.c | 88 | ||||
| -rw-r--r-- | src/main.c | 3 | 
4 files changed, 106 insertions, 3 deletions
diff --git a/man/man3/cmark.3 b/man/man3/cmark.3 index 2c4dd14..c8fc4c7 100644 --- a/man/man3/cmark.3 +++ b/man/man3/cmark.3 @@ -1,4 +1,4 @@ -.TH cmark 3 "January 28, 2015" "LOCAL" "Library Functions Manual" +.TH cmark 3 "February 14, 2015" "LOCAL" "Library Functions Manual"  .SH  NAME  .PP @@ -520,6 +520,18 @@ Render \f[C]softbreak\f[] elements as hard line breaks.  .PP  Normalize tree by consolidating adjacent text nodes. +.PP +.nf +\fC +.RS 0n +#define CMARK_OPT_SMARTPUNCT 8 +.RE +\f[] +.fi + +.PP +Convert straight quotes to curly, \-\-\- to em dashes, \-\- to en dashes. +  .SS  Version information diff --git a/src/cmark.h b/src/cmark.h index 9f312bc..f106371 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -496,6 +496,10 @@ char *cmark_render_man(cmark_node *root, long options);   */  #define CMARK_OPT_NORMALIZE 4 +/** Convert straight quotes to curly, --- to em dashes, -- to en dashes. + */ +#define CMARK_OPT_SMARTPUNCT 8 +  /**   * ## Version information   */ @@ -6,6 +6,7 @@  #include "config.h"  #include "cmark.h"  #include "node.h" +#include "utf8.h"  #include "buffer.h"  #include "houdini.h" @@ -60,6 +61,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,  	char start_header[] = "<h0";  	char end_header[] = "</h0";  	bool tight; +	int lastout, i; +	cmark_chunk lit; +	char before_char, after_char, c; +	bool left_flanking, right_flanking;  	bool entering = (ev_type == CMARK_EVENT_ENTER); @@ -217,8 +222,87 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,  		break;  	case CMARK_NODE_TEXT: -		escape_html(html, node->as.literal.data, -		            node->as.literal.len); +		if (options & CMARK_OPT_SMARTPUNCT) { +			lastout = 0; +			i = 0; +			lit = node->as.literal; +			while (i < lit.len) { +				c = lit.data[i]; +				// replace with efficient lookup table: +				if (c != '"' && c != '-' && c != '\'' && c != '.') { +					i++; +					continue; +				} +				escape_html(html, lit.data + lastout, +					    i - lastout); +				if (c == '\'' || c == '"') { +					before_char = i == 0 ? ',' : lit.data[i - 1]; +					after_char = i == lit.len - 1 ? ',' : lit.data[i + 1]; +					left_flanking = !utf8proc_is_space(after_char) && +						!(utf8proc_is_punctuation(after_char) && +						  !utf8proc_is_space(before_char) && +						  !utf8proc_is_punctuation(before_char)); +					right_flanking = !utf8proc_is_space(before_char) && +						!(utf8proc_is_punctuation(before_char) && +						  !utf8proc_is_space(after_char) && +						  !utf8proc_is_punctuation(after_char)); +				} +				switch (lit.data[i]) { +				case '"': +					if (right_flanking) { +						cmark_strbuf_puts(html, "”"); +					} else { +						cmark_strbuf_puts(html, "“"); +					} +					i += 1; +					break; +				case '\'': +					if (left_flanking && !right_flanking) { +						cmark_strbuf_puts(html, "‘"); +					} else { +						cmark_strbuf_puts(html, "’"); +					} +					i += 1; +					break; +				case '-': +					if (i < lit.len - 1 && lit.data[i + 1] == '-') { +						if (lit.data[i + 2] == '-') { +							cmark_strbuf_puts(html, +									  "—"); +							i += 3; +						} else { +							cmark_strbuf_puts(html, "–"); +							i += 2; +						} +					} else { +						cmark_strbuf_putc(html, c); +						i += 1; +					} +					break; +				case '.': +					if (i < lit.len - 2 && lit.data[i + 1] == '.' && +					    lit.data[i + 2] == '.') { +							cmark_strbuf_puts(html, +									  "…"); +							i += 3; +					} else { +						cmark_strbuf_putc(html, c); +						i += 1; +					} +					break; +				default: +					cmark_strbuf_putc(html, c); +					i++; +				} +				lastout = i; +			} +			escape_html(html, node->as.literal.data + lastout, +				    i - lastout); + +		} else { +			escape_html(html, node->as.literal.data, +				    node->as.literal.len); +		}  		break;  	case CMARK_NODE_LINEBREAK: @@ -26,6 +26,7 @@ void print_usage()  	printf("  --to, -t FORMAT  Specify output format (html, xml, man)\n");  	printf("  --sourcepos      Include source position attribute\n");  	printf("  --hardbreaks     Treat newlines as hard line breaks\n"); +	printf("  --smart          Use smart punctuation\n");  	printf("  --normalize      Consolidate adjacent text nodes\n");  	printf("  --help, -h       Print usage information\n");  	printf("  --version        Print version\n"); @@ -80,6 +81,8 @@ int main(int argc, char *argv[])  			options |= CMARK_OPT_SOURCEPOS;  		} else if (strcmp(argv[i], "--hardbreaks") == 0) {  			options |= CMARK_OPT_HARDBREAKS; +		} else if (strcmp(argv[i], "--smart") == 0) { +			options |= CMARK_OPT_SMARTPUNCT;  		} else if (strcmp(argv[i], "--normalize") == 0) {  			options |= CMARK_OPT_NORMALIZE;  		} else if ((strcmp(argv[i], "--help") == 0) ||  | 
