summary | refs | log | tree | commit | diff
path: root/src/smart.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/smart.c')
-rw-r--r-- src/smart.c 146
1 files changed, 146 insertions, 0 deletions
diff --git a/src/smart.c b/src/smart.c
new file mode 100644
index 0000000..54c9740
--- /dev/null
+++ b/src/smart.c
@@ -0,0 +1,146 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "config.h"
+#include "cmark.h"
+#include "node.h"
+#include "utf8.h"
+#include "buffer.h"
+#include "chunk.h"
+
+/**
+ * Append the literal text of `node` to `buf`, substituting the
+ * caller-supplied strings for plain ASCII punctuation:
+ *
+ *   "    -> right_double_quote if right-flanking, else left_double_quote
+ *   '    -> left_single_quote if left- but not right-flanking,
+ *           else right_single_quote
+ *   --   -> en_dash
+ *   ---  -> em_dash
+ *   ...  -> ellipses
+ *
+ * Every run of text between substitutions is emitted through `escape`,
+ * which receives the output buffer, a pointer to the run and its length
+ * in bytes (the run is not NUL-terminated by this function).
+ *
+ * Quote direction depends on the code points on either side of the
+ * quote.  At the edges of this node the neighbouring node is consulted:
+ * an adjacent text node contributes its nearest character, a soft or
+ * hard line break (or no neighbour at all) counts as '\n', and any other
+ * node type counts as 'A'.
+ * NOTE(review): 'A' is presumably a stand-in for "neither whitespace nor
+ * punctuation" -- confirm against utf8proc_is_space/is_punctuation.
+ *
+ * The caller's `node` is only read; output goes to `buf`.
+ */
+void escape_with_smart(cmark_strbuf *buf,
+                       cmark_node *node,
+                       void (*escape)(cmark_strbuf *, const unsigned char *, int),
+                       const char *left_double_quote,
+                       const char *right_double_quote,
+                       const char *left_single_quote,
+                       const char *right_single_quote,
+                       const char *em_dash,
+                       const char *en_dash,
+                       const char *ellipses)
+{
+    int32_t c = 0;
+    int32_t after_char = 0;
+    int32_t before_char = 0;
+    int len;
+    // Only meaningful for quotes; initialised so no path reads garbage.
+    bool left_flanking = 0, right_flanking = 0;
+    int lastout = 0;  // offset of the first byte not yet written to buf
+    int i = 0;        // scan position in lit; must start at 0
+    int j;            // separate index for walking the previous node
+    cmark_chunk lit = node->as.literal;
+    cmark_chunk neighbour;
+
+    // set before_char based on previous text node if there is one:
+    if (node->prev) {
+        if (node->prev->type == CMARK_NODE_TEXT) {
+            neighbour = node->prev->as.literal;
+            if (neighbour.len > 0) {
+                // walk back to the beginning of the UTF-8 sequence
+                // (continuation bytes have the bit pattern 10xxxxxx):
+                j = neighbour.len - 1;
+                while (j > 0 && neighbour.data[j] >> 6 == 2) {
+                    j -= 1;
+                }
+                len = utf8proc_iterate(neighbour.data + j,
+                                       neighbour.len - j,
+                                       &before_char);
+            } else {
+                // An empty literal has no last character to decode.
+                len = -1;
+            }
+            if (len < 1) {
+                before_char = '\n';
+            }
+
+        } else if (node->prev->type == CMARK_NODE_SOFTBREAK ||
+                   node->prev->type == CMARK_NODE_LINEBREAK) {
+            before_char = '\n';
+
+        } else {
+            before_char = 'A';
+        }
+    } else {
+        before_char = '\n';
+    }
+
+    while (i < lit.len) {
+        len = utf8proc_iterate(lit.data + i, lit.len - i, &c);
+        if (len < 1) {
+            // Decoding failed: step over one raw byte so the scan always
+            // advances instead of moving backwards.
+            len = 1;
+            c = lit.data[i];
+        }
+        i += len;
+
+        // replace with efficient lookup table:
+        if (!(c == '"' || c == '\'' || c == '-' || c == '.')) {
+            before_char = c;
+            continue;
+        }
+
+        // Flush the untouched text that precedes this character.
+        (*escape)(buf, lit.data + lastout, i - len - lastout);
+
+        if (c == '"' || c == '\'') {
+
+            if (i >= lit.len) {
+                // The quote is the last character of this node, so the
+                // following character comes from the next node.
+                if (node->next) {
+                    if (node->next->type == CMARK_NODE_TEXT) {
+                        if (utf8proc_iterate(node->next->as.literal.data,
+                                             node->next->as.literal.len,
+                                             &after_char) < 1) {
+                            after_char = '\n';
+                        }
+                    } else if (node->next->type == CMARK_NODE_SOFTBREAK ||
+                               node->next->type == CMARK_NODE_LINEBREAK) {
+                        after_char = '\n';
+                    } else {
+                        after_char = 'A';
+                    }
+                } else {
+                    after_char = '\n';
+                }
+            } else {
+                if (utf8proc_iterate(lit.data + i, lit.len - i,
+                                     &after_char) < 1) {
+                    // Undecodable byte: use its raw value, matching how
+                    // the main scan will treat it on the next iteration.
+                    after_char = lit.data[i];
+                }
+            }
+
+            left_flanking = !utf8proc_is_space(after_char) &&
+                            !(utf8proc_is_punctuation(after_char) &&
+                              !utf8proc_is_space(before_char) &&
+                              !utf8proc_is_punctuation(before_char));
+            right_flanking = !utf8proc_is_space(before_char) &&
+                             !(utf8proc_is_punctuation(before_char) &&
+                               !utf8proc_is_space(after_char) &&
+                               !utf8proc_is_punctuation(after_char));
+        }
+
+        switch (c) {
+        case '"':
+            if (right_flanking) {
+                cmark_strbuf_puts(buf, right_double_quote);
+            } else {
+                cmark_strbuf_puts(buf, left_double_quote);
+            }
+            break;
+        case '\'':
+            if (left_flanking && !right_flanking) {
+                cmark_strbuf_puts(buf, left_single_quote);
+            } else {
+                cmark_strbuf_puts(buf, right_single_quote);
+            }
+            break;
+        case '-':
+            if (i < lit.len && lit.data[i] == '-') {
+                // Check the bound before peeking at a third hyphen: the
+                // literal may end right after "--".
+                if (i + 1 < lit.len && lit.data[i + 1] == '-') {
+                    cmark_strbuf_puts(buf, em_dash);
+                    i += 2;
+                } else {
+                    cmark_strbuf_puts(buf, en_dash);
+                    i += 1;
+                }
+            } else {
+                cmark_strbuf_putc(buf, c);
+            }
+            break;
+        case '.':
+            if (i < lit.len - 1 && lit.data[i] == '.' &&
+                lit.data[i + 1] == '.') {
+                cmark_strbuf_puts(buf, ellipses);
+                i += 2;
+            } else {
+                cmark_strbuf_putc(buf, c);
+            }
+            break;
+        default:
+            cmark_strbuf_putc(buf, c);
+        }
+
+        // The character just handled is what precedes whatever follows,
+        // e.g. the '-' in `a-"b"` must make the quote an opening one.
+        before_char = c;
+        lastout = i;
+    }
+
+    // Flush whatever follows the last substitution.
+    (*escape)(buf, lit.data + lastout, lit.len - lastout);
+
+}