summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu> 2015-01-07 11:17:24 -0800
committer: John MacFarlane <jgm@berkeley.edu> 2015-01-07 11:23:29 -0800
commit: 52c591d75433b16cf32f4fae319ccb60b20f6ae7 (patch)
tree: af0e956b72dfbc3883252f48d3f2138b55aca367 /src
parent: a29c16c5e283fb50ecd318477072687caf987d4a (diff)
cmark: Add function & option to normalize text nodes.
So, instead of <text>Hi</text> <text>&amp;</text> <text>lo</text> we get <text>Hi&amp;lo</text>.

* Added exported `cmark_consolidate_text_nodes` function.
* Added `CMARK_OPT_NORMALIZE` to options.
* Added optional normalization in XML writer.
* Added `--normalize` option to command-line program.
* Updated man page.
Diffstat (limited to 'src')
-rw-r--r-- src/cmark.h | 9
-rw-r--r-- src/iterator.c | 31
-rw-r--r-- src/main.c | 3
-rw-r--r-- src/xml.c | 5
4 files changed, 48 insertions, 0 deletions
diff --git a/src/cmark.h b/src/cmark.h
index 2051a04..45bc338 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -356,6 +356,11 @@ cmark_node_prepend_child(cmark_node *node, cmark_node *child);
CMARK_EXPORT int
cmark_node_append_child(cmark_node *node, cmark_node *child);
+/** Consolidates adjacent text nodes.
+ */
+CMARK_EXPORT void
+cmark_consolidate_text_nodes(cmark_node *root);
+
/**
* ## Parsing
*
@@ -441,6 +446,10 @@ char *cmark_render_man(cmark_node *root, long options);
*/
#define CMARK_OPT_HARDBREAKS 2
+/** Normalize tree by consolidating adjacent text nodes.
+ */
+#define CMARK_OPT_NORMALIZE 4
+
/** # AUTHORS
*
* John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
diff --git a/src/iterator.c b/src/iterator.c
index 6ebc9af..b0ac9d2 100644
--- a/src/iterator.c
+++ b/src/iterator.c
@@ -84,3 +84,34 @@ cmark_iter_get_node(cmark_iter *iter)
return cur;
}
+
+/* Merge each run of adjacent text nodes reached from root into one node. */
+void cmark_consolidate_text_nodes(cmark_node *root)
+{
+ cmark_iter *iter = cmark_iter_new(root);
+ cmark_strbuf buf = GH_BUF_INIT; /* scratch buffer, cleared and reused for every run */
+ cmark_event_type ev_type;
+ cmark_node *cur, *tmp, *next;
+
+ while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { /* loop until the iterator reports DONE */
+ cur = cmark_iter_get_node(iter);
+ if (ev_type == CMARK_EVENT_ENTER &&
+ cur->type == CMARK_NODE_TEXT &&
+ cur->next &&
+ cur->next->type == CMARK_NODE_TEXT) { /* cur is a text node whose ->next is also text */
+ cmark_strbuf_clear(&buf);
+ cmark_strbuf_puts(&buf, cmark_node_get_literal(cur)); /* seed the buffer with cur's own literal */
+ tmp = cur->next;
+ while (tmp && tmp->type == CMARK_NODE_TEXT) { /* absorb every text node that follows cur */
+ cmark_iter_get_node(iter); // advance pointer -- NOTE(review): presumably steps the iterator past tmp before tmp is freed; confirm against cmark_iter_get_node
+ cmark_strbuf_puts(&buf, cmark_node_get_literal(tmp));
+ next = tmp->next; /* read the successor before tmp is freed */
+ cmark_node_free(tmp);
+ tmp = next;
+ }
+ cmark_node_set_literal(cur, (char *)cmark_strbuf_detach(&buf)); /* NOTE(review): if cmark_node_set_literal copies its argument, the detached buffer is leaked -- confirm */
+ }
+ }
+
+ cmark_iter_free(iter);
+}
diff --git a/src/main.c b/src/main.c
index eaef246..5855868 100644
--- a/src/main.c
+++ b/src/main.c
@@ -26,6 +26,7 @@ void print_usage()
printf(" --to, -t FORMAT Specify output format (html, xml, man)\n");
printf(" --sourcepos Include source position attribute\n");
printf(" --hardbreaks Treat newlines as hard line breaks\n");
+ printf(" --normalize Consolidate adjacent text nodes\n");
printf(" --help, -h Print usage information\n");
printf(" --version Print version\n");
}
@@ -79,6 +80,8 @@ int main(int argc, char *argv[])
options |= CMARK_OPT_SOURCEPOS;
} else if (strcmp(argv[i], "--hardbreaks") == 0) {
options |= CMARK_OPT_HARDBREAKS;
+ } else if (strcmp(argv[i], "--normalize") == 0) {
+ options |= CMARK_OPT_NORMALIZE;
} else if ((strcmp(argv[i], "--help") == 0) ||
(strcmp(argv[i], "-h") == 0)) {
print_usage();
diff --git a/src/xml.c b/src/xml.c
index 2ae900c..93a6eda 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -123,6 +123,11 @@ char *cmark_render_xml(cmark_node *root, long options)
cmark_event_type ev_type;
cmark_node *cur;
struct render_state state = { &xml, 0 };
+
+ if (options & CMARK_OPT_NORMALIZE) {
+ cmark_consolidate_text_nodes(root);
+ }
+
cmark_iter *iter = cmark_iter_new(root);
cmark_strbuf_puts(state.xml,