From 52c591d75433b16cf32f4fae319ccb60b20f6ae7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 7 Jan 2015 11:17:24 -0800 Subject: cmark: Add function & option to normalize text nodes. So, instead of `<text>Hi</text><text>&amp;</text><text>lo</text>` we get `<text>Hi&amp;lo</text>` * Added exported `cmark_consolidate_text_nodes` function. * Added `CMARK_OPT_NORMALIZE` to options. * Added optional normalization in XML writer. * Added `--normalize` option to command-line program. * Updated man page. --- man/man1/cmark.1 | 9 +++++++++ src/cmark.h | 9 +++++++++ src/iterator.c | 31 +++++++++++++++++++++++++++++++ src/main.c | 3 +++ src/xml.c | 5 +++++ 5 files changed, 57 insertions(+) diff --git a/man/man1/cmark.1 b/man/man1/cmark.1 index 2d5af77..c425b8c 100644 --- a/man/man1/cmark.1 +++ b/man/man1/cmark.1 @@ -23,6 +23,15 @@ concatenated before parsing. \-\-to, \-t \f[I]FORMAT\f[] Specify output format (\f[C]html\f[], \f[C]man\f[], \f[C]ast\f[]). .TP 12n +\-\-sourcepos +Include source position attribute. +.TP 12n +\-\-hardbreaks +Treat newlines as hard line breaks. +.TP 12n +\-\-normalize +Consolidate adjacent text nodes. +.TP 12n \-\-help Print usage information. .TP 12n diff --git a/src/cmark.h b/src/cmark.h index 2051a04..45bc338 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -356,6 +356,11 @@ cmark_node_prepend_child(cmark_node *node, cmark_node *child); CMARK_EXPORT int cmark_node_append_child(cmark_node *node, cmark_node *child); +/** Consolidates adjacent text nodes. + */ +CMARK_EXPORT void +cmark_consolidate_text_nodes(cmark_node *root); + /** * ## Parsing * @@ -441,6 +446,10 @@ char *cmark_render_man(cmark_node *root, long options); */ #define CMARK_OPT_HARDBREAKS 2 +/** Normalize tree by consolidating adjacent text nodes. + */ +#define CMARK_OPT_NORMALIZE 4 + /** # AUTHORS * * John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer. 
diff --git a/src/iterator.c b/src/iterator.c index 6ebc9af..b0ac9d2 100644 --- a/src/iterator.c +++ b/src/iterator.c @@ -84,3 +84,34 @@ cmark_iter_get_node(cmark_iter *iter) return cur; } + + +void cmark_consolidate_text_nodes(cmark_node *root) +{ + cmark_iter *iter = cmark_iter_new(root); + cmark_strbuf buf = GH_BUF_INIT; + cmark_event_type ev_type; + cmark_node *cur, *tmp, *next; + + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cur = cmark_iter_get_node(iter); + if (ev_type == CMARK_EVENT_ENTER && + cur->type == CMARK_NODE_TEXT && + cur->next && + cur->next->type == CMARK_NODE_TEXT) { + cmark_strbuf_clear(&buf); + cmark_strbuf_puts(&buf, cmark_node_get_literal(cur)); + tmp = cur->next; + while (tmp && tmp->type == CMARK_NODE_TEXT) { + cmark_iter_get_node(iter); // advance pointer + cmark_strbuf_puts(&buf, cmark_node_get_literal(tmp)); + next = tmp->next; + cmark_node_free(tmp); + tmp = next; + } + cmark_node_set_literal(cur, (char *)cmark_strbuf_detach(&buf)); + } + } + + cmark_iter_free(iter); +} diff --git a/src/main.c b/src/main.c index eaef246..5855868 100644 --- a/src/main.c +++ b/src/main.c @@ -26,6 +26,7 @@ void print_usage() printf(" --to, -t FORMAT Specify output format (html, xml, man)\n"); printf(" --sourcepos Include source position attribute\n"); printf(" --hardbreaks Treat newlines as hard line breaks\n"); + printf(" --normalize Consolidate adjacent text nodes\n"); printf(" --help, -h Print usage information\n"); printf(" --version Print version\n"); } @@ -79,6 +80,8 @@ int main(int argc, char *argv[]) options |= CMARK_OPT_SOURCEPOS; } else if (strcmp(argv[i], "--hardbreaks") == 0) { options |= CMARK_OPT_HARDBREAKS; + } else if (strcmp(argv[i], "--normalize") == 0) { + options |= CMARK_OPT_NORMALIZE; } else if ((strcmp(argv[i], "--help") == 0) || (strcmp(argv[i], "-h") == 0)) { print_usage(); diff --git a/src/xml.c b/src/xml.c index 2ae900c..93a6eda 100644 --- a/src/xml.c +++ b/src/xml.c @@ -123,6 +123,11 @@ char 
*cmark_render_xml(cmark_node *root, long options) cmark_event_type ev_type; cmark_node *cur; struct render_state state = { &xml, 0 }; + + if (options & CMARK_OPT_NORMALIZE) { + cmark_consolidate_text_nodes(root); + } + cmark_iter *iter = cmark_iter_new(root); cmark_strbuf_puts(state.xml, -- cgit v1.2.3