summaryrefslogtreecommitdiff
path: root/src/xml.c
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2014-12-27 21:51:30 -0800
committer John MacFarlane <jgm@berkeley.edu> 2014-12-28 16:27:00 -0800
commit d57f3952ca8b9aac16db8243539f4c1c5dbf3c93 (patch)
tree 21ff4ae66cc5d6130963172df2badb3a77a4930e /src/xml.c
parent bf44064d09afd04039058a00c32c1532fb5e2b61 (diff)
Added xml writer, to dump the AST in XML.
This is a work-in-progress. CommonMark.dtd gives the DTD for the generated XML. Closes #53.
Diffstat (limited to 'src/xml.c')
-rw-r--r-- src/xml.c 140
1 file changed, 140 insertions, 0 deletions
diff --git a/src/xml.c b/src/xml.c
new file mode 100644
index 0000000..86fb6d4
--- /dev/null
+++ b/src/xml.c
@@ -0,0 +1,140 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "config.h"
+#include "cmark.h"
+#include "node.h"
+#include "buffer.h"
+#include "houdini.h"
+
+// Functions to convert cmark_nodes to XML strings.
+
+/*
+ * Append `source` to `dest`, escaped via houdini_escape_html0() with its
+ * last argument set to 0.
+ *
+ * A NULL `source` appends nothing.  A negative `length` means "measure the
+ * string with strlen()", so `source` must be NUL-terminated in that case.
+ */
+static void escape_xml(cmark_strbuf *dest, const unsigned char *source, int length)
+{
+    if (source == NULL) {
+        return;
+    }
+
+    if (length < 0) {
+        length = (int)strlen((const char *)source);
+    }
+
+    houdini_escape_html0(dest, source, (size_t)length, 0);
+}
+
+/* State threaded through S_render_node() while walking the tree. */
+struct render_state {
+    /* Buffer that receives the generated XML text. */
+    cmark_strbuf *xml;
+    /* Number of spaces written by indent() before a tag. */
+    int indent;
+};
+
+/* Write state->indent space characters to the output buffer. */
+static inline void indent(struct render_state *state)
+{
+    int remaining = state->indent;
+
+    while (remaining-- > 0) {
+        cmark_strbuf_putc(state->xml, ' ');
+    }
+}
+
+/*
+ * Render one traversal event for `node` into the XML buffer held by `vstate`
+ * (a struct render_state *).
+ *
+ * On CMARK_EVENT_ENTER the opening tag is written, together with the
+ * sourcepos attribute (when start_line is non-zero) and any type-specific
+ * attributes:
+ *   - text / code / html / inline html / code block nodes are written as
+ *     <name>escaped literal</name> on a single line;
+ *   - a node with children gets <name ...> and the indentation grows by 2;
+ *   - any other node is written as the empty-element tag <name ... />.
+ *
+ * Every other event is treated as an exit: the indentation shrinks by 2 and
+ * </name> is written, but only for nodes that have children.  A childless
+ * node was already closed by its empty-element tag on entry, so emitting a
+ * second closing tag for it would make the output ill-formed.
+ *
+ * Always returns 1.
+ */
+static int
+S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate)
+{
+    struct render_state *state = vstate;
+    cmark_strbuf *xml = state->xml;
+    bool literal = false;
+
+    if (ev_type != CMARK_EVENT_ENTER) {
+        /* Childless nodes were emitted as "<name />" on entry. */
+        if (node->first_child != NULL) {
+            state->indent -= 2;
+            indent(state);
+            cmark_strbuf_printf(xml, "</%s>\n",
+                                cmark_node_type_string(node));
+        }
+        return 1;
+    }
+
+    indent(state);
+    cmark_strbuf_printf(xml, "<%s", cmark_node_type_string(node));
+
+    if (node->start_line != 0) {
+        cmark_strbuf_printf(xml, " sourcepos=\"%d:%d-%d\"",
+                            node->start_line,
+                            node->start_column,
+                            node->end_line);
+    }
+
+    switch (node->type) {
+    case CMARK_NODE_TEXT:
+    case CMARK_NODE_CODE:
+    case CMARK_NODE_HTML:
+    case CMARK_NODE_INLINE_HTML:
+        cmark_strbuf_puts(xml, ">");
+        escape_xml(xml, node->as.literal.data,
+                   node->as.literal.len);
+        cmark_strbuf_puts(xml, "</");
+        cmark_strbuf_puts(xml, cmark_node_type_string(node));
+        literal = true;
+        break;
+    case CMARK_NODE_CODE_BLOCK:
+        if (node->as.code.info.len > 0) {
+            cmark_strbuf_puts(xml, " info=\"");
+            escape_xml(xml, node->as.code.info.data,
+                       node->as.code.info.len);
+            cmark_strbuf_putc(xml, '"');
+        }
+        cmark_strbuf_puts(xml, ">");
+        escape_xml(xml, node->as.code.literal.data,
+                   node->as.code.literal.len);
+        cmark_strbuf_puts(xml, "</");
+        cmark_strbuf_puts(xml, cmark_node_type_string(node));
+        literal = true;
+        break;
+    case CMARK_NODE_LINK:
+    case CMARK_NODE_IMAGE:
+        /* escape_xml() ignores NULL, so a missing url/title yields "". */
+        cmark_strbuf_puts(xml, " url=\"");
+        escape_xml(xml, node->as.link.url, -1);
+        cmark_strbuf_putc(xml, '"');
+        cmark_strbuf_puts(xml, " title=\"");
+        escape_xml(xml, node->as.link.title, -1);
+        cmark_strbuf_putc(xml, '"');
+        break;
+    default:
+        break;
+    }
+
+    // TODO print attributes
+
+    if (node->first_child) {
+        /* Children are rendered one level deeper. */
+        state->indent += 2;
+    } else if (!literal) {
+        /* Nothing inside: close as an empty-element tag. */
+        cmark_strbuf_puts(xml, " /");
+    }
+
+    cmark_strbuf_puts(xml, ">\n");
+
+    return 1;
+}
+
+/*
+ * Render the tree rooted at `root` as an XML document.
+ *
+ * The output starts with an XML declaration and a DOCTYPE referencing
+ * CommonMark.dtd, followed by whatever S_render_node() writes for each
+ * iterator event.
+ *
+ * Returns the string detached from the internal buffer.
+ * NOTE(review): the caller presumably takes ownership and must free it;
+ * confirm against cmark_strbuf_detach().
+ */
+char *cmark_render_xml(cmark_node *root)
+{
+    cmark_strbuf xml = GH_BUF_INIT;
+    struct render_state state = { &xml, 0 };
+    cmark_iter *iter = cmark_iter_new(root);
+    cmark_event_type ev_type;
+    char *result;
+
+    cmark_strbuf_puts(state.xml,
+                      "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
+    cmark_strbuf_puts(state.xml,
+                      "<!DOCTYPE CommonMark SYSTEM \"CommonMark.dtd\">\n");
+
+    for (;;) {
+        ev_type = cmark_iter_next(iter);
+        if (ev_type == CMARK_EVENT_DONE) {
+            break;
+        }
+        S_render_node(cmark_iter_get_node(iter), ev_type, &state);
+    }
+
+    result = (char *)cmark_strbuf_detach(&xml);
+
+    cmark_iter_free(iter);
+    cmark_strbuf_free(&xml);
+    return result;
+}