From 325a1471d2a32bcc1e2d2580b973ff4ba1df85e8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 17 Mar 2019 22:43:38 -0700 Subject: Make rendering safe by default. Removes CMARK_OPT_SAFE from options. Adds CMARK_OPT_UNSAFE, with the opposite meaning. The new default behavior is to suppress raw HTML and potentially dangerous links. The CMARK_OPT_UNSAFE option has to be set explicitly to prevent this. -------------------------------------------------------- NOTE: This change will require modifications in bindings for cmark and in most libraries and programs that use cmark. -------------------------------------------------------- Closes #239, #273. Borrows heavily from @kivikakk's patch in github/cmark-gfm#123. --- README.md | 12 ++++++------ api_test/main.c | 5 +++-- man/man3/cmark.3 | 10 +++++----- src/cmark.h | 10 +++++----- src/html.c | 12 ++++++------ src/main.c | 6 +++--- test/cmark-fuzz.c | 2 +- test/cmark.py | 4 +++- 8 files changed, 32 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index bd3694e..5f31ccc 100644 --- a/README.md +++ b/README.md @@ -156,14 +156,14 @@ be found in the man pages in the `man` subdirectory. Security -------- -By default, the library will pass through raw HTML and potentially +By default, the library will scrub raw HTML and potentially dangerous links (`javascript:`, `vbscript:`, `data:`, `file:`). -It is recommended that users either disable this potentially unsafe -feature by using the option `CMARK_OPT_SAFE` (or `--safe` with the -command-line program), or run the output through an HTML sanitizer -to protect against -[XSS attacks](http://en.wikipedia.org/wiki/Cross-site_scripting). +To allow these, use the option `CMARK_OPT_UNSAFE` (or +`--unsafe`) with the command line program. If doing so, we +recommend you use a HTML sanitizer specific to your needs to +protect against [XSS +attacks](http://en.wikipedia.org/wiki/Cross-site_scripting). Contributing ------------ diff --git a/api_test/main.c b/api_test/main.c index 1f1f77f..83afbff 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -177,7 +177,8 @@ static void accessors(test_batch_runner *runner) { OK(runner, cmark_node_set_literal(string, literal + sizeof("prefix")), "set_literal suffix"); - char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT); + char *rendered_html = cmark_render_html(doc, + CMARK_OPT_DEFAULT | CMARK_OPT_UNSAFE); static const char expected_html[] = "

Header

\n" "
    \n" @@ -859,7 +860,7 @@ static void test_safe(test_batch_runner *runner) { "a>\n[link](JAVAscript:alert('hi'))\n![image](" "file:my.js)\n"; char *html = cmark_markdown_to_html(raw_html, sizeof(raw_html) - 1, - CMARK_OPT_DEFAULT | CMARK_OPT_SAFE); + CMARK_OPT_DEFAULT); STR_EQ(runner, html, "\n

    hi\nlink\n\"image\""); } else { cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); @@ -242,7 +242,7 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, break; case CMARK_NODE_HTML_INLINE: - if (options & CMARK_OPT_SAFE) { + if (!(options & CMARK_OPT_UNSAFE)) { cmark_strbuf_puts(html, ""); } else { cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); @@ -278,8 +278,8 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_LINK: if (entering) { cmark_strbuf_puts(html, "as.link.url, 0))) { + if ((options & CMARK_OPT_UNSAFE) || + !(scan_dangerous_url(&node->as.link.url, 0))) { houdini_escape_href(html, node->as.link.url.data, node->as.link.url.len); } @@ -296,8 +296,8 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_IMAGE: if (entering) { cmark_strbuf_puts(html, "as.link.url, 0))) { + if ((options & CMARK_OPT_UNSAFE) || + !(scan_dangerous_url(&node->as.link.url, 0))) { houdini_escape_href(html, node->as.link.url.data, node->as.link.url.len); } diff --git a/src/main.c b/src/main.c index 1094fee..29360dc 100644 --- a/src/main.c +++ b/src/main.c @@ -38,7 +38,7 @@ void print_usage() { printf(" --sourcepos Include source position attribute\n"); printf(" --hardbreaks Treat newlines as hard line breaks\n"); printf(" --nobreaks Render soft line breaks as spaces\n"); - printf(" --safe Suppress raw HTML and dangerous URLs\n"); + printf(" --unsafe Render raw HTML and dangerous URLs\n"); printf(" --smart Use smart punctuation\n"); printf(" --validate-utf8 Replace UTF-8 invalid sequences with U+FFFD\n"); printf(" --help, -h Print usage information\n"); @@ -112,8 +112,8 @@ int main(int argc, char *argv[]) { options |= CMARK_OPT_NOBREAKS; } else if (strcmp(argv[i], "--smart") == 0) { options |= CMARK_OPT_SMART; - } else if (strcmp(argv[i], "--safe") == 0) { - options |= CMARK_OPT_SAFE; + } else if (strcmp(argv[i], "--unsafe") == 0) { + options |= CMARK_OPT_UNSAFE; } else if (strcmp(argv[i], "--validate-utf8") == 0) { options |= CMARK_OPT_VALIDATE_UTF8; } else if ((strcmp(argv[i], "--help") == 0) || diff --git a/test/cmark-fuzz.c b/test/cmark-fuzz.c index 9bdd3a5..02c05bc 100644 --- a/test/cmark-fuzz.c +++ b/test/cmark-fuzz.c @@ -13,7 +13,7 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { memcpy(&fuzz_config, data, sizeof(fuzz_config)); /* Mask off valid option bits */ - fuzz_config.options &= (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_SAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART); + fuzz_config.options &= (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_UNSAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART); /* Remainder of input is the markdown */ const char *markdown = (const char *)(data + sizeof(fuzz_config)); diff --git a/test/cmark.py b/test/cmark.py index 4be85a3..38d2f59 100644 --- a/test/cmark.py +++ b/test/cmark.py @@ -17,7 +17,8 @@ def to_html(lib, text): markdown.argtypes = [c_char_p, c_size_t, c_int] textbytes = text.encode('utf-8') textlen = len(textbytes) - result = markdown(textbytes, textlen, 0).decode('utf-8') + # 1 << 17 == CMARK_OPT_UNSAFE + result = markdown(textbytes, textlen, 1 << 17).decode('utf-8') return [0, result, ''] def to_commonmark(lib, text): @@ -37,6 +38,7 @@ class CMark: def __init__(self, prog=None, library_dir=None): self.prog = prog if prog: + prog += ' --unsafe' self.to_html = lambda x: pipe_through_prog(prog, x) self.to_commonmark = lambda x: pipe_through_prog(prog + ' -t commonmark', x) else: -- cgit v1.2.3