#include #include #include #define CMARK_NO_SHORT_NAMES #include "cmark.h" #include "node.h" #include "harness.h" #include "cplusplus.h" #define UTF8_REPL "\xEF\xBF\xBD" static const cmark_node_type node_types[] = { CMARK_NODE_DOCUMENT, CMARK_NODE_BLOCK_QUOTE, CMARK_NODE_LIST, CMARK_NODE_ITEM, CMARK_NODE_CODE_BLOCK, CMARK_NODE_HTML_BLOCK, CMARK_NODE_PARAGRAPH, CMARK_NODE_HEADING, CMARK_NODE_THEMATIC_BREAK, CMARK_NODE_TEXT, CMARK_NODE_SOFTBREAK, CMARK_NODE_LINEBREAK, CMARK_NODE_CODE, CMARK_NODE_HTML_INLINE, CMARK_NODE_EMPH, CMARK_NODE_STRONG, CMARK_NODE_LINK, CMARK_NODE_IMAGE}; static const int num_node_types = sizeof(node_types) / sizeof(*node_types); static void test_md_to_html(test_batch_runner *runner, const char *markdown, const char *expected_html, const char *msg); static void test_content(test_batch_runner *runner, cmark_node_type type, int allowed_content); static void test_char(test_batch_runner *runner, int valid, const char *utf8, const char *msg); static void test_incomplete_char(test_batch_runner *runner, const char *utf8, const char *msg); static void test_continuation_byte(test_batch_runner *runner, const char *utf8); static void version(test_batch_runner *runner) { INT_EQ(runner, cmark_version(), CMARK_VERSION, "cmark_version"); STR_EQ(runner, cmark_version_string(), CMARK_VERSION_STRING, "cmark_version_string"); } static void constructor(test_batch_runner *runner) { for (int i = 0; i < num_node_types; ++i) { cmark_node_type type = node_types[i]; cmark_node *node = cmark_node_new(type); OK(runner, node != NULL, "new type %d", type); INT_EQ(runner, cmark_node_get_type(node), type, "get_type %d", type); switch (node->type) { case CMARK_NODE_HEADING: INT_EQ(runner, cmark_node_get_heading_level(node), 1, "default heading level is 1"); node->as.heading.level = 1; break; case CMARK_NODE_LIST: INT_EQ(runner, cmark_node_get_list_type(node), CMARK_BULLET_LIST, "default is list type is bullet"); INT_EQ(runner, cmark_node_get_list_delim(node), CMARK_NO_DELIM, "default is list delim is NO_DELIM"); INT_EQ(runner, cmark_node_get_list_start(node), 1, "default is list start is 1"); INT_EQ(runner, cmark_node_get_list_tight(node), 0, "default is list is loose"); break; default: break; } cmark_node_free(node); } } static void accessors(test_batch_runner *runner) { static const char markdown[] = "## Header\n" "\n" "* Item 1\n" "* Item 2\n" "\n" "2. Item 1\n" "\n" "3. Item 2\n" "\n" "\n" " code\n" "\n" "``` lang\n" "fenced\n" "```\n" "\n" "
html
\n" "\n" "[link](url 'title')\n"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); // Getters cmark_node *heading = cmark_node_first_child(doc); INT_EQ(runner, cmark_node_get_heading_level(heading), 2, "get_heading_level"); cmark_node *bullet_list = cmark_node_next(heading); INT_EQ(runner, cmark_node_get_list_type(bullet_list), CMARK_BULLET_LIST, "get_list_type bullet"); INT_EQ(runner, cmark_node_get_list_tight(bullet_list), 1, "get_list_tight tight"); cmark_node *ordered_list = cmark_node_next(bullet_list); INT_EQ(runner, cmark_node_get_list_type(ordered_list), CMARK_ORDERED_LIST, "get_list_type ordered"); INT_EQ(runner, cmark_node_get_list_delim(ordered_list), CMARK_PERIOD_DELIM, "get_list_delim ordered"); INT_EQ(runner, cmark_node_get_list_start(ordered_list), 2, "get_list_start"); INT_EQ(runner, cmark_node_get_list_tight(ordered_list), 0, "get_list_tight loose"); cmark_node *code = cmark_node_next(ordered_list); STR_EQ(runner, cmark_node_get_literal(code), "code\n", "get_literal indented code"); cmark_node *fenced = cmark_node_next(code); STR_EQ(runner, cmark_node_get_literal(fenced), "fenced\n", "get_literal fenced code"); STR_EQ(runner, cmark_node_get_fence_info(fenced), "lang", "get_fence_info"); cmark_node *html = cmark_node_next(fenced); STR_EQ(runner, cmark_node_get_literal(html), "
html
\n", "get_literal html"); cmark_node *paragraph = cmark_node_next(html); INT_EQ(runner, cmark_node_get_start_line(paragraph), 19, "get_start_line"); INT_EQ(runner, cmark_node_get_start_column(paragraph), 1, "get_start_column"); INT_EQ(runner, cmark_node_get_end_line(paragraph), 19, "get_end_line"); cmark_node *link = cmark_node_first_child(paragraph); STR_EQ(runner, cmark_node_get_url(link), "url", "get_url"); STR_EQ(runner, cmark_node_get_title(link), "title", "get_title"); cmark_node *string = cmark_node_first_child(link); STR_EQ(runner, cmark_node_get_literal(string), "link", "get_literal string"); // Setters OK(runner, cmark_node_set_heading_level(heading, 3), "set_heading_level"); OK(runner, cmark_node_set_list_type(bullet_list, CMARK_ORDERED_LIST), "set_list_type ordered"); OK(runner, cmark_node_set_list_delim(bullet_list, CMARK_PAREN_DELIM), "set_list_delim paren"); OK(runner, cmark_node_set_list_start(bullet_list, 3), "set_list_start"); OK(runner, cmark_node_set_list_tight(bullet_list, 0), "set_list_tight loose"); OK(runner, cmark_node_set_list_type(ordered_list, CMARK_BULLET_LIST), "set_list_type bullet"); OK(runner, cmark_node_set_list_tight(ordered_list, 1), "set_list_tight tight"); OK(runner, cmark_node_set_literal(code, "CODE\n"), "set_literal indented code"); OK(runner, cmark_node_set_literal(fenced, "FENCED\n"), "set_literal fenced code"); OK(runner, cmark_node_set_fence_info(fenced, "LANG"), "set_fence_info"); OK(runner, cmark_node_set_literal(html, "
HTML
\n"), "set_literal html"); OK(runner, cmark_node_set_url(link, "URL"), "set_url"); OK(runner, cmark_node_set_title(link, "TITLE"), "set_title"); OK(runner, cmark_node_set_literal(string, "LINK"), "set_literal string"); char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT); static const char expected_html[] = "

Header

\n" "
    \n" "
  1. \n" "

    Item 1

    \n" "
  2. \n" "
  3. \n" "

    Item 2

    \n" "
  4. \n" "
\n" "
    \n" "
  • Item 1
  • \n" "
  • Item 2
  • \n" "
\n" "
CODE\n"
      "
\n" "
FENCED\n"
      "
\n" "
HTML
\n" "

LINK

\n"; STR_EQ(runner, rendered_html, expected_html, "setters work"); free(rendered_html); // Getter errors INT_EQ(runner, cmark_node_get_heading_level(bullet_list), 0, "get_heading_level error"); INT_EQ(runner, cmark_node_get_list_type(heading), CMARK_NO_LIST, "get_list_type error"); INT_EQ(runner, cmark_node_get_list_start(code), 0, "get_list_start error"); INT_EQ(runner, cmark_node_get_list_tight(fenced), 0, "get_list_tight error"); OK(runner, cmark_node_get_literal(ordered_list) == NULL, "get_literal error"); OK(runner, cmark_node_get_fence_info(paragraph) == NULL, "get_fence_info error"); OK(runner, cmark_node_get_url(html) == NULL, "get_url error"); OK(runner, cmark_node_get_title(heading) == NULL, "get_title error"); // Setter errors OK(runner, !cmark_node_set_heading_level(bullet_list, 3), "set_heading_level error"); OK(runner, !cmark_node_set_list_type(heading, CMARK_ORDERED_LIST), "set_list_type error"); OK(runner, !cmark_node_set_list_start(code, 3), "set_list_start error"); OK(runner, !cmark_node_set_list_tight(fenced, 0), "set_list_tight error"); OK(runner, !cmark_node_set_literal(ordered_list, "content\n"), "set_literal error"); OK(runner, !cmark_node_set_fence_info(paragraph, "lang"), "set_fence_info error"); OK(runner, !cmark_node_set_url(html, "url"), "set_url error"); OK(runner, !cmark_node_set_title(heading, "title"), "set_title error"); OK(runner, !cmark_node_set_heading_level(heading, 0), "set_heading_level too small"); OK(runner, !cmark_node_set_heading_level(heading, 7), "set_heading_level too large"); OK(runner, !cmark_node_set_list_type(bullet_list, CMARK_NO_LIST), "set_list_type invalid"); OK(runner, !cmark_node_set_list_start(bullet_list, -1), "set_list_start negative"); cmark_node_free(doc); } static void node_check(test_batch_runner *runner) { // Construct an incomplete tree. cmark_node *doc = cmark_node_new(CMARK_NODE_DOCUMENT); cmark_node *p1 = cmark_node_new(CMARK_NODE_PARAGRAPH); cmark_node *p2 = cmark_node_new(CMARK_NODE_PARAGRAPH); doc->first_child = p1; p1->next = p2; INT_EQ(runner, cmark_node_check(doc, NULL), 4, "node_check works"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "node_check fixes tree"); cmark_node_free(doc); } static void iterator(test_batch_runner *runner) { cmark_node *doc = cmark_parse_document("> a *b*\n\nc", 10, CMARK_OPT_DEFAULT); int parnodes = 0; cmark_event_type ev_type; cmark_iter *iter = cmark_iter_new(doc); cmark_node *cur; while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); if (cur->type == CMARK_NODE_PARAGRAPH && ev_type == CMARK_EVENT_ENTER) { parnodes += 1; } } INT_EQ(runner, parnodes, 2, "iterate correctly counts paragraphs"); cmark_iter_free(iter); cmark_node_free(doc); } static void iterator_delete(test_batch_runner *runner) { static const char md[] = "a *b* c\n" "\n" "* item1\n" "* item2\n" "\n" "a `b` c\n" "\n" "* item1\n" "* item2\n"; cmark_node *doc = cmark_parse_document(md, sizeof(md) - 1, CMARK_OPT_DEFAULT); cmark_iter *iter = cmark_iter_new(doc); cmark_event_type ev_type; while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cmark_node *node = cmark_iter_get_node(iter); // Delete list, emph, and code nodes. if ((ev_type == CMARK_EVENT_EXIT && node->type == CMARK_NODE_LIST) || (ev_type == CMARK_EVENT_EXIT && node->type == CMARK_NODE_EMPH) || (ev_type == CMARK_EVENT_ENTER && node->type == CMARK_NODE_CODE)) { cmark_node_free(node); } } char *html = cmark_render_html(doc, CMARK_OPT_DEFAULT); static const char expected[] = "

a c

\n" "

a c

\n"; STR_EQ(runner, html, expected, "iterate and delete nodes"); free(html); cmark_iter_free(iter); cmark_node_free(doc); } static void create_tree(test_batch_runner *runner) { char *html; cmark_node *doc = cmark_node_new(CMARK_NODE_DOCUMENT); cmark_node *p = cmark_node_new(CMARK_NODE_PARAGRAPH); OK(runner, !cmark_node_insert_before(doc, p), "insert before root fails"); OK(runner, !cmark_node_insert_after(doc, p), "insert after root fails"); OK(runner, cmark_node_append_child(doc, p), "append1"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append1 consistent"); OK(runner, cmark_node_parent(p) == doc, "node_parent"); cmark_node *emph = cmark_node_new(CMARK_NODE_EMPH); OK(runner, cmark_node_prepend_child(p, emph), "prepend1"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "prepend1 consistent"); cmark_node *str1 = cmark_node_new(CMARK_NODE_TEXT); cmark_node_set_literal(str1, "Hello, "); OK(runner, cmark_node_prepend_child(p, str1), "prepend2"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "prepend2 consistent"); cmark_node *str3 = cmark_node_new(CMARK_NODE_TEXT); cmark_node_set_literal(str3, "!"); OK(runner, cmark_node_append_child(p, str3), "append2"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append2 consistent"); cmark_node *str2 = cmark_node_new(CMARK_NODE_TEXT); cmark_node_set_literal(str2, "world"); OK(runner, cmark_node_append_child(emph, str2), "append3"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append3 consistent"); html = cmark_render_html(doc, CMARK_OPT_DEFAULT); STR_EQ(runner, html, "

Hello, world!

\n", "render_html"); free(html); OK(runner, cmark_node_insert_before(str1, str3), "ins before1"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "ins before1 consistent"); // 31e OK(runner, cmark_node_first_child(p) == str3, "ins before1 works"); OK(runner, cmark_node_insert_before(str1, emph), "ins before2"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "ins before2 consistent"); // 3e1 OK(runner, cmark_node_last_child(p) == str1, "ins before2 works"); OK(runner, cmark_node_insert_after(str1, str3), "ins after1"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "ins after1 consistent"); // e13 OK(runner, cmark_node_next(str1) == str3, "ins after1 works"); OK(runner, cmark_node_insert_after(str1, emph), "ins after2"); INT_EQ(runner, cmark_node_check(doc, NULL), 0, "ins after2 consistent"); // 1e3 OK(runner, cmark_node_previous(emph) == str1, "ins after2 works"); cmark_node_unlink(emph); html = cmark_render_html(doc, CMARK_OPT_DEFAULT); STR_EQ(runner, html, "

Hello, !

\n", "render_html after shuffling"); free(html); cmark_node_free(doc); // TODO: Test that the contents of an unlinked inline are valid // after the parent block was destroyed. This doesn't work so far. cmark_node_free(emph); } static void custom_nodes(test_batch_runner *runner) { char *html; char *man; cmark_node *doc = cmark_node_new(CMARK_NODE_DOCUMENT); cmark_node *p = cmark_node_new(CMARK_NODE_PARAGRAPH); cmark_node_append_child(doc, p); cmark_node *ci = cmark_node_new(CMARK_NODE_CUSTOM_INLINE); cmark_node *str1 = cmark_node_new(CMARK_NODE_TEXT); cmark_node_set_literal(str1, "Hello"); OK(runner, cmark_node_append_child(ci, str1), "append1"); OK(runner, cmark_node_set_on_enter(ci, ""), "set_on_exit"); STR_EQ(runner, cmark_node_get_on_enter(ci), "", "get_on_exit"); cmark_node_append_child(p, ci); cmark_node *cb = cmark_node_new(CMARK_NODE_CUSTOM_BLOCK); cmark_node_set_on_enter(cb, "

\n\n CMARK_NODE_LAST_INLINE ? CMARK_NODE_LAST_BLOCK : CMARK_NODE_LAST_INLINE; OK(runner, max_node_type < 32, "all node types < 32"); int list_item_flag = 1 << CMARK_NODE_ITEM; int top_level_blocks = (1 << CMARK_NODE_BLOCK_QUOTE) | (1 << CMARK_NODE_LIST) | (1 << CMARK_NODE_CODE_BLOCK) | (1 << CMARK_NODE_HTML_BLOCK) | (1 << CMARK_NODE_PARAGRAPH) | (1 << CMARK_NODE_HEADING) | (1 << CMARK_NODE_THEMATIC_BREAK); int all_inlines = (1 << CMARK_NODE_TEXT) | (1 << CMARK_NODE_SOFTBREAK) | (1 << CMARK_NODE_LINEBREAK) | (1 << CMARK_NODE_CODE) | (1 << CMARK_NODE_HTML_INLINE) | (1 << CMARK_NODE_EMPH) | (1 << CMARK_NODE_STRONG) | (1 << CMARK_NODE_LINK) | (1 << CMARK_NODE_IMAGE); test_content(runner, CMARK_NODE_DOCUMENT, top_level_blocks); test_content(runner, CMARK_NODE_BLOCK_QUOTE, top_level_blocks); test_content(runner, CMARK_NODE_LIST, list_item_flag); test_content(runner, CMARK_NODE_ITEM, top_level_blocks); test_content(runner, CMARK_NODE_CODE_BLOCK, 0); test_content(runner, CMARK_NODE_HTML_BLOCK, 0); test_content(runner, CMARK_NODE_PARAGRAPH, all_inlines); test_content(runner, CMARK_NODE_HEADING, all_inlines); test_content(runner, CMARK_NODE_THEMATIC_BREAK, 0); test_content(runner, CMARK_NODE_TEXT, 0); test_content(runner, CMARK_NODE_SOFTBREAK, 0); test_content(runner, CMARK_NODE_LINEBREAK, 0); test_content(runner, CMARK_NODE_CODE, 0); test_content(runner, CMARK_NODE_HTML_INLINE, 0); test_content(runner, CMARK_NODE_EMPH, all_inlines); test_content(runner, CMARK_NODE_STRONG, all_inlines); test_content(runner, CMARK_NODE_LINK, all_inlines); test_content(runner, CMARK_NODE_IMAGE, all_inlines); } static void test_content(test_batch_runner *runner, cmark_node_type type, int allowed_content) { cmark_node *node = cmark_node_new(type); for (int i = 0; i < num_node_types; ++i) { cmark_node_type child_type = node_types[i]; cmark_node *child = cmark_node_new(child_type); int got = cmark_node_append_child(node, child); int expected = (allowed_content >> child_type) & 1; INT_EQ(runner, got, expected, "add %d as child of %d", child_type, type); cmark_node_free(child); } cmark_node_free(node); } static void parser(test_batch_runner *runner) { test_md_to_html(runner, "No newline", "

No newline

\n", "document without trailing newline"); } static void render_html(test_batch_runner *runner) { char *html; static const char markdown[] = "foo *bar*\n" "\n" "paragraph 2\n"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); cmark_node *paragraph = cmark_node_first_child(doc); html = cmark_render_html(paragraph, CMARK_OPT_DEFAULT); STR_EQ(runner, html, "

foo bar

\n", "render single paragraph"); free(html); cmark_node *string = cmark_node_first_child(paragraph); html = cmark_render_html(string, CMARK_OPT_DEFAULT); STR_EQ(runner, html, "foo ", "render single inline"); free(html); cmark_node *emph = cmark_node_next(string); html = cmark_render_html(emph, CMARK_OPT_DEFAULT); STR_EQ(runner, html, "bar", "render inline with children"); free(html); cmark_node_free(doc); } static void render_man(test_batch_runner *runner) { char *man; static const char markdown[] = "foo *bar*\n" "\n" "- Lorem ipsum dolor sit amet,\n" " consectetur adipiscing elit,\n" "- sed do eiusmod tempor incididunt\n" " ut labore et dolore magna aliqua.\n"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); man = cmark_render_man(doc, CMARK_OPT_DEFAULT, 20); STR_EQ(runner, man, ".PP\n" "foo \\f[I]bar\\f[]\n" ".IP \\[bu] 2\n" "Lorem ipsum dolor\n" "sit amet,\n" "consectetur\n" "adipiscing elit,\n" ".IP \\[bu] 2\n" "sed do eiusmod\n" "tempor incididunt ut\n" "labore et dolore\n" "magna aliqua.\n", "render document with wrapping"); free(man); man = cmark_render_man(doc, CMARK_OPT_DEFAULT, 0); STR_EQ(runner, man, ".PP\n" "foo \\f[I]bar\\f[]\n" ".IP \\[bu] 2\n" "Lorem ipsum dolor sit amet,\n" "consectetur adipiscing elit,\n" ".IP \\[bu] 2\n" "sed do eiusmod tempor incididunt\n" "ut labore et dolore magna aliqua.\n", "render document without wrapping"); free(man); cmark_node_free(doc); } static void render_latex(test_batch_runner *runner) { char *latex; static const char markdown[] = "foo *bar* $%\n" "\n" "- Lorem ipsum dolor sit amet,\n" " consectetur adipiscing elit,\n" "- sed do eiusmod tempor incididunt\n" " ut labore et dolore magna aliqua.\n"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); latex = cmark_render_latex(doc, CMARK_OPT_DEFAULT, 20); STR_EQ(runner, latex, "foo \\emph{bar} \\$\\%\n" "\n" "\\begin{itemize}\n" "\\item Lorem ipsum\n" "dolor sit amet,\n" "consectetur\n" "adipiscing elit,\n" "\n" "\\item sed do eiusmod\n" "tempor incididunt ut\n" "labore et dolore\n" "magna aliqua.\n" "\n" "\\end{itemize}\n", "render document with wrapping"); free(latex); latex = cmark_render_latex(doc, CMARK_OPT_DEFAULT, 0); STR_EQ(runner, latex, "foo \\emph{bar} \\$\\%\n" "\n" "\\begin{itemize}\n" "\\item Lorem ipsum dolor sit amet,\n" "consectetur adipiscing elit,\n" "\n" "\\item sed do eiusmod tempor incididunt\n" "ut labore et dolore magna aliqua.\n" "\n" "\\end{itemize}\n", "render document without wrapping"); free(latex); cmark_node_free(doc); } static void render_commonmark(test_batch_runner *runner) { char *commonmark; static const char markdown[] = "> \\- foo *bar* \\*bar\\*\n" "\n" "- Lorem ipsum dolor sit - amet,\n" " consectetur adipiscing elit,\n" "- sed do eiusmod tempor incididunt\n" " ut labore et dolore magna aliqua.\n"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); commonmark = cmark_render_commonmark(doc, CMARK_OPT_DEFAULT, 24); STR_EQ(runner, commonmark, "> \\- foo *bar* \\*bar\\*\n" "\n" "* Lorem ipsum dolor sit\n" " \\- amet, consectetur\n" " adipiscing elit,\n" "* sed do eiusmod tempor\n" " incididunt ut labore\n" " et dolore magna\n" " aliqua.\n", "render document with wrapping"); free(commonmark); commonmark = cmark_render_commonmark(doc, CMARK_OPT_DEFAULT, 0); STR_EQ(runner, commonmark, "> \\- foo *bar* \\*bar\\*\n" "\n" "* Lorem ipsum dolor sit - amet,\n" " consectetur adipiscing elit,\n" "* sed do eiusmod tempor incididunt\n" " ut labore et dolore magna aliqua.\n", "render document without wrapping"); free(commonmark); cmark_node_free(doc); } static void utf8(test_batch_runner *runner) { // Ranges test_char(runner, 1, "\x01", "valid utf8 01"); test_char(runner, 1, "\x7F", "valid utf8 7F"); test_char(runner, 0, "\x80", "invalid utf8 80"); test_char(runner, 0, "\xBF", "invalid utf8 BF"); test_char(runner, 0, "\xC0\x80", "invalid utf8 C080"); test_char(runner, 0, "\xC1\xBF", "invalid utf8 C1BF"); test_char(runner, 1, "\xC2\x80", "valid utf8 C280"); test_char(runner, 1, "\xDF\xBF", "valid utf8 DFBF"); test_char(runner, 0, "\xE0\x80\x80", "invalid utf8 E08080"); test_char(runner, 0, "\xE0\x9F\xBF", "invalid utf8 E09FBF"); test_char(runner, 1, "\xE0\xA0\x80", "valid utf8 E0A080"); test_char(runner, 1, "\xED\x9F\xBF", "valid utf8 ED9FBF"); test_char(runner, 0, "\xED\xA0\x80", "invalid utf8 EDA080"); test_char(runner, 0, "\xED\xBF\xBF", "invalid utf8 EDBFBF"); test_char(runner, 0, "\xF0\x80\x80\x80", "invalid utf8 F0808080"); test_char(runner, 0, "\xF0\x8F\xBF\xBF", "invalid utf8 F08FBFBF"); test_char(runner, 1, "\xF0\x90\x80\x80", "valid utf8 F0908080"); test_char(runner, 1, "\xF4\x8F\xBF\xBF", "valid utf8 F48FBFBF"); test_char(runner, 0, "\xF4\x90\x80\x80", "invalid utf8 F4908080"); test_char(runner, 0, "\xF7\xBF\xBF\xBF", "invalid utf8 F7BFBFBF"); test_char(runner, 0, "\xF8", "invalid utf8 F8"); test_char(runner, 0, "\xFF", "invalid utf8 FF"); // Incomplete byte sequences at end of input test_incomplete_char(runner, "\xE0\xA0", "invalid utf8 E0A0"); test_incomplete_char(runner, "\xF0\x90\x80", "invalid utf8 F09080"); // Invalid continuation bytes test_continuation_byte(runner, "\xC2\x80"); test_continuation_byte(runner, "\xE0\xA0\x80"); test_continuation_byte(runner, "\xF0\x90\x80\x80"); // Test string containing null character static const char string_with_null[] = "((((\0))))"; char *html = cmark_markdown_to_html( string_with_null, sizeof(string_with_null) - 1, CMARK_OPT_DEFAULT); STR_EQ(runner, html, "

((((" UTF8_REPL "))))

\n", "utf8 with U+0000"); free(html); } static void test_char(test_batch_runner *runner, int valid, const char *utf8, const char *msg) { char buf[20]; sprintf(buf, "((((%s))))", utf8); if (valid) { char expected[30]; sprintf(expected, "

((((%s))))

\n", utf8); test_md_to_html(runner, buf, expected, msg); } else { test_md_to_html(runner, buf, "

((((" UTF8_REPL "))))

\n", msg); } } static void test_incomplete_char(test_batch_runner *runner, const char *utf8, const char *msg) { char buf[20]; sprintf(buf, "----%s", utf8); test_md_to_html(runner, buf, "

----" UTF8_REPL "

\n", msg); } static void test_continuation_byte(test_batch_runner *runner, const char *utf8) { size_t len = strlen(utf8); for (size_t pos = 1; pos < len; ++pos) { char buf[20]; sprintf(buf, "((((%s))))", utf8); buf[4 + pos] = '\x20'; char expected[50]; strcpy(expected, "

((((" UTF8_REPL "\x20"); for (size_t i = pos + 1; i < len; ++i) { strcat(expected, UTF8_REPL); } strcat(expected, "))))

\n"); char *html = cmark_markdown_to_html(buf, strlen(buf), CMARK_OPT_VALIDATE_UTF8); STR_EQ(runner, html, expected, "invalid utf8 continuation byte %d/%d", pos, len); free(html); } } static void line_endings(test_batch_runner *runner) { // Test list with different line endings static const char list_with_endings[] = "- a\n- b\r\n- c\r- d"; char *html = cmark_markdown_to_html( list_with_endings, sizeof(list_with_endings) - 1, CMARK_OPT_DEFAULT); STR_EQ(runner, html, "
    \n
  • a
  • \n
  • b
  • \n
  • c
  • \n
  • d
  • \n
\n", "list with different line endings"); free(html); static const char crlf_lines[] = "line\r\nline\r\n"; html = cmark_markdown_to_html(crlf_lines, sizeof(crlf_lines) - 1, CMARK_OPT_DEFAULT | CMARK_OPT_HARDBREAKS); STR_EQ(runner, html, "

line
\nline

\n", "crlf endings with CMARK_OPT_HARDBREAKS"); free(html); static const char no_line_ending[] = "```\nline\n```"; html = cmark_markdown_to_html(no_line_ending, sizeof(no_line_ending) - 1, CMARK_OPT_DEFAULT); STR_EQ(runner, html, "
line\n
\n", "fenced code block with no final newline"); free(html); } static void numeric_entities(test_batch_runner *runner) { test_md_to_html(runner, "�", "

" UTF8_REPL "

\n", "Invalid numeric entity 0"); test_md_to_html(runner, "퟿", "

\xED\x9F\xBF

\n", "Valid numeric entity 0xD7FF"); test_md_to_html(runner, "�", "

" UTF8_REPL "

\n", "Invalid numeric entity 0xD800"); test_md_to_html(runner, "�", "

" UTF8_REPL "

\n", "Invalid numeric entity 0xDFFF"); test_md_to_html(runner, "", "

\xEE\x80\x80

\n", "Valid numeric entity 0xE000"); test_md_to_html(runner, "􏿿", "

\xF4\x8F\xBF\xBF

\n", "Valid numeric entity 0x10FFFF"); test_md_to_html(runner, "�", "

" UTF8_REPL "

\n", "Invalid numeric entity 0x110000"); test_md_to_html(runner, "�", "

" UTF8_REPL "

\n", "Invalid numeric entity 0x80000000"); test_md_to_html(runner, "�", "

" UTF8_REPL "

\n", "Invalid numeric entity 0xFFFFFFFF"); test_md_to_html(runner, "�", "

" UTF8_REPL "

\n", "Invalid numeric entity 99999999"); test_md_to_html(runner, "&#;", "

&#;

\n", "Min decimal entity length"); test_md_to_html(runner, "&#x;", "

&#x;

\n", "Min hexadecimal entity length"); test_md_to_html(runner, "�", "

&#999999999;

\n", "Max decimal entity length"); test_md_to_html(runner, "A", "

&#x000000041;

\n", "Max hexadecimal entity length"); } static void test_safe(test_batch_runner *runner) { // Test safe mode static const char raw_html[] = "
\nhi\n
\n\nhi\n[link](JAVAscript:alert('hi'))\n![image](" "file:my.js)\n"; char *html = cmark_markdown_to_html(raw_html, sizeof(raw_html) - 1, CMARK_OPT_DEFAULT | CMARK_OPT_SAFE); STR_EQ(runner, html, "\n

hi\nlink\n\"image\"

\n", "input with raw HTML and dangerous links"); free(html); } static void test_md_to_html(test_batch_runner *runner, const char *markdown, const char *expected_html, const char *msg) { char *html = cmark_markdown_to_html(markdown, strlen(markdown), CMARK_OPT_VALIDATE_UTF8); STR_EQ(runner, html, expected_html, msg); free(html); } int main() { int retval; test_batch_runner *runner = test_batch_runner_new(); version(runner); constructor(runner); accessors(runner); node_check(runner); iterator(runner); iterator_delete(runner); create_tree(runner); custom_nodes(runner); hierarchy(runner); parser(runner); render_html(runner); render_man(runner); render_latex(runner); render_commonmark(runner); utf8(runner); line_endings(runner); numeric_entities(runner); test_cplusplus(runner); test_safe(runner); test_print_summary(runner); retval = test_ok(runner) ? 0 : 1; free(runner); return retval; }