diff options
-rw-r--r-- | api_test/main.c | 37 | ||||
-rw-r--r-- | src/blocks.c | 48 | ||||
-rw-r--r-- | src/buffer.c | 32 | ||||
-rw-r--r-- | src/buffer.h | 20 | ||||
-rw-r--r-- | src/cmark.c | 3 | ||||
-rw-r--r-- | src/cmark.h | 32 | ||||
-rw-r--r-- | src/inlines.c | 2 | ||||
-rw-r--r-- | src/inlines.h | 2 | ||||
-rw-r--r-- | src/main.c | 5 | ||||
-rw-r--r-- | src/parser.h | 3 | ||||
-rw-r--r-- | src/source_map.c | 22 | ||||
-rw-r--r-- | src/source_map.h | 23 | ||||
-rw-r--r-- | test/cmark.py | 2 |
13 files changed, 187 insertions, 44 deletions
diff --git a/api_test/main.c b/api_test/main.c index 17e1582..61291dc 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -5,6 +5,7 @@ #define CMARK_NO_SHORT_NAMES #include "cmark.h" #include "node.h" +#include "parser.h" #include "harness.h" #include "cplusplus.h" @@ -883,6 +884,41 @@ static void test_feed_across_line_ending(test_batch_runner *runner) { cmark_node_free(document); } +static cmark_node *S_parse_with_fake_total(bufsize_t fake_total, + const char *str, + cmark_err_type *err) { + cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT); + parser->total_bytes = fake_total; + cmark_parser_feed(parser, str, strlen(str)); + cmark_node *doc = cmark_parser_finish(parser); + *err = cmark_parser_get_error(parser); + cmark_parser_free(parser); + return doc; +} + +static void test_bufsize_overflow(test_batch_runner *runner) { + cmark_node *doc; + cmark_err_type err; + + doc = S_parse_with_fake_total(BUFSIZE_MAX, "a", &err); + OK(runner, doc == NULL, "parse 1 byte after BUFSIZE_MAX bytes fails"); + INT_EQ(runner, err, CMARK_ERR_INPUT_TOO_LARGE, + "parse 1 byte after BUFSIZE_MAX bytes error code"); + + doc = S_parse_with_fake_total(BUFSIZE_MAX - 9, "0123456789", &err); + OK(runner, doc == NULL, "parse 10 byte after BUFSIZE_MAX-9 bytes fails"); + INT_EQ(runner, err, CMARK_ERR_INPUT_TOO_LARGE, + "parse 10 byte after BUFSIZE_MAX-9 bytes error code"); + + doc = S_parse_with_fake_total(BUFSIZE_MAX - 1, "a", &err); + OK(runner, doc != NULL, "parse 1 byte after BUFSIZE_MAX-1 bytes"); + cmark_node_free(doc); + + doc = S_parse_with_fake_total(BUFSIZE_MAX - 10, "0123456789", &err); + OK(runner, doc != NULL, "parse 10 byte after BUFSIZE_MAX-10 bytes"); + cmark_node_free(doc); +} + int main() { int retval; test_batch_runner *runner = test_batch_runner_new(); @@ -908,6 +944,7 @@ int main() { test_cplusplus(runner); test_safe(runner); test_feed_across_line_ending(runner); + test_bufsize_overflow(runner); test_print_summary(runner); retval = test_ok(runner) ? 0 : 1; diff --git a/src/blocks.c b/src/blocks.c index 1c1d160..c680535 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -96,6 +96,8 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { parser->refmap = cmark_reference_map_new(mem); parser->root = document; parser->current = document; + parser->error_code = CMARK_ERR_NONE; + parser->total_bytes = 0; parser->line_number = 0; parser->line_offset = 0; parser->offset = 0; @@ -550,6 +552,20 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, const unsigned char *skipped; static const uint8_t repl[] = {239, 191, 189}; + if (parser->error_code) { + return; + } + + // Limit maximum document size to BUFSIZE_MAX. This makes sure that we + // never create strbufs larger than BUFSIZE_MAX. Unfortunately, the + // public API doesn't have an error reporting mechanism, so all we can + // do is to abort. + if (len > (size_t)(BUFSIZE_MAX - parser->total_bytes)) { + parser->error_code = CMARK_ERR_INPUT_TOO_LARGE; + return; + } + parser->total_bytes += (bufsize_t)len; + if (parser->last_buffer_ended_with_cr && *buffer == '\n') { // skip NL if last buffer ended with CR ; see #117 buffer++; @@ -1266,14 +1282,19 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) { cmark_strbuf_clear(&parser->linebuf); } + cmark_strbuf_clear(&parser->curline); + + if (parser->error_code) { + cmark_node_free(parser->root); + return NULL; + } + finalize_document(parser); if (parser->options & CMARK_OPT_NORMALIZE) { cmark_consolidate_text_nodes(parser->root); } - cmark_strbuf_free(&parser->curline); - #if CMARK_DEBUG_NODES if (cmark_node_check(parser->root, stderr)) { abort(); @@ -1287,3 +1308,26 @@ cmark_parser_get_first_source_extent(cmark_parser *parser) { return parser->source_map->head; } + +cmark_err_type cmark_parser_get_error(cmark_parser *parser) { + return parser->error_code; +} + +const char *cmark_parser_get_error_message(cmark_parser *parser) { + const char *str = NULL; + + switch (parser->error_code) { + case CMARK_ERR_OUT_OF_MEMORY: + str = "Out of memory"; + break; + case CMARK_ERR_INPUT_TOO_LARGE: + str = "Input too large"; + break; + default: + str = "Unknown error"; + break; + } + + return str; +} + diff --git a/src/buffer.c b/src/buffer.c index a6754b6..9a9e9ad 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -33,6 +33,11 @@ void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf, } static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) { + // Safety check for overflow. + if (add > BUFSIZE_MAX - buf->size) { + fprintf(stderr, "Internal cmark_strbuf overflow"); + abort(); + } cmark_strbuf_grow(buf, buf->size + add); } @@ -42,18 +47,25 @@ void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) { if (target_size < buf->asize) return; - if (target_size > (bufsize_t)(INT32_MAX / 2)) - abort(); - - /* Oversize the buffer by 50% to guarantee amortized linear time - * complexity on append operations. */ - bufsize_t new_size = target_size + target_size / 2; - new_size += 1; - new_size = (new_size + 7) & ~7; + // Oversize the buffer by 50% to guarantee amortized linear time + // complexity on append operations. + bufsize_t add = target_size / 2; + // Account for terminating NUL byte. + add += 1; + // Round up to multiple of eight. + add = (add + 7) & ~7; + + // Check for overflow but allow an additional NUL byte. + if (target_size + add > BUFSIZE_MAX + 1) { + target_size = BUFSIZE_MAX + 1; + } + else { + target_size += add; + } buf->ptr = (unsigned char *)buf->mem->realloc(buf->asize ? buf->ptr : NULL, - new_size); - buf->asize = new_size; + target_size); + buf->asize = target_size; } bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; } diff --git a/src/buffer.h b/src/buffer.h index e878075..7f31a74 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -13,8 +13,28 @@ extern "C" { #endif +#ifndef CMARK_HUGE_DOCS + +// Maximum strbuf size without terminating NUL byte. +#define BUFSIZE_MAX (INT32_MAX - 1) + typedef int32_t bufsize_t; +#else // CMARK_HUGE_DOCS + +// This is an untested proof of concept of how to handle multi-gigabyte +// documents on 64-bit platforms at the expense of internal struct sizes. + +#ifdef PTRDIFF_MAX + #define BUFSIZE_MAX (PTRDIFF_MAX - 1) +#else + #define BUFSIZE_MAX (ptrdiff_t)((size_t)-1 / 2) +#endif + +typedef ptrdiff_t bufsize_t; + +#endif // CMARK_HUGE_DOCS + typedef struct { cmark_mem *mem; unsigned char *ptr; diff --git a/src/cmark.c b/src/cmark.c index 2ef6cb4..da93abe 100644 --- a/src/cmark.c +++ b/src/cmark.c @@ -36,6 +36,9 @@ char *cmark_markdown_to_html(const char *text, size_t len, int options) { char *result; doc = cmark_parse_document(text, len, options); + if (doc == NULL) { + return NULL; + } result = cmark_render_html(doc, options); cmark_node_free(doc); diff --git a/src/cmark.h b/src/cmark.h index 034f0e6..5ce6d10 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -2,7 +2,6 @@ #define CMARK_H #include <stdio.h> -#include <stdint.h> #include <cmark_export.h> #include <cmark_version.h> @@ -23,7 +22,7 @@ extern "C" { /** Convert 'text' (assumed to be a UTF-8 encoded string with length * 'len') from CommonMark Markdown to HTML, returning a null-terminated, * UTF-8-encoded string. It is the caller's responsibility - * to free the returned buffer. + * to free the returned buffer. Returns NULL on error. */ CMARK_EXPORT char *cmark_markdown_to_html(const char *text, size_t len, int options); @@ -99,6 +98,12 @@ typedef enum { CMARK_PAREN_DELIM } cmark_delim_type; +typedef enum { + CMARK_ERR_NONE, + CMARK_ERR_OUT_OF_MEMORY, + CMARK_ERR_INPUT_TOO_LARGE +} cmark_err_type; + typedef struct cmark_node cmark_node; typedef struct cmark_parser cmark_parser; typedef struct cmark_iter cmark_iter; @@ -489,12 +494,22 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem); CMARK_EXPORT void cmark_parser_free(cmark_parser *parser); +/** Return the error code after a failed operation. + */ +CMARK_EXPORT +cmark_err_type cmark_parser_get_error(cmark_parser *parser); + +/** Return the error code after a failed operation. + */ +CMARK_EXPORT +const char *cmark_parser_get_error_message(cmark_parser *parser); + /** Feeds a string of length 'len' to 'parser'. */ CMARK_EXPORT void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len); -/** Finish parsing and return a pointer to a tree of nodes. +/** Finish parsing and return a pointer to a tree of nodes or NULL on error. */ CMARK_EXPORT cmark_node *cmark_parser_finish(cmark_parser *parser); @@ -507,7 +522,7 @@ cmark_source_extent *cmark_parser_get_first_source_extent(cmark_parser *parser); /** Parse a CommonMark document in 'buffer' of length 'len'. * Returns a pointer to a tree of nodes. The memory allocated for * the node tree should be released using 'cmark_node_free' - * when it is no longer needed. + * when it is no longer needed. Returns NULL on error. */ CMARK_EXPORT cmark_node *cmark_parse_document(const char *buffer, size_t len, int options); @@ -515,22 +530,23 @@ cmark_node *cmark_parse_document(const char *buffer, size_t len, int options); /** Parse a CommonMark document in file 'f', returning a pointer to * a tree of nodes. The memory allocated for the node tree should be * released using 'cmark_node_free' when it is no longer needed. + * Returns NULL on error. */ CMARK_EXPORT cmark_node *cmark_parse_file(FILE *f, int options); -/** +/** * ## Source map API */ /* Return the index, in bytes, of the start of this extent */ CMARK_EXPORT -uint64_t cmark_source_extent_get_start(cmark_source_extent *extent); +size_t cmark_source_extent_get_start(cmark_source_extent *extent); -/* Return the index, in bytes, of the stop of this extent. This +/* Return the index, in bytes, of the stop of this extent. This * index is not included in the extent*/ CMARK_EXPORT -uint64_t cmark_source_extent_get_stop(cmark_source_extent *extent); +size_t cmark_source_extent_get_stop(cmark_source_extent *extent); /* Return the extent immediately following 'extent' */ CMARK_EXPORT diff --git a/src/inlines.c b/src/inlines.c index 96da28b..099078e 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1230,7 +1230,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { // Parse inlines from parent's string_content, adding as children of parent. extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, cmark_reference_map *refmap, int options, - cmark_source_map *source_map, uint64_t total_length) { + cmark_source_map *source_map, bufsize_t total_length) { subject subj; subject_from_buf(mem, &subj, &parent->content, refmap, source_map); bufsize_t initial_len = subj.input.len; diff --git a/src/inlines.h b/src/inlines.h index 8de31b1..8459794 100644 --- a/src/inlines.h +++ b/src/inlines.h @@ -14,7 +14,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title); void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, cmark_reference_map *refmap, int options, - cmark_source_map *source_map, uint64_t total_length); + cmark_source_map *source_map, bufsize_t total_length); bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, cmark_reference_map *refmap, @@ -181,6 +181,11 @@ int main(int argc, char *argv[]) { document = cmark_parser_finish(parser); cmark_parser_free(parser); + if (document == NULL) { + fprintf(stderr, "%s", cmark_parser_get_error_message(parser)); + exit(1); + } + print_document(document, writer, options, width); cmark_node_free(document); diff --git a/src/parser.h b/src/parser.h index b28a8a7..7b4fdbc 100644 --- a/src/parser.h +++ b/src/parser.h @@ -2,6 +2,7 @@ #define CMARK_AST_H #include <stdio.h> +#include "cmark.h" #include "node.h" #include "buffer.h" #include "memory.h" @@ -18,6 +19,8 @@ struct cmark_parser { struct cmark_reference_map *refmap; struct cmark_node *root; struct cmark_node *current; + cmark_err_type error_code; + bufsize_t total_bytes; int line_number; bufsize_t offset; bufsize_t column; diff --git a/src/source_map.c b/src/source_map.c index db01a21..dccbe7c 100644 --- a/src/source_map.c +++ b/src/source_map.c @@ -19,7 +19,7 @@ source_map_free(cmark_source_map *self) } cmark_source_extent * -source_map_append_extent(cmark_source_map *self, uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type) +source_map_append_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop, cmark_node *node, cmark_extent_type type) { assert (start <= stop); assert (!self->tail || self->tail->stop <= start); @@ -46,7 +46,7 @@ source_map_append_extent(cmark_source_map *self, uint64_t start, uint64_t stop, cmark_source_extent * source_map_insert_extent(cmark_source_map *self, cmark_source_extent *previous, - uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type) + bufsize_t start, bufsize_t stop, cmark_node *node, cmark_extent_type type) { if (start == stop) return previous; @@ -101,7 +101,7 @@ source_map_free_extent(cmark_source_map *self, cmark_source_extent *extent) cmark_source_extent * source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent, - cmark_node *node, uint64_t total_length) + cmark_node *node, bufsize_t total_length) { cmark_source_extent *next_extent = extent->next; cmark_source_extent *res; @@ -135,7 +135,7 @@ source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent, } cmark_source_extent * -source_map_splice_extent(cmark_source_map *self, uint64_t start, uint64_t stop, +source_map_splice_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop, cmark_node *node, cmark_extent_type type) { if (!self->next_cursor) { @@ -154,7 +154,7 @@ source_map_splice_extent(cmark_source_map *self, uint64_t start, uint64_t stop, return self->cursor; } else if (start + self->cursor_offset < self->next_cursor->start) { - uint64_t new_start = self->next_cursor->start - self->cursor_offset; + bufsize_t new_start = self->next_cursor->start - self->cursor_offset; self->cursor = source_map_insert_extent(self, self->cursor, @@ -196,17 +196,17 @@ source_map_pretty_print(cmark_source_map *self) { cmark_source_extent *tmp; for (tmp = self->head; tmp; tmp = tmp->next) { - printf ("%lu:%lu - %s, %s (%p)\n", tmp->start, tmp->stop, - cmark_node_get_type_string(tmp->node), + printf ("%d:%d - %s, %s (%p)\n", tmp->start, tmp->stop, + cmark_node_get_type_string(tmp->node), cmark_source_extent_get_type_string(tmp), (void *) tmp->node); } } bool -source_map_check(cmark_source_map *self, uint64_t total_length) +source_map_check(cmark_source_map *self, bufsize_t total_length) { - uint64_t last_stop = 0; + bufsize_t last_stop = 0; cmark_source_extent *tmp; for (tmp = self->head; tmp; tmp = tmp->next) { @@ -224,13 +224,13 @@ source_map_check(cmark_source_map *self, uint64_t total_length) } -uint64_t +size_t cmark_source_extent_get_start(cmark_source_extent *extent) { return extent->start; } -uint64_t +size_t cmark_source_extent_get_stop(cmark_source_extent *extent) { return extent->stop; diff --git a/src/source_map.h b/src/source_map.h index 619a073..dca5a9f 100644 --- a/src/source_map.h +++ b/src/source_map.h @@ -3,6 +3,7 @@ #include "cmark.h" #include "config.h" +#include "buffer.h" typedef struct _cmark_source_map { @@ -10,14 +11,14 @@ typedef struct _cmark_source_map cmark_source_extent *tail; cmark_source_extent *cursor; cmark_source_extent *next_cursor; - uint64_t cursor_offset; + bufsize_t cursor_offset; cmark_mem *mem; } cmark_source_map; struct cmark_source_extent { - uint64_t start; - uint64_t stop; + bufsize_t start; + bufsize_t stop; struct cmark_source_extent *next; struct cmark_source_extent *prev; cmark_node *node; @@ -29,20 +30,20 @@ cmark_source_map * source_map_new (cmark_mem *mem); void source_map_free (cmark_source_map *self); bool source_map_check (cmark_source_map *self, - uint64_t total_length); + bufsize_t total_length); void source_map_pretty_print (cmark_source_map *self); cmark_source_extent * source_map_append_extent(cmark_source_map *self, - uint64_t start, - uint64_t stop, + bufsize_t start, + bufsize_t stop, cmark_node *node, cmark_extent_type type); cmark_source_extent * source_map_insert_extent(cmark_source_map *self, cmark_source_extent *previous, - uint64_t start, - uint64_t stop, + bufsize_t start, + bufsize_t stop, cmark_node *node, cmark_extent_type type); @@ -52,11 +53,11 @@ cmark_source_extent * source_map_free_extent (cmark_source_map *self, cmark_source_extent * source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent, cmark_node *node, - uint64_t total_length); + bufsize_t total_length); cmark_source_extent * source_map_splice_extent(cmark_source_map *self, - uint64_t start, - uint64_t stop, + bufsize_t start, + bufsize_t stop, cmark_node *node, cmark_extent_type type); diff --git a/test/cmark.py b/test/cmark.py index 4be85a3..f4ff576 100644 --- a/test/cmark.py +++ b/test/cmark.py @@ -30,6 +30,8 @@ def to_commonmark(lib, text): render_commonmark.restype = c_char_p render_commonmark.argtypes = [c_void_p, c_int, c_int] node = parse_document(textbytes, textlen, 0) + if node is None: + raise Exception("parse_document failed") result = render_commonmark(node, 0, 0).decode('utf-8') return [0, result, ''] |