diff options
-rw-r--r-- | src/blocks.c | 2 | ||||
-rw-r--r-- | src/buffer.c | 93 | ||||
-rw-r--r-- | src/buffer.h | 37 | ||||
-rw-r--r-- | src/chunk.h | 4 | ||||
-rw-r--r-- | src/commonmark.c | 17 | ||||
-rw-r--r-- | src/latex.c | 5 | ||||
-rw-r--r-- | src/render.c | 2 |
7 files changed, 71 insertions, 89 deletions
diff --git a/src/blocks.c b/src/blocks.c index 32dffa2..c778c7a 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -528,7 +528,7 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, process = true; } - chunk_len = cmark_strbuf_check_bufsize(eol - buffer); + chunk_len = (eol - buffer); if (process) { if (parser->linebuf->size > 0) { cmark_strbuf_put(parser->linebuf, buffer, chunk_len); diff --git a/src/buffer.c b/src/buffer.c index 4efa97b..80ca49a 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -5,6 +5,7 @@ #include <stdio.h> #include <stdlib.h> #include <stdint.h> +#include <limits.h> #include "config.h" #include "cmark_ctype.h" @@ -28,68 +29,48 @@ void cmark_strbuf_init(cmark_strbuf *buf, bufsize_t initial_size) { cmark_strbuf_grow(buf, initial_size); } -void cmark_strbuf_overflow_err() { - fprintf(stderr, "String buffer overflow"); - abort(); +static CMARK_INLINE bool S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) { + return cmark_strbuf_grow(buf, buf->size + add); } -static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, size_t add) { - size_t target_size = (size_t)buf->size + add; - - if (target_size < add /* Integer overflow. */ - || target_size > BUFSIZE_MAX /* Truncation overflow. */ - ) { - cmark_strbuf_overflow_err(); - return; /* unreachable */ - } - - if ((bufsize_t)target_size >= buf->asize) - cmark_strbuf_grow(buf, (bufsize_t)target_size); -} +#if BUFSIZE_MAX > (SIZE_MAX / 4) +# error "unsafe value for BUFSIZE_MAX" +#endif -void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) { - unsigned char *new_ptr; +bool cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) { + assert(target_size > 0); if (target_size < buf->asize) - return; - - if (buf->asize == 0) { - new_ptr = NULL; - } else { - new_ptr = buf->ptr; - } + return true; + + /* + * Do not allow string buffers to grow past this "safe" value. + * + * Note that this is a soft cap to prevent unbounded memory growth: + * in practice, the buffer can get larger than this value because we + * overgrow it by 50% + * + * Note that there are no overflow checks for the realloc because + * the value of BUFSIZE_MAX is always assured to be impossible + * to overflow on both 32 and 64 bit systems, since it will never + * be larger than 1/4th of our address space. + */ + if (target_size > BUFSIZE_MAX) + return false; /* Oversize the buffer by 50% to guarantee amortized linear time * complexity on append operations. */ - size_t new_size = (size_t)target_size + (size_t)target_size / 2; - - /* Account for terminating null byte. */ + bufsize_t new_size = target_size + target_size / 2; new_size += 1; - - /* round allocation up to multiple of 8 */ new_size = (new_size + 7) & ~7; - if (new_size < (size_t)target_size /* Integer overflow. */ - || new_size > BUFSIZE_MAX /* Truncation overflow. */ - ) { - if (target_size >= BUFSIZE_MAX) { - /* No space for terminating null byte. */ - cmark_strbuf_overflow_err(); - return; /* unreachable */ - } - /* Oversize by the maximum possible amount. */ - new_size = BUFSIZE_MAX; - } + unsigned char *new_ptr = realloc(buf->asize ? buf->ptr : NULL, new_size); + if (!new_ptr) + return false; - new_ptr = (unsigned char *)realloc(new_ptr, new_size); - - if (!new_ptr) { - perror("realloc in cmark_strbuf_grow"); - abort(); - } - - buf->asize = (bufsize_t)new_size; + buf->asize = new_size; buf->ptr = new_ptr; + return true; } bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; } @@ -117,8 +98,8 @@ void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, cmark_strbuf_clear(buf); } else { if (data != buf->ptr) { - if (len >= buf->asize) - cmark_strbuf_grow(buf, len); + if (len >= buf->asize && !cmark_strbuf_grow(buf, len)) + return; memmove(buf->ptr, data, len); } buf->size = len; @@ -128,21 +109,21 @@ void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) { cmark_strbuf_set(buf, (const unsigned char *)string, - string ? cmark_strbuf_safe_strlen(string) : 0); + string ? strlen(string) : 0); } void cmark_strbuf_putc(cmark_strbuf *buf, int c) { - S_strbuf_grow_by(buf, 1); + if (!S_strbuf_grow_by(buf, 1)) + return; buf->ptr[buf->size++] = (unsigned char)(c & 0xFF); buf->ptr[buf->size] = '\0'; } void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t len) { - if (len <= 0) + if (len <= 0 || !S_strbuf_grow_by(buf, len)) return; - S_strbuf_grow_by(buf, len); memmove(buf->ptr + buf->size, data, len); buf->size += len; buf->ptr[buf->size] = '\0'; @@ -150,7 +131,7 @@ void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) { cmark_strbuf_put(buf, (const unsigned char *)string, - cmark_strbuf_safe_strlen(string)); + strlen(string)); } void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, diff --git a/src/buffer.h b/src/buffer.h index 6fd0cae..ad4f341 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -5,13 +5,15 @@ #include <stdarg.h> #include <string.h> #include <limits.h> +#include <unistd.h> +#include <stdbool.h> #include "config.h" #ifdef __cplusplus extern "C" { #endif -typedef int bufsize_t; +typedef ssize_t bufsize_t; typedef struct { unsigned char *ptr; @@ -22,7 +24,22 @@ extern unsigned char cmark_strbuf__initbuf[]; #define GH_BUF_INIT \ { cmark_strbuf__initbuf, 0, 0 } -#define BUFSIZE_MAX INT_MAX + +/* + * Maximum size for memory storage on any given `cmark_strbuf` object. + * + * This is a "safe" value to prevent unbounded memory growth when + * parsing arbitrarily large (and potentially malicious) documents. + * + * It is currently set to 32mb, which is a reasonable default for + * production applications. If you need to parse documents larger than + * that, you can increase this value up to `SSIZE_MAX / 2` (which in + * practice resolves to 1/4th of the total address space for the program). + * + * Anything larger than that is a security threat and hence static checks + * will prevent CMark from compiling. + */ +#define BUFSIZE_MAX (32 * 1024 * 1024) /** * Initialize a cmark_strbuf structure. @@ -35,7 +52,7 @@ void cmark_strbuf_init(cmark_strbuf *buf, bufsize_t initial_size); /** * Grow the buffer to hold at least `target_size` bytes. */ -void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size); +bool cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size); void cmark_strbuf_free(cmark_strbuf *buf); void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b); @@ -72,20 +89,6 @@ void cmark_strbuf_trim(cmark_strbuf *buf); void cmark_strbuf_normalize_whitespace(cmark_strbuf *s); void cmark_strbuf_unescape(cmark_strbuf *s); -/* Print error and abort. */ -void cmark_strbuf_overflow_err(void); - -static CMARK_INLINE bufsize_t cmark_strbuf_check_bufsize(size_t size) { - if (size > BUFSIZE_MAX) { - cmark_strbuf_overflow_err(); - } - return (bufsize_t)size; -} - -static CMARK_INLINE bufsize_t cmark_strbuf_safe_strlen(const char *str) { - return cmark_strbuf_check_bufsize(strlen(str)); -} - #ifdef __cplusplus } #endif diff --git a/src/chunk.h b/src/chunk.h index 7007492..234a4f3 100644 --- a/src/chunk.h +++ b/src/chunk.h @@ -83,7 +83,7 @@ static CMARK_INLINE void cmark_chunk_set_cstr(cmark_chunk *c, const char *str) { c->data = NULL; c->alloc = 0; } else { - c->len = cmark_strbuf_safe_strlen(str); + c->len = strlen(str); c->data = (unsigned char *)malloc(c->len + 1); c->alloc = 1; memcpy(c->data, str, c->len + 1); @@ -91,7 +91,7 @@ static CMARK_INLINE void cmark_chunk_set_cstr(cmark_chunk *c, const char *str) { } static CMARK_INLINE cmark_chunk cmark_chunk_literal(const char *data) { - bufsize_t len = data ? cmark_strbuf_safe_strlen(data) : 0; + bufsize_t len = data ? strlen(data) : 0; cmark_chunk c = {(unsigned char *)data, len, 0}; return c; } diff --git a/src/commonmark.c b/src/commonmark.c index a5c1ccf..f1589f5 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -11,7 +11,6 @@ #include "scanners.h" #include "render.h" -#define safe_strlen(s) cmark_strbuf_safe_strlen(s) #define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) #define LIT(s) renderer->out(renderer, s, false, LITERAL) #define CR() renderer->cr(renderer) @@ -67,7 +66,7 @@ static int longest_backtick_sequence(const char *code) { int longest = 0; int current = 0; size_t i = 0; - size_t code_len = safe_strlen(code); + size_t code_len = strlen(code); while (i <= code_len) { if (code[i] == '`') { current++; @@ -86,7 +85,7 @@ static int shortest_unused_backtick_sequence(const char *code) { int32_t used = 1; int current = 0; size_t i = 0; - size_t code_len = safe_strlen(code); + size_t code_len = strlen(code); while (i <= code_len) { if (code[i] == '`') { current++; @@ -233,7 +232,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number, list_delim == CMARK_PAREN_DELIM ? ")" : ".", list_number < 10 ? " " : " "); - marker_width = safe_strlen(listmarker); + marker_width = strlen(listmarker); } if (entering) { if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { @@ -275,9 +274,9 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, BLANKLINE(); } info = cmark_node_get_fence_info(node); - info_len = safe_strlen(info); + info_len = strlen(info); code = cmark_node_get_literal(node); - code_len = safe_strlen(code); + code_len = strlen(code); // use indented form if no info, and code doesn't // begin or end with a blank line, and code isn't // first thing in a list item @@ -361,7 +360,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, case CMARK_NODE_CODE: code = cmark_node_get_literal(node); - code_len = safe_strlen(code); + code_len = strlen(code); numticks = shortest_unused_backtick_sequence(code); for (i = 0; i < numticks; i++) { LIT("`"); @@ -431,7 +430,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, LIT("]("); OUT(cmark_node_get_url(node), false, URL); title = cmark_node_get_title(node); - if (safe_strlen(title) > 0) { + if (strlen(title) > 0) { LIT(" \""); OUT(title, false, TITLE); LIT("\""); @@ -448,7 +447,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, LIT("]("); OUT(cmark_node_get_url(node), false, URL); title = cmark_node_get_title(node); - if (safe_strlen(title) > 0) { + if (strlen(title) > 0) { OUT(" \"", allow_wrap, LITERAL); OUT(title, false, TITLE); LIT("\""); diff --git a/src/latex.c b/src/latex.c index 1546baa..7c3decd 100644 --- a/src/latex.c +++ b/src/latex.c @@ -11,7 +11,6 @@ #include "scanners.h" #include "render.h" -#define safe_strlen(s) cmark_strbuf_safe_strlen(s) #define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) #define LIT(s) renderer->out(renderer, s, false, LITERAL) #define CR() renderer->cr(renderer) @@ -168,13 +167,13 @@ static link_type get_link_type(cmark_node *node) { return INTERNAL_LINK; } - url_len = (int)safe_strlen(url); + url_len = strlen(url); if (url_len == 0 || scan_scheme(&url_chunk, 0) == 0) { return NO_LINK; } const char *title = cmark_node_get_title(node); - title_len = safe_strlen(title); + title_len = strlen(title); // if it has a title, we can't treat it as an autolink: if (title_len == 0) { diff --git a/src/render.c b/src/render.c index 9e59fa3..b1ed2e4 100644 --- a/src/render.c +++ b/src/render.c @@ -19,7 +19,7 @@ static CMARK_INLINE void S_blankline(cmark_renderer *renderer) { static void S_out(cmark_renderer *renderer, const char *source, bool wrap, cmark_escaping escape) { - int length = cmark_strbuf_safe_strlen(source); + int length = strlen(source); unsigned char nextc; int32_t c; int i = 0; |