summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/blocks.c2
-rw-r--r--src/buffer.c93
-rw-r--r--src/buffer.h37
-rw-r--r--src/chunk.h4
-rw-r--r--src/commonmark.c17
-rw-r--r--src/latex.c5
-rw-r--r--src/render.c2
7 files changed, 71 insertions, 89 deletions
diff --git a/src/blocks.c b/src/blocks.c
index 32dffa2..c778c7a 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -528,7 +528,7 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
process = true;
}
- chunk_len = cmark_strbuf_check_bufsize(eol - buffer);
+ chunk_len = (eol - buffer);
if (process) {
if (parser->linebuf->size > 0) {
cmark_strbuf_put(parser->linebuf, buffer, chunk_len);
diff --git a/src/buffer.c b/src/buffer.c
index 4efa97b..80ca49a 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -5,6 +5,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
+#include <limits.h>
#include "config.h"
#include "cmark_ctype.h"
@@ -28,68 +29,48 @@ void cmark_strbuf_init(cmark_strbuf *buf, bufsize_t initial_size) {
cmark_strbuf_grow(buf, initial_size);
}
-void cmark_strbuf_overflow_err() {
- fprintf(stderr, "String buffer overflow");
- abort();
+static CMARK_INLINE bool S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) {
+ return cmark_strbuf_grow(buf, buf->size + add);
}
-static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, size_t add) {
- size_t target_size = (size_t)buf->size + add;
-
- if (target_size < add /* Integer overflow. */
- || target_size > BUFSIZE_MAX /* Truncation overflow. */
- ) {
- cmark_strbuf_overflow_err();
- return; /* unreachable */
- }
-
- if ((bufsize_t)target_size >= buf->asize)
- cmark_strbuf_grow(buf, (bufsize_t)target_size);
-}
+#if BUFSIZE_MAX > (SIZE_MAX / 4)
+# error "unsafe value for BUFSIZE_MAX"
+#endif
-void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) {
- unsigned char *new_ptr;
+bool cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) {
+ assert(target_size > 0);
if (target_size < buf->asize)
- return;
-
- if (buf->asize == 0) {
- new_ptr = NULL;
- } else {
- new_ptr = buf->ptr;
- }
+ return true;
+
+ /*
+ * Do not allow string buffers to grow past this "safe" value.
+ *
+ * Note that this is a soft cap to prevent unbounded memory growth:
+ * in practice, the buffer can get larger than this value because we
+ * overgrow it by 50%
+ *
+ * Note that there are no overflow checks for the realloc because
+ * the value of BUFSIZE_MAX is always assured to be impossible
+ * to overflow on both 32 and 64 bit systems, since it will never
+ * be larger than 1/4th of our address space.
+ */
+ if (target_size > BUFSIZE_MAX)
+ return false;
/* Oversize the buffer by 50% to guarantee amortized linear time
* complexity on append operations. */
- size_t new_size = (size_t)target_size + (size_t)target_size / 2;
-
- /* Account for terminating null byte. */
+ bufsize_t new_size = target_size + target_size / 2;
new_size += 1;
-
- /* round allocation up to multiple of 8 */
new_size = (new_size + 7) & ~7;
- if (new_size < (size_t)target_size /* Integer overflow. */
- || new_size > BUFSIZE_MAX /* Truncation overflow. */
- ) {
- if (target_size >= BUFSIZE_MAX) {
- /* No space for terminating null byte. */
- cmark_strbuf_overflow_err();
- return; /* unreachable */
- }
- /* Oversize by the maximum possible amount. */
- new_size = BUFSIZE_MAX;
- }
+ unsigned char *new_ptr = realloc(buf->asize ? buf->ptr : NULL, new_size);
+ if (!new_ptr)
+ return false;
- new_ptr = (unsigned char *)realloc(new_ptr, new_size);
-
- if (!new_ptr) {
- perror("realloc in cmark_strbuf_grow");
- abort();
- }
-
- buf->asize = (bufsize_t)new_size;
+ buf->asize = new_size;
buf->ptr = new_ptr;
+ return true;
}
bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; }
@@ -117,8 +98,8 @@ void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data,
cmark_strbuf_clear(buf);
} else {
if (data != buf->ptr) {
- if (len >= buf->asize)
- cmark_strbuf_grow(buf, len);
+ if (len >= buf->asize && !cmark_strbuf_grow(buf, len))
+ return;
memmove(buf->ptr, data, len);
}
buf->size = len;
@@ -128,21 +109,21 @@ void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data,
void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) {
cmark_strbuf_set(buf, (const unsigned char *)string,
- string ? cmark_strbuf_safe_strlen(string) : 0);
+ string ? strlen(string) : 0);
}
void cmark_strbuf_putc(cmark_strbuf *buf, int c) {
- S_strbuf_grow_by(buf, 1);
+ if (!S_strbuf_grow_by(buf, 1))
+ return;
buf->ptr[buf->size++] = (unsigned char)(c & 0xFF);
buf->ptr[buf->size] = '\0';
}
void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data,
bufsize_t len) {
- if (len <= 0)
+ if (len <= 0 || !S_strbuf_grow_by(buf, len))
return;
- S_strbuf_grow_by(buf, len);
memmove(buf->ptr + buf->size, data, len);
buf->size += len;
buf->ptr[buf->size] = '\0';
@@ -150,7 +131,7 @@ void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data,
void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) {
cmark_strbuf_put(buf, (const unsigned char *)string,
- cmark_strbuf_safe_strlen(string));
+ strlen(string));
}
void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize,
diff --git a/src/buffer.h b/src/buffer.h
index 6fd0cae..ad4f341 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -5,13 +5,15 @@
#include <stdarg.h>
#include <string.h>
#include <limits.h>
+#include <unistd.h>
+#include <stdbool.h>
#include "config.h"
#ifdef __cplusplus
extern "C" {
#endif
-typedef int bufsize_t;
+typedef ssize_t bufsize_t;
typedef struct {
unsigned char *ptr;
@@ -22,7 +24,22 @@ extern unsigned char cmark_strbuf__initbuf[];
#define GH_BUF_INIT \
{ cmark_strbuf__initbuf, 0, 0 }
-#define BUFSIZE_MAX INT_MAX
+
+/*
+ * Maximum size for memory storage on any given `cmark_strbuf` object.
+ *
+ * This is a "safe" value to prevent unbounded memory growth when
+ * parsing arbitrarily large (and potentially malicious) documents.
+ *
+ * It is currently set to 32mb, which is a reasonable default for
+ * production applications. If you need to parse documents larger than
+ * that, you can increase this value up to `SSIZE_MAX / 2` (which in
+ * practice resolves to 1/4th of the total address space for the program).
+ *
+ * Anything larger than that is a security threat and hence static checks
+ * will prevent CMark from compiling.
+ */
+#define BUFSIZE_MAX (32 * 1024 * 1024)
/**
* Initialize a cmark_strbuf structure.
@@ -35,7 +52,7 @@ void cmark_strbuf_init(cmark_strbuf *buf, bufsize_t initial_size);
/**
* Grow the buffer to hold at least `target_size` bytes.
*/
-void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size);
+bool cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size);
void cmark_strbuf_free(cmark_strbuf *buf);
void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b);
@@ -72,20 +89,6 @@ void cmark_strbuf_trim(cmark_strbuf *buf);
void cmark_strbuf_normalize_whitespace(cmark_strbuf *s);
void cmark_strbuf_unescape(cmark_strbuf *s);
-/* Print error and abort. */
-void cmark_strbuf_overflow_err(void);
-
-static CMARK_INLINE bufsize_t cmark_strbuf_check_bufsize(size_t size) {
- if (size > BUFSIZE_MAX) {
- cmark_strbuf_overflow_err();
- }
- return (bufsize_t)size;
-}
-
-static CMARK_INLINE bufsize_t cmark_strbuf_safe_strlen(const char *str) {
- return cmark_strbuf_check_bufsize(strlen(str));
-}
-
#ifdef __cplusplus
}
#endif
diff --git a/src/chunk.h b/src/chunk.h
index 7007492..234a4f3 100644
--- a/src/chunk.h
+++ b/src/chunk.h
@@ -83,7 +83,7 @@ static CMARK_INLINE void cmark_chunk_set_cstr(cmark_chunk *c, const char *str) {
c->data = NULL;
c->alloc = 0;
} else {
- c->len = cmark_strbuf_safe_strlen(str);
+ c->len = strlen(str);
c->data = (unsigned char *)malloc(c->len + 1);
c->alloc = 1;
memcpy(c->data, str, c->len + 1);
@@ -91,7 +91,7 @@ static CMARK_INLINE void cmark_chunk_set_cstr(cmark_chunk *c, const char *str) {
}
static CMARK_INLINE cmark_chunk cmark_chunk_literal(const char *data) {
- bufsize_t len = data ? cmark_strbuf_safe_strlen(data) : 0;
+ bufsize_t len = data ? strlen(data) : 0;
cmark_chunk c = {(unsigned char *)data, len, 0};
return c;
}
diff --git a/src/commonmark.c b/src/commonmark.c
index a5c1ccf..f1589f5 100644
--- a/src/commonmark.c
+++ b/src/commonmark.c
@@ -11,7 +11,6 @@
#include "scanners.h"
#include "render.h"
-#define safe_strlen(s) cmark_strbuf_safe_strlen(s)
#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
#define LIT(s) renderer->out(renderer, s, false, LITERAL)
#define CR() renderer->cr(renderer)
@@ -67,7 +66,7 @@ static int longest_backtick_sequence(const char *code) {
int longest = 0;
int current = 0;
size_t i = 0;
- size_t code_len = safe_strlen(code);
+ size_t code_len = strlen(code);
while (i <= code_len) {
if (code[i] == '`') {
current++;
@@ -86,7 +85,7 @@ static int shortest_unused_backtick_sequence(const char *code) {
int32_t used = 1;
int current = 0;
size_t i = 0;
- size_t code_len = safe_strlen(code);
+ size_t code_len = strlen(code);
while (i <= code_len) {
if (code[i] == '`') {
current++;
@@ -233,7 +232,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number,
list_delim == CMARK_PAREN_DELIM ? ")" : ".",
list_number < 10 ? " " : " ");
- marker_width = safe_strlen(listmarker);
+ marker_width = strlen(listmarker);
}
if (entering) {
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
@@ -275,9 +274,9 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
BLANKLINE();
}
info = cmark_node_get_fence_info(node);
- info_len = safe_strlen(info);
+ info_len = strlen(info);
code = cmark_node_get_literal(node);
- code_len = safe_strlen(code);
+ code_len = strlen(code);
// use indented form if no info, and code doesn't
// begin or end with a blank line, and code isn't
// first thing in a list item
@@ -361,7 +360,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
case CMARK_NODE_CODE:
code = cmark_node_get_literal(node);
- code_len = safe_strlen(code);
+ code_len = strlen(code);
numticks = shortest_unused_backtick_sequence(code);
for (i = 0; i < numticks; i++) {
LIT("`");
@@ -431,7 +430,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
LIT("](");
OUT(cmark_node_get_url(node), false, URL);
title = cmark_node_get_title(node);
- if (safe_strlen(title) > 0) {
+ if (strlen(title) > 0) {
LIT(" \"");
OUT(title, false, TITLE);
LIT("\"");
@@ -448,7 +447,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
LIT("](");
OUT(cmark_node_get_url(node), false, URL);
title = cmark_node_get_title(node);
- if (safe_strlen(title) > 0) {
+ if (strlen(title) > 0) {
OUT(" \"", allow_wrap, LITERAL);
OUT(title, false, TITLE);
LIT("\"");
diff --git a/src/latex.c b/src/latex.c
index 1546baa..7c3decd 100644
--- a/src/latex.c
+++ b/src/latex.c
@@ -11,7 +11,6 @@
#include "scanners.h"
#include "render.h"
-#define safe_strlen(s) cmark_strbuf_safe_strlen(s)
#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
#define LIT(s) renderer->out(renderer, s, false, LITERAL)
#define CR() renderer->cr(renderer)
@@ -168,13 +167,13 @@ static link_type get_link_type(cmark_node *node) {
return INTERNAL_LINK;
}
- url_len = (int)safe_strlen(url);
+ url_len = strlen(url);
if (url_len == 0 || scan_scheme(&url_chunk, 0) == 0) {
return NO_LINK;
}
const char *title = cmark_node_get_title(node);
- title_len = safe_strlen(title);
+ title_len = strlen(title);
// if it has a title, we can't treat it as an autolink:
if (title_len == 0) {
diff --git a/src/render.c b/src/render.c
index 9e59fa3..b1ed2e4 100644
--- a/src/render.c
+++ b/src/render.c
@@ -19,7 +19,7 @@ static CMARK_INLINE void S_blankline(cmark_renderer *renderer) {
static void S_out(cmark_renderer *renderer, const char *source, bool wrap,
cmark_escaping escape) {
- int length = cmark_strbuf_safe_strlen(source);
+ int length = strlen(source);
unsigned char nextc;
int32_t c;
int i = 0;