summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/blocks.c 59
-rw-r--r-- src/buffer.c 48
-rw-r--r-- src/buffer.h 18
-rw-r--r-- src/chunk.h 13
-rw-r--r-- src/cmark.c 30
-rw-r--r-- src/cmark.h 14
-rw-r--r-- src/inlines.c 39
-rw-r--r-- src/iterator.c 5
-rw-r--r-- src/main.c 3
-rw-r--r-- src/memory.h 12
-rw-r--r-- src/node.c 2
-rwxr-xr-x src/references.c 20
-rwxr-xr-x test/CMakeLists.txt 5
-rw-r--r-- test/exhaustion_tests.py 45
14 files changed, 122 insertions, 191 deletions
diff --git a/src/blocks.c b/src/blocks.c
index c778c7a..07eacc6 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -48,15 +48,13 @@ static cmark_node *make_block(cmark_node_type tag, int start_line,
int start_column) {
cmark_node *e;
- e = (cmark_node *)calloc(1, sizeof(*e));
- if (e != NULL) {
- e->type = tag;
- e->open = true;
- e->start_line = start_line;
- e->start_column = start_column;
- e->end_line = start_line;
- cmark_strbuf_init(&e->string_content, 32);
- }
+ e = (cmark_node *)cmark_calloc(1, sizeof(*e));
+ e->type = tag;
+ e->open = true;
+ e->start_line = start_line;
+ e->start_column = start_column;
+ e->end_line = start_line;
+ cmark_strbuf_init(&e->string_content, 32);
return e;
}
@@ -68,10 +66,10 @@ static cmark_node *make_document() {
}
cmark_parser *cmark_parser_new(int options) {
- cmark_parser *parser = (cmark_parser *)malloc(sizeof(cmark_parser));
+ cmark_parser *parser = (cmark_parser *)cmark_calloc(1, sizeof(cmark_parser));
cmark_node *document = make_document();
- cmark_strbuf *line = (cmark_strbuf *)malloc(sizeof(cmark_strbuf));
- cmark_strbuf *buf = (cmark_strbuf *)malloc(sizeof(cmark_strbuf));
+ cmark_strbuf *line = (cmark_strbuf *)cmark_calloc(1, sizeof(cmark_strbuf));
+ cmark_strbuf *buf = (cmark_strbuf *)cmark_calloc(1, sizeof(cmark_strbuf));
cmark_strbuf_init(line, 256);
cmark_strbuf_init(buf, 0);
@@ -395,17 +393,13 @@ static bufsize_t parse_list_marker(cmark_chunk *input, bufsize_t pos,
if (!cmark_isspace(peek_at(input, pos))) {
return 0;
}
- data = (cmark_list *)calloc(1, sizeof(*data));
- if (data == NULL) {
- return 0;
- } else {
- data->marker_offset = 0; // will be adjusted later
- data->list_type = CMARK_BULLET_LIST;
- data->bullet_char = c;
- data->start = 1;
- data->delimiter = CMARK_PERIOD_DELIM;
- data->tight = false;
- }
+ data = (cmark_list *)cmark_calloc(1, sizeof(*data));
+ data->marker_offset = 0; // will be adjusted later
+ data->list_type = CMARK_BULLET_LIST;
+ data->bullet_char = c;
+ data->start = 1;
+ data->delimiter = CMARK_PERIOD_DELIM;
+ data->tight = false;
} else if (cmark_isdigit(c)) {
int start = 0;
int digits = 0;
@@ -425,21 +419,16 @@ static bufsize_t parse_list_marker(cmark_chunk *input, bufsize_t pos,
if (!cmark_isspace(peek_at(input, pos))) {
return 0;
}
- data = (cmark_list *)calloc(1, sizeof(*data));
- if (data == NULL) {
- return 0;
- } else {
- data->marker_offset = 0; // will be adjusted later
- data->list_type = CMARK_ORDERED_LIST;
- data->bullet_char = 0;
- data->start = start;
- data->delimiter = (c == '.' ? CMARK_PERIOD_DELIM : CMARK_PAREN_DELIM);
- data->tight = false;
- }
+ data = (cmark_list *)cmark_calloc(1, sizeof(*data));
+ data->marker_offset = 0; // will be adjusted later
+ data->list_type = CMARK_ORDERED_LIST;
+ data->bullet_char = 0;
+ data->start = start;
+ data->delimiter = (c == '.' ? CMARK_PERIOD_DELIM : CMARK_PAREN_DELIM);
+ data->tight = false;
} else {
return 0;
}
-
} else {
return 0;
}
diff --git a/src/buffer.c b/src/buffer.c
index 80ca49a..2ac3b04 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -10,6 +10,7 @@
#include "config.h"
#include "cmark_ctype.h"
#include "buffer.h"
+#include "memory.h"
/* Used as default value for cmark_strbuf->ptr so that people can always
* assume ptr is non-NULL and zero terminated even for new cmark_strbufs.
@@ -29,34 +30,18 @@ void cmark_strbuf_init(cmark_strbuf *buf, bufsize_t initial_size) {
cmark_strbuf_grow(buf, initial_size);
}
-static CMARK_INLINE bool S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) {
- return cmark_strbuf_grow(buf, buf->size + add);
+static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) {
+ cmark_strbuf_grow(buf, buf->size + add);
}
-#if BUFSIZE_MAX > (SIZE_MAX / 4)
-# error "unsafe value for BUFSIZE_MAX"
-#endif
-
-bool cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) {
+void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) {
assert(target_size > 0);
if (target_size < buf->asize)
- return true;
-
- /*
- * Do not allow string buffers to grow past this "safe" value.
- *
- * Note that this is a soft cap to prevent unbounded memory growth:
- * in practice, the buffer can get larger than this value because we
- * overgrow it by 50%
- *
- * Note that there are no overflow checks for the realloc because
- * the value of BUFSIZE_MAX is always assured to be impossible
- * to overflow on both 32 and 64 bit systems, since it will never
- * be larger than 1/4th of our address space.
- */
- if (target_size > BUFSIZE_MAX)
- return false;
+ return;
+
+ if (target_size > (bufsize_t)(SIZE_MAX / 4))
+ cmark_trigger_oom();
/* Oversize the buffer by 50% to guarantee amortized linear time
* complexity on append operations. */
@@ -64,13 +49,10 @@ bool cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) {
new_size += 1;
new_size = (new_size + 7) & ~7;
- unsigned char *new_ptr = realloc(buf->asize ? buf->ptr : NULL, new_size);
- if (!new_ptr)
- return false;
+ unsigned char *new_ptr = cmark_realloc(buf->asize ? buf->ptr : NULL, new_size);
buf->asize = new_size;
buf->ptr = new_ptr;
- return true;
}
bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; }
@@ -98,8 +80,8 @@ void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data,
cmark_strbuf_clear(buf);
} else {
if (data != buf->ptr) {
- if (len >= buf->asize && !cmark_strbuf_grow(buf, len))
- return;
+ if (len >= buf->asize)
+ cmark_strbuf_grow(buf, len);
memmove(buf->ptr, data, len);
}
buf->size = len;
@@ -113,17 +95,17 @@ void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) {
}
void cmark_strbuf_putc(cmark_strbuf *buf, int c) {
- if (!S_strbuf_grow_by(buf, 1))
- return;
+ S_strbuf_grow_by(buf, 1);
buf->ptr[buf->size++] = (unsigned char)(c & 0xFF);
buf->ptr[buf->size] = '\0';
}
void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data,
bufsize_t len) {
- if (len <= 0 || !S_strbuf_grow_by(buf, len))
+ if (len <= 0)
return;
+ S_strbuf_grow_by(buf, len);
memmove(buf->ptr + buf->size, data, len);
buf->size += len;
buf->ptr[buf->size] = '\0';
@@ -165,7 +147,7 @@ unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) {
if (buf->asize == 0) {
/* return an empty string */
- return (unsigned char *)calloc(1, 1);
+ return cmark_calloc(1, 1);
}
cmark_strbuf_init(buf, 0);
diff --git a/src/buffer.h b/src/buffer.h
index ad4f341..93ffc95 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -25,22 +25,6 @@ extern unsigned char cmark_strbuf__initbuf[];
#define GH_BUF_INIT \
{ cmark_strbuf__initbuf, 0, 0 }
-/*
- * Maximum size for memory storage on any given `cmark_strbuf` object.
- *
- * This is a "safe" value to prevent unbounded memory growth when
- * parsing arbitrarily large (and potentially malicious) documents.
- *
- * It is currently set to 32mb, which is a reasonable default for
- * production applications. If you need to parse documents larger than
- * that, you can increase this value up to `SSIZE_MAX / 2` (which in
- * practice resolves to 1/4th of the total address space for the program).
- *
- * Anything larger than that is a security threat and hence static checks
- * will prevent CMark from compiling.
- */
-#define BUFSIZE_MAX (32 * 1024 * 1024)
-
/**
* Initialize a cmark_strbuf structure.
*
@@ -52,7 +36,7 @@ void cmark_strbuf_init(cmark_strbuf *buf, bufsize_t initial_size);
/**
* Grow the buffer to hold at least `target_size` bytes.
*/
-bool cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size);
+void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size);
void cmark_strbuf_free(cmark_strbuf *buf);
void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b);
diff --git a/src/chunk.h b/src/chunk.h
index 234a4f3..acceaa1 100644
--- a/src/chunk.h
+++ b/src/chunk.h
@@ -4,6 +4,7 @@
#include <string.h>
#include <stdlib.h>
#include <assert.h>
+#include "memory.h"
#include "cmark_ctype.h"
#include "buffer.h"
@@ -61,13 +62,11 @@ static CMARK_INLINE const char *cmark_chunk_to_cstr(cmark_chunk *c) {
if (c->alloc) {
return (char *)c->data;
}
- str = (unsigned char *)malloc(c->len + 1);
- if (str != NULL) {
- if (c->len > 0) {
- memcpy(str, c->data, c->len);
- }
- str[c->len] = 0;
+ str = (unsigned char *)cmark_calloc(c->len + 1, 1);
+ if (c->len > 0) {
+ memcpy(str, c->data, c->len);
}
+ str[c->len] = 0;
c->data = str;
c->alloc = 1;
@@ -84,7 +83,7 @@ static CMARK_INLINE void cmark_chunk_set_cstr(cmark_chunk *c, const char *str) {
c->alloc = 0;
} else {
c->len = strlen(str);
- c->data = (unsigned char *)malloc(c->len + 1);
+ c->data = (unsigned char *)cmark_calloc(c->len + 1, 1);
c->alloc = 1;
memcpy(c->data, str, c->len + 1);
}
diff --git a/src/cmark.c b/src/cmark.c
index 3491199..3288308 100644
--- a/src/cmark.c
+++ b/src/cmark.c
@@ -10,6 +10,36 @@ int cmark_version() { return CMARK_VERSION; }
const char *cmark_version_string() { return CMARK_VERSION_STRING; }
+/* Callback run by cmark_trigger_oom() just before it aborts. NULL (the
+ * initial value) means no callback is installed; it is replaced through
+ * cmark_set_oom_handler(). */
+void (*_cmark_on_oom)(void) = NULL;
+
+/* Report an unrecoverable out-of-memory condition.
+ *
+ * Runs the installed OOM callback, if there is one, and then calls
+ * abort(). The abort is reached whenever the callback returns, so this
+ * function never hands control back to its caller by a normal return. */
+void cmark_trigger_oom(void)
+{
+  void (*on_oom)(void) = _cmark_on_oom;
+
+  if (on_oom != NULL) {
+    on_oom();
+  }
+  abort();
+}
+
+/* Install `handler` as the out-of-memory callback, replacing whatever was
+ * set before. Passing NULL removes the callback, in which case
+ * cmark_trigger_oom() goes straight to abort(). */
+void cmark_set_oom_handler(void (*handler)(void))
+{
+_cmark_on_oom = handler;
+}
+
+/* calloc() wrapper that never returns NULL.
+ *
+ * Returns a zero-initialised block of at least `nmem * size` bytes. If the
+ * allocation fails, cmark_trigger_oom() is called (which aborts), so
+ * callers do not need to check the result.
+ *
+ * A request for zero bytes is rounded up to one byte: the C standard lets
+ * calloc() return NULL for a zero-sized request even when memory is
+ * available, and that must not be mistaken for an out-of-memory condition.
+ */
+void *cmark_calloc(size_t nmem, size_t size)
+{
+  void *ptr;
+
+  if (nmem == 0 || size == 0) {
+    /* Guarantee a valid, freeable, non-NULL pointer on every platform. */
+    nmem = 1;
+    size = 1;
+  }
+
+  ptr = calloc(nmem, size);
+  if (ptr == NULL)
+    cmark_trigger_oom();
+  return ptr;
+}
+
+/* realloc() wrapper that never returns NULL.
+ *
+ * Resizes `ptr` (or allocates a fresh block when `ptr` is NULL) to at
+ * least `size` bytes. If the allocation fails, cmark_trigger_oom() is
+ * called (which aborts), so callers do not need to check the result.
+ *
+ * A `size` of zero is rounded up to one byte: realloc(ptr, 0) is
+ * implementation-defined (and undefined as of C23) and may free `ptr` and
+ * return NULL, which must not be mistaken for an out-of-memory condition.
+ */
+void *cmark_realloc(void *ptr, size_t size)
+{
+  void *ptr_new;
+
+  if (size == 0)
+    size = 1;
+
+  ptr_new = realloc(ptr, size);
+  if (ptr_new == NULL)
+    cmark_trigger_oom();
+  return ptr_new;
+}
+
char *cmark_markdown_to_html(const char *text, size_t len, int options) {
cmark_node *doc;
char *result;
diff --git a/src/cmark.h b/src/cmark.h
index 911ceb7..a43011b 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -573,6 +573,20 @@ int cmark_version();
CMARK_EXPORT
const char *cmark_version_string();
+/** Set the callback function that will be invoked whenever the
+ * library hits an out-of-memory situation.
+ *
+ * This can happen when the heap memory allocator fails to allocate
+ * a block of memory, or when the requested size of an in-memory buffer
+ * exceeds the maximum the library supports.
+ *
+ * If no OOM handler is set, the library will call `abort` and
+ * terminate itself and the running process. If the custom OOM handler
+ * you set does return (i.e. it does not gracefully terminate the
+ * application itself), the library will likewise call `abort` as soon
+ * as the handler has returned.
+ */
+CMARK_EXPORT
+void cmark_set_oom_handler(void (*handler)(void));
+
/** # AUTHORS
*
* John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
diff --git a/src/inlines.c b/src/inlines.c
index 6acbb44..f2e7cf1 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -63,38 +63,16 @@ static bufsize_t subject_find_special_char(subject *subj, int options);
// Create an inline with a literal string value.
static CMARK_INLINE cmark_node *make_literal(cmark_node_type t, cmark_chunk s) {
- cmark_node *e = (cmark_node *)calloc(1, sizeof(*e));
- if (e != NULL) {
- e->type = t;
- e->as.literal = s;
- e->next = NULL;
- e->prev = NULL;
- e->parent = NULL;
- e->first_child = NULL;
- e->last_child = NULL;
- // These fields aren't used for inlines:
- e->start_line = 0;
- e->start_column = 0;
- e->end_line = 0;
- }
+ cmark_node *e = (cmark_node *)cmark_calloc(1, sizeof(*e));
+ e->type = t;
+ e->as.literal = s;
return e;
}
// Create an inline with no value.
static CMARK_INLINE cmark_node *make_simple(cmark_node_type t) {
- cmark_node *e = (cmark_node *)calloc(1, sizeof(*e));
- if (e != NULL) {
- e->type = t;
- e->next = NULL;
- e->prev = NULL;
- e->parent = NULL;
- e->first_child = NULL;
- e->last_child = NULL;
- // These fields aren't used for inlines:
- e->start_line = 0;
- e->start_column = 0;
- e->end_line = 0;
- }
+ cmark_node *e = (cmark_node *)cmark_calloc(1, sizeof(*e));
+ e->type = t;
return e;
}
@@ -116,7 +94,7 @@ static cmark_chunk chunk_clone(cmark_chunk *src) {
bufsize_t len = src->len;
c.len = len;
- c.data = (unsigned char *)malloc(len + 1);
+ c.data = (unsigned char *)cmark_calloc(len + 1, 1);
c.alloc = 1;
memcpy(c.data, src->data, len);
c.data[len] = '\0';
@@ -362,10 +340,7 @@ static void remove_delimiter(subject *subj, delimiter *delim) {
static void push_delimiter(subject *subj, unsigned char c, bool can_open,
bool can_close, cmark_node *inl_text) {
- delimiter *delim = (delimiter *)malloc(sizeof(delimiter));
- if (delim == NULL) {
- return;
- }
+ delimiter *delim = (delimiter *)cmark_calloc(1, sizeof(delimiter));
delim->delim_char = c;
delim->can_open = can_open;
delim->can_close = can_close;
diff --git a/src/iterator.c b/src/iterator.c
index 351b81f..40287f5 100644
--- a/src/iterator.c
+++ b/src/iterator.c
@@ -16,10 +16,7 @@ cmark_iter *cmark_iter_new(cmark_node *root) {
if (root == NULL) {
return NULL;
}
- cmark_iter *iter = (cmark_iter *)malloc(sizeof(cmark_iter));
- if (iter == NULL) {
- return NULL;
- }
+ cmark_iter *iter = (cmark_iter *)cmark_calloc(1, sizeof(cmark_iter));
iter->root = root;
iter->cur.ev_type = CMARK_EVENT_NONE;
iter->cur.node = NULL;
diff --git a/src/main.c b/src/main.c
index ff752e5..5ea4b61 100644
--- a/src/main.c
+++ b/src/main.c
@@ -3,6 +3,7 @@
#include <string.h>
#include <errno.h>
#include "config.h"
+#include "memory.h"
#include "cmark.h"
#if defined(_WIN32) && !defined(__CYGWIN__)
@@ -80,7 +81,7 @@ int main(int argc, char *argv[]) {
_setmode(_fileno(stdout), _O_BINARY);
#endif
- files = (int *)malloc(argc * sizeof(*files));
+ files = (int *)cmark_calloc(argc, sizeof(*files));
for (i = 1; i < argc; i++) {
if (strcmp(argv[i], "--version") == 0) {
diff --git a/src/memory.h b/src/memory.h
new file mode 100644
index 0000000..f05d566
--- /dev/null
+++ b/src/memory.h
@@ -0,0 +1,12 @@
+#ifndef CMARK_MEM_H
+#define CMARK_MEM_H
+
+#include <stddef.h> /* size_t */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Internal allocation helpers. Both wrappers call cmark_trigger_oom()
+ * when the underlying allocator returns NULL, so callers may use the
+ * returned pointer without checking it. */
+
+/* calloc() replacement: zero-initialised memory for `nmem` objects of
+ * `size` bytes each. */
+void *cmark_calloc(size_t nmem, size_t size);
+
+/* realloc() replacement: resize `ptr` to `size` bytes. */
+void *cmark_realloc(void *ptr, size_t size);
+
+/* Run the user-installed OOM handler (if any), then abort(). */
+void cmark_trigger_oom(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/node.c b/src/node.c
index 00edbb1..30cd69f 100644
--- a/src/node.c
+++ b/src/node.c
@@ -71,7 +71,7 @@ static bool S_can_contain(cmark_node *node, cmark_node *child) {
}
cmark_node *cmark_node_new(cmark_node_type type) {
- cmark_node *node = (cmark_node *)calloc(1, sizeof(*node));
+ cmark_node *node = (cmark_node *)cmark_calloc(1, sizeof(*node));
node->type = type;
switch (node->type) {
diff --git a/src/references.c b/src/references.c
index 6cb2b20..168bd89 100755
--- a/src/references.c
+++ b/src/references.c
@@ -76,16 +76,14 @@ void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label,
if (reflabel == NULL)
return;
- ref = (cmark_reference *)calloc(1, sizeof(*ref));
- if (ref != NULL) {
- ref->label = reflabel;
- ref->hash = refhash(ref->label);
- ref->url = cmark_clean_url(url);
- ref->title = cmark_clean_title(title);
- ref->next = NULL;
-
- add_reference(map, ref);
- }
+ ref = (cmark_reference *)cmark_calloc(1, sizeof(*ref));
+ ref->label = reflabel;
+ ref->hash = refhash(ref->label);
+ ref->url = cmark_clean_url(url);
+ ref->title = cmark_clean_title(title);
+ ref->next = NULL;
+
+ add_reference(map, ref);
}
// Returns reference if refmap contains a reference with matching
@@ -140,5 +138,5 @@ void cmark_reference_map_free(cmark_reference_map *map) {
}
cmark_reference_map *cmark_reference_map_new(void) {
- return (cmark_reference_map *)calloc(1, sizeof(cmark_reference_map));
+ return (cmark_reference_map *)cmark_calloc(1, sizeof(cmark_reference_map));
}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 098321b..3b23cff 100755
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -38,11 +38,6 @@ IF (PYTHONINTERP_FOUND)
"--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src"
)
- add_test(exhaustion_tests_executable
- ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/exhaustion_tests.py"
- "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark"
- )
-
add_test(spectest_executable
${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark"
)
diff --git a/test/exhaustion_tests.py b/test/exhaustion_tests.py
deleted file mode 100644
index 876bb15..0000000
--- a/test/exhaustion_tests.py
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import argparse
-import sys
-import os
-import tempfile
-from subprocess import *
-
-DATA = bytes("""
-Ph'nglui mglw'nafh Cthulhu R'lyeh wgah'nagl fhtagn. Ya r'luh hlirghor uaaah ah, nilgh'ri fhtagnnyth ilyaa gof'nn ilyaa nog zhro hupadgh yaor, ah hupadgh ph'zhro h'uh'e Yoggoth vulgtm uh'e hai mnahn'. Phlegeth fm'latgh lw'nafh Cthulhu kn'a nog 'bthnk ya ehye fm'latgh y-n'gha, y-r'luh 'fhalmaoth gnaiih ep hai ep nog lloig grah'n hafh'drn zhroyar, mnahn' uh'e nnnch' chtenff uln f'ooboshu orr'e k'yarnak hlirgh. H'vulgtm ng'bthnk n'ghft stell'bsna hai nnnkadishtu lloig nglui nilgh'ri ron, nagnaiih ronnyth phlegeth f'ep ooboshuor mg y-Hastur shagg, f'Chaugnar Faugn bug uaaah ehye kn'a geb orr'e lw'nafh. Ilyaa shtunggli uh'e naflhrii k'yarnak ya, nog k'yarnak wgah'n shagg nnnhlirgh, gof'nnyar r'luh Azathoth wgah'n.
-""", 'UTF-8')
-
-# CMark has a default maximum buffer growth size of 32mb,
-# which means that the total size of the buffer cannot be
-# larger than (32 + 16)mb in total, accounting for overgrowth
-MAX_FILE_SIZE = 48.0 # mb
-
-def exhaustion(prog):
- with tempfile.TemporaryFile() as tmp:
- p1 = Popen(prog.split(), stdout=tmp, stdin=PIPE)
- written, read = 0, 0
-
- for i in range(512 * 512):
- p1.stdin.write(DATA)
- written += len(DATA)
-
- p1.stdin.close()
- res = p1.wait()
- written = written / (1024.0 * 1024.0)
- read = tmp.tell() / (1024.0 * 1024.0)
-
- if res != 0:
- raise Exception("CMark did not exit properly")
- if written <= read:
- raise Exception("Output was not truncated (%fmb -> %fmb)" % (written, read))
- if read > MAX_FILE_SIZE:
- raise Exception("Output was not truncated at the expected range (%fmb)" % (read))
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(description='Run cmark exhaustion tests')
- parser.add_argument('--program', dest='program', nargs='?', default=None,
- help='program to test')
- args = parser.parse_args(sys.argv[1:])
- exhaustion(args.program)