summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--api_test/main.c37
-rw-r--r--src/blocks.c48
-rw-r--r--src/buffer.c32
-rw-r--r--src/buffer.h20
-rw-r--r--src/cmark.c3
-rw-r--r--src/cmark.h32
-rw-r--r--src/inlines.c2
-rw-r--r--src/inlines.h2
-rw-r--r--src/main.c5
-rw-r--r--src/parser.h3
-rw-r--r--src/source_map.c22
-rw-r--r--src/source_map.h23
-rw-r--r--test/cmark.py2
13 files changed, 187 insertions, 44 deletions
diff --git a/api_test/main.c b/api_test/main.c
index 17e1582..61291dc 100644
--- a/api_test/main.c
+++ b/api_test/main.c
@@ -5,6 +5,7 @@
#define CMARK_NO_SHORT_NAMES
#include "cmark.h"
#include "node.h"
+#include "parser.h"
#include "harness.h"
#include "cplusplus.h"
@@ -883,6 +884,41 @@ static void test_feed_across_line_ending(test_batch_runner *runner) {
cmark_node_free(document);
}
+// Parse 'str' with a parser whose running byte count has been preset to
+// 'fake_total', simulating that many bytes of earlier input. The parser's
+// error code is stored in '*err'. Returns the result of
+// cmark_parser_finish, which is NULL when an error was recorded.
+static cmark_node *S_parse_with_fake_total(bufsize_t fake_total,
+ const char *str,
+ cmark_err_type *err) {
+ cmark_node *result;
+ cmark_parser *p = cmark_parser_new(CMARK_OPT_DEFAULT);
+
+ // Pretend that 'fake_total' bytes have already been fed.
+ p->total_bytes = fake_total;
+
+ cmark_parser_feed(p, str, strlen(str));
+ result = cmark_parser_finish(p);
+
+ // The error code has to be read before the parser is released.
+ *err = cmark_parser_get_error(p);
+ cmark_parser_free(p);
+
+ return result;
+}
+
+// Checks the BUFSIZE_MAX limit on total input size: input that would push
+// the byte count past BUFSIZE_MAX must fail with
+// CMARK_ERR_INPUT_TOO_LARGE, while input that ends exactly at the limit
+// must still produce a document.
+static void test_bufsize_overflow(test_batch_runner *runner) {
+ cmark_err_type code;
+ cmark_node *tree;
+
+ // One byte past the limit.
+ tree = S_parse_with_fake_total(BUFSIZE_MAX, "a", &code);
+ OK(runner, tree == NULL, "parse 1 byte after BUFSIZE_MAX bytes fails");
+ INT_EQ(runner, code, CMARK_ERR_INPUT_TOO_LARGE,
+ "parse 1 byte after BUFSIZE_MAX bytes error code");
+
+ // A ten-byte chunk whose last byte crosses the limit.
+ tree = S_parse_with_fake_total(BUFSIZE_MAX - 9, "0123456789", &code);
+ OK(runner, tree == NULL, "parse 10 byte after BUFSIZE_MAX-9 bytes fails");
+ INT_EQ(runner, code, CMARK_ERR_INPUT_TOO_LARGE,
+ "parse 10 byte after BUFSIZE_MAX-9 bytes error code");
+
+ // Exactly reaching the limit with a single byte is allowed.
+ tree = S_parse_with_fake_total(BUFSIZE_MAX - 1, "a", &code);
+ OK(runner, tree != NULL, "parse 1 byte after BUFSIZE_MAX-1 bytes");
+ cmark_node_free(tree);
+
+ // Exactly reaching the limit with a ten-byte chunk is allowed.
+ tree = S_parse_with_fake_total(BUFSIZE_MAX - 10, "0123456789", &code);
+ OK(runner, tree != NULL, "parse 10 byte after BUFSIZE_MAX-10 bytes");
+ cmark_node_free(tree);
+}
+
int main() {
int retval;
test_batch_runner *runner = test_batch_runner_new();
@@ -908,6 +944,7 @@ int main() {
test_cplusplus(runner);
test_safe(runner);
test_feed_across_line_ending(runner);
+ test_bufsize_overflow(runner);
test_print_summary(runner);
retval = test_ok(runner) ? 0 : 1;
diff --git a/src/blocks.c b/src/blocks.c
index 1c1d160..c680535 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -96,6 +96,8 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
parser->refmap = cmark_reference_map_new(mem);
parser->root = document;
parser->current = document;
+ parser->error_code = CMARK_ERR_NONE;
+ parser->total_bytes = 0;
parser->line_number = 0;
parser->line_offset = 0;
parser->offset = 0;
@@ -550,6 +552,20 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
const unsigned char *skipped;
static const uint8_t repl[] = {239, 191, 189};
+ if (parser->error_code) {
+ return;
+ }
+
+ // Limit maximum document size to BUFSIZE_MAX. This makes sure that we
+ // never create strbufs larger than BUFSIZE_MAX. If the limit would be
+ // exceeded, record CMARK_ERR_INPUT_TOO_LARGE and ignore this and all
+ // later input; cmark_parser_finish then returns NULL.
+ if (len > (size_t)(BUFSIZE_MAX - parser->total_bytes)) {
+ parser->error_code = CMARK_ERR_INPUT_TOO_LARGE;
+ return;
+ }
+ parser->total_bytes += (bufsize_t)len;
+
if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
// skip NL if last buffer ended with CR ; see #117
buffer++;
@@ -1266,14 +1282,19 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) {
cmark_strbuf_clear(&parser->linebuf);
}
+ cmark_strbuf_clear(&parser->curline);
+
+ if (parser->error_code) {
+ cmark_node_free(parser->root);
+ return NULL;
+ }
+
finalize_document(parser);
if (parser->options & CMARK_OPT_NORMALIZE) {
cmark_consolidate_text_nodes(parser->root);
}
- cmark_strbuf_free(&parser->curline);
-
#if CMARK_DEBUG_NODES
if (cmark_node_check(parser->root, stderr)) {
abort();
@@ -1287,3 +1308,26 @@ cmark_parser_get_first_source_extent(cmark_parser *parser)
{
return parser->source_map->head;
}
+
+// Returns the error code currently recorded on 'parser'. A newly created
+// parser starts out with CMARK_ERR_NONE.
+cmark_err_type cmark_parser_get_error(cmark_parser *parser) {
+ return parser->error_code;
+}
+
+// Maps the error code recorded on 'parser' to a static, human-readable
+// string. Codes without a dedicated message (this includes
+// CMARK_ERR_NONE) yield "Unknown error".
+const char *cmark_parser_get_error_message(cmark_parser *parser) {
+ switch (parser->error_code) {
+ case CMARK_ERR_OUT_OF_MEMORY:
+ return "Out of memory";
+ case CMARK_ERR_INPUT_TOO_LARGE:
+ return "Input too large";
+ default:
+ return "Unknown error";
+ }
+}
+
diff --git a/src/buffer.c b/src/buffer.c
index a6754b6..9a9e9ad 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -33,6 +33,11 @@ void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf,
}
+// Grows 'buf' so that it can hold 'add' more bytes than its current size.
+// Aborts the process if the resulting size would exceed BUFSIZE_MAX.
static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) {
+ // Safety check for overflow. Comparing against the remaining headroom
+ // keeps the check itself from overflowing.
+ if (add > BUFSIZE_MAX - buf->size) {
+ fprintf(stderr, "Internal cmark_strbuf overflow\n");
+ abort();
+ }
cmark_strbuf_grow(buf, buf->size + add);
}
@@ -42,18 +47,25 @@ void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) {
if (target_size < buf->asize)
return;
- if (target_size > (bufsize_t)(INT32_MAX / 2))
- abort();
-
- /* Oversize the buffer by 50% to guarantee amortized linear time
- * complexity on append operations. */
- bufsize_t new_size = target_size + target_size / 2;
- new_size += 1;
- new_size = (new_size + 7) & ~7;
+ // Oversize the buffer by 50% to guarantee amortized linear time
+ // complexity on append operations.
+ bufsize_t add = target_size / 2;
+ // Account for terminating NUL byte.
+ add += 1;
+ // Round up to multiple of eight.
+ add = (add + 7) & ~7;
+
+ // Clamp the allocation to BUFSIZE_MAX plus one byte for the NUL
+ // terminator. The test is written as a subtraction from the limit
+ // because "target_size + add" could overflow bufsize_t, which is
+ // undefined behavior for a signed type.
+ if (add > BUFSIZE_MAX + 1 - target_size) {
+ target_size = BUFSIZE_MAX + 1;
+ }
+ else {
+ target_size += add;
+ }
buf->ptr = (unsigned char *)buf->mem->realloc(buf->asize ? buf->ptr : NULL,
- new_size);
- buf->asize = new_size;
+ target_size);
+ buf->asize = target_size;
}
bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; }
diff --git a/src/buffer.h b/src/buffer.h
index e878075..7f31a74 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -13,8 +13,28 @@
extern "C" {
#endif
+#ifndef CMARK_HUGE_DOCS
+
+// Maximum strbuf size without terminating NUL byte.
+#define BUFSIZE_MAX (INT32_MAX - 1)
+
typedef int32_t bufsize_t;
+#else // CMARK_HUGE_DOCS
+
+// This is an untested proof of concept of how to handle multi-gigabyte
+// documents on 64-bit platforms at the expense of internal struct sizes.
+
+#ifdef PTRDIFF_MAX
+ #define BUFSIZE_MAX (PTRDIFF_MAX - 1)
+#else
+ // Fallback when PTRDIFF_MAX is not defined: assumes ptrdiff_t is as wide
+ // as size_t. One is subtracted, as in the branch above, so that
+ // BUFSIZE_MAX + 1 remains representable.
+ #define BUFSIZE_MAX ((ptrdiff_t)((size_t)-1 / 2) - 1)
+#endif
+
+typedef ptrdiff_t bufsize_t;
+
+#endif // CMARK_HUGE_DOCS
+
typedef struct {
cmark_mem *mem;
unsigned char *ptr;
diff --git a/src/cmark.c b/src/cmark.c
index 2ef6cb4..da93abe 100644
--- a/src/cmark.c
+++ b/src/cmark.c
@@ -36,6 +36,9 @@ char *cmark_markdown_to_html(const char *text, size_t len, int options) {
char *result;
doc = cmark_parse_document(text, len, options);
+ if (doc == NULL) {
+ return NULL;
+ }
result = cmark_render_html(doc, options);
cmark_node_free(doc);
diff --git a/src/cmark.h b/src/cmark.h
index 034f0e6..5ce6d10 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -2,7 +2,6 @@
#define CMARK_H
#include <stdio.h>
-#include <stdint.h>
#include <cmark_export.h>
#include <cmark_version.h>
@@ -23,7 +22,7 @@ extern "C" {
/** Convert 'text' (assumed to be a UTF-8 encoded string with length
* 'len') from CommonMark Markdown to HTML, returning a null-terminated,
* UTF-8-encoded string. It is the caller's responsibility
- * to free the returned buffer.
+ * to free the returned buffer. Returns NULL on error.
*/
CMARK_EXPORT
char *cmark_markdown_to_html(const char *text, size_t len, int options);
@@ -99,6 +98,12 @@ typedef enum {
CMARK_PAREN_DELIM
} cmark_delim_type;
+/** Error codes reported by 'cmark_parser_get_error'. 'CMARK_ERR_NONE' is
+ * zero, so a nonzero code indicates an error.
+ */
+typedef enum {
+ CMARK_ERR_NONE = 0,
+ CMARK_ERR_OUT_OF_MEMORY = 1,
+ CMARK_ERR_INPUT_TOO_LARGE = 2
+} cmark_err_type;
+
typedef struct cmark_node cmark_node;
typedef struct cmark_parser cmark_parser;
typedef struct cmark_iter cmark_iter;
@@ -489,12 +494,22 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem);
CMARK_EXPORT
void cmark_parser_free(cmark_parser *parser);
+/** Return the error code after a failed operation.
+ */
+CMARK_EXPORT
+cmark_err_type cmark_parser_get_error(cmark_parser *parser);
+
+/** Return a human-readable message for the error code after a failed operation.
+ */
+CMARK_EXPORT
+const char *cmark_parser_get_error_message(cmark_parser *parser);
+
/** Feeds a string of length 'len' to 'parser'.
*/
CMARK_EXPORT
void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);
-/** Finish parsing and return a pointer to a tree of nodes.
+/** Finish parsing and return a pointer to a tree of nodes or NULL on error.
*/
CMARK_EXPORT
cmark_node *cmark_parser_finish(cmark_parser *parser);
@@ -507,7 +522,7 @@ cmark_source_extent *cmark_parser_get_first_source_extent(cmark_parser *parser);
/** Parse a CommonMark document in 'buffer' of length 'len'.
* Returns a pointer to a tree of nodes. The memory allocated for
* the node tree should be released using 'cmark_node_free'
- * when it is no longer needed.
+ * when it is no longer needed. Returns NULL on error.
*/
CMARK_EXPORT
cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
@@ -515,22 +530,23 @@ cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
/** Parse a CommonMark document in file 'f', returning a pointer to
* a tree of nodes. The memory allocated for the node tree should be
* released using 'cmark_node_free' when it is no longer needed.
+ * Returns NULL on error.
*/
CMARK_EXPORT
cmark_node *cmark_parse_file(FILE *f, int options);
-/**
+/**
* ## Source map API
*/
/* Return the index, in bytes, of the start of this extent */
CMARK_EXPORT
-uint64_t cmark_source_extent_get_start(cmark_source_extent *extent);
+size_t cmark_source_extent_get_start(cmark_source_extent *extent);
-/* Return the index, in bytes, of the stop of this extent. This
+/* Return the index, in bytes, of the stop of this extent. This
* index is not included in the extent*/
CMARK_EXPORT
-uint64_t cmark_source_extent_get_stop(cmark_source_extent *extent);
+size_t cmark_source_extent_get_stop(cmark_source_extent *extent);
/* Return the extent immediately following 'extent' */
CMARK_EXPORT
diff --git a/src/inlines.c b/src/inlines.c
index 96da28b..099078e 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -1230,7 +1230,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
// Parse inlines from parent's string_content, adding as children of parent.
extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
cmark_reference_map *refmap, int options,
- cmark_source_map *source_map, uint64_t total_length) {
+ cmark_source_map *source_map, bufsize_t total_length) {
subject subj;
subject_from_buf(mem, &subj, &parent->content, refmap, source_map);
bufsize_t initial_len = subj.input.len;
diff --git a/src/inlines.h b/src/inlines.h
index 8de31b1..8459794 100644
--- a/src/inlines.h
+++ b/src/inlines.h
@@ -14,7 +14,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
cmark_reference_map *refmap, int options,
- cmark_source_map *source_map, uint64_t total_length);
+ cmark_source_map *source_map, bufsize_t total_length);
bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
cmark_reference_map *refmap,
diff --git a/src/main.c b/src/main.c
index 42cd8b1..aeb81de 100644
--- a/src/main.c
+++ b/src/main.c
@@ -181,6 +181,11 @@ int main(int argc, char *argv[]) {
document = cmark_parser_finish(parser);
-cmark_parser_free(parser);
+ if (document == NULL) {
+ // The message is read from the parser, so fetch it before the parser
+ // is freed.
+ fprintf(stderr, "%s\n", cmark_parser_get_error_message(parser));
+ cmark_parser_free(parser);
+ exit(1);
+ }
+ cmark_parser_free(parser);
+
print_document(document, writer, options, width);
cmark_node_free(document);
diff --git a/src/parser.h b/src/parser.h
index b28a8a7..7b4fdbc 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -2,6 +2,7 @@
#define CMARK_AST_H
#include <stdio.h>
+#include "cmark.h"
#include "node.h"
#include "buffer.h"
#include "memory.h"
@@ -18,6 +19,8 @@ struct cmark_parser {
struct cmark_reference_map *refmap;
struct cmark_node *root;
struct cmark_node *current;
+ cmark_err_type error_code;
+ bufsize_t total_bytes;
int line_number;
bufsize_t offset;
bufsize_t column;
diff --git a/src/source_map.c b/src/source_map.c
index db01a21..dccbe7c 100644
--- a/src/source_map.c
+++ b/src/source_map.c
@@ -19,7 +19,7 @@ source_map_free(cmark_source_map *self)
}
cmark_source_extent *
-source_map_append_extent(cmark_source_map *self, uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type)
+source_map_append_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop, cmark_node *node, cmark_extent_type type)
{
assert (start <= stop);
assert (!self->tail || self->tail->stop <= start);
@@ -46,7 +46,7 @@ source_map_append_extent(cmark_source_map *self, uint64_t start, uint64_t stop,
cmark_source_extent *
source_map_insert_extent(cmark_source_map *self, cmark_source_extent *previous,
- uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type)
+ bufsize_t start, bufsize_t stop, cmark_node *node, cmark_extent_type type)
{
if (start == stop)
return previous;
@@ -101,7 +101,7 @@ source_map_free_extent(cmark_source_map *self, cmark_source_extent *extent)
cmark_source_extent *
source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent,
- cmark_node *node, uint64_t total_length)
+ cmark_node *node, bufsize_t total_length)
{
cmark_source_extent *next_extent = extent->next;
cmark_source_extent *res;
@@ -135,7 +135,7 @@ source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent,
}
cmark_source_extent *
-source_map_splice_extent(cmark_source_map *self, uint64_t start, uint64_t stop,
+source_map_splice_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop,
cmark_node *node, cmark_extent_type type)
{
if (!self->next_cursor) {
@@ -154,7 +154,7 @@ source_map_splice_extent(cmark_source_map *self, uint64_t start, uint64_t stop,
return self->cursor;
} else if (start + self->cursor_offset < self->next_cursor->start) {
- uint64_t new_start = self->next_cursor->start - self->cursor_offset;
+ bufsize_t new_start = self->next_cursor->start - self->cursor_offset;
self->cursor = source_map_insert_extent(self,
self->cursor,
@@ -196,17 +196,17 @@ source_map_pretty_print(cmark_source_map *self) {
cmark_source_extent *tmp;
for (tmp = self->head; tmp; tmp = tmp->next) {
- printf ("%lu:%lu - %s, %s (%p)\n", tmp->start, tmp->stop,
- cmark_node_get_type_string(tmp->node),
+ // bufsize_t is int32_t or ptrdiff_t depending on CMARK_HUGE_DOCS, so
+ // cast to a fixed type that matches the conversion specifier.
+ printf ("%lld:%lld - %s, %s (%p)\n",
+ (long long) tmp->start, (long long) tmp->stop,
+ cmark_node_get_type_string(tmp->node),
cmark_source_extent_get_type_string(tmp),
(void *) tmp->node);
}
}
bool
-source_map_check(cmark_source_map *self, uint64_t total_length)
+source_map_check(cmark_source_map *self, bufsize_t total_length)
{
- uint64_t last_stop = 0;
+ bufsize_t last_stop = 0;
cmark_source_extent *tmp;
for (tmp = self->head; tmp; tmp = tmp->next) {
@@ -224,13 +224,13 @@ source_map_check(cmark_source_map *self, uint64_t total_length)
}
-uint64_t
+size_t
cmark_source_extent_get_start(cmark_source_extent *extent)
{
return extent->start;
}
-uint64_t
+size_t
cmark_source_extent_get_stop(cmark_source_extent *extent)
{
return extent->stop;
diff --git a/src/source_map.h b/src/source_map.h
index 619a073..dca5a9f 100644
--- a/src/source_map.h
+++ b/src/source_map.h
@@ -3,6 +3,7 @@
#include "cmark.h"
#include "config.h"
+#include "buffer.h"
typedef struct _cmark_source_map
{
@@ -10,14 +11,14 @@ typedef struct _cmark_source_map
cmark_source_extent *tail;
cmark_source_extent *cursor;
cmark_source_extent *next_cursor;
- uint64_t cursor_offset;
+ bufsize_t cursor_offset;
cmark_mem *mem;
} cmark_source_map;
struct cmark_source_extent
{
- uint64_t start;
- uint64_t stop;
+ bufsize_t start;
+ bufsize_t stop;
struct cmark_source_extent *next;
struct cmark_source_extent *prev;
cmark_node *node;
@@ -29,20 +30,20 @@ cmark_source_map * source_map_new (cmark_mem *mem);
void source_map_free (cmark_source_map *self);
bool source_map_check (cmark_source_map *self,
- uint64_t total_length);
+ bufsize_t total_length);
void source_map_pretty_print (cmark_source_map *self);
cmark_source_extent * source_map_append_extent(cmark_source_map *self,
- uint64_t start,
- uint64_t stop,
+ bufsize_t start,
+ bufsize_t stop,
cmark_node *node,
cmark_extent_type type);
cmark_source_extent * source_map_insert_extent(cmark_source_map *self,
cmark_source_extent *previous,
- uint64_t start,
- uint64_t stop,
+ bufsize_t start,
+ bufsize_t stop,
cmark_node *node,
cmark_extent_type type);
@@ -52,11 +53,11 @@ cmark_source_extent * source_map_free_extent (cmark_source_map *self,
cmark_source_extent * source_map_stitch_extent(cmark_source_map *self,
cmark_source_extent *extent,
cmark_node *node,
- uint64_t total_length);
+ bufsize_t total_length);
cmark_source_extent * source_map_splice_extent(cmark_source_map *self,
- uint64_t start,
- uint64_t stop,
+ bufsize_t start,
+ bufsize_t stop,
cmark_node *node,
cmark_extent_type type);
diff --git a/test/cmark.py b/test/cmark.py
index 4be85a3..f4ff576 100644
--- a/test/cmark.py
+++ b/test/cmark.py
@@ -30,6 +30,8 @@ def to_commonmark(lib, text):
render_commonmark.restype = c_char_p
render_commonmark.argtypes = [c_void_p, c_int, c_int]
node = parse_document(textbytes, textlen, 0)
+ if node is None:
+ raise Exception("parse_document failed")
result = render_commonmark(node, 0, 0).decode('utf-8')
return [0, result, '']