summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorNick Wellnhofer <wellnhofer@aevum.de>2016-12-31 02:25:16 +0100
committerJohn MacFarlane <jgm@berkeley.edu>2016-12-30 18:25:16 -0700
commit4fbe344df43ed7f60a3d3a53981088334cb709fc (patch)
treef7f8264d06cb31024f169fda11c5d72821510c51 /src
parentc4c1d59ca29aceb1c5919908ac97d9476264fd96 (diff)
Change types for source map offsets (#174)
* Improve strbuf guarantees Introduce BUFSIZE_MAX macro and make sure that the strbuf implementation can handle strings up to this size. * Abort early if document size exceeds internal limit * Change types for source map offsets Switch to size_t for the public API, making the public headers C89-compatible again. Switch to bufsize_t internally, reducing memory usage and improving performance on 32-bit platforms. * Make parser return NULL on internal index overflow Make S_parser_feed set an error and ignore subsequent chunks if the total input document size exceeds an internal limit. Make cmark_parser_finish return NULL if an error was encountered. Add public API functions to retrieve error code and error message. strbuf overflow in renderers and OOM in parser or renderers still cause an abort.
Diffstat (limited to 'src')
-rw-r--r--src/blocks.c48
-rw-r--r--src/buffer.c32
-rw-r--r--src/buffer.h20
-rw-r--r--src/cmark.c3
-rw-r--r--src/cmark.h32
-rw-r--r--src/inlines.c2
-rw-r--r--src/inlines.h2
-rw-r--r--src/main.c5
-rw-r--r--src/parser.h3
-rw-r--r--src/source_map.c22
-rw-r--r--src/source_map.h23
11 files changed, 148 insertions, 44 deletions
diff --git a/src/blocks.c b/src/blocks.c
index 1c1d160..c680535 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -96,6 +96,8 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
parser->refmap = cmark_reference_map_new(mem);
parser->root = document;
parser->current = document;
+ parser->error_code = CMARK_ERR_NONE;
+ parser->total_bytes = 0;
parser->line_number = 0;
parser->line_offset = 0;
parser->offset = 0;
@@ -550,6 +552,20 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
const unsigned char *skipped;
static const uint8_t repl[] = {239, 191, 189};
+ if (parser->error_code) {
+ return;
+ }
+
+ // Limit maximum document size to BUFSIZE_MAX. This makes sure that we
+ // never create strbufs larger than BUFSIZE_MAX. Unfortunately, the
+ // public API doesn't have an error reporting mechanism, so all we can
+ // do is to abort.
+ if (len > (size_t)(BUFSIZE_MAX - parser->total_bytes)) {
+ parser->error_code = CMARK_ERR_INPUT_TOO_LARGE;
+ return;
+ }
+ parser->total_bytes += (bufsize_t)len;
+
if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
// skip NL if last buffer ended with CR ; see #117
buffer++;
@@ -1266,14 +1282,19 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) {
cmark_strbuf_clear(&parser->linebuf);
}
+ cmark_strbuf_clear(&parser->curline);
+
+ if (parser->error_code) {
+ cmark_node_free(parser->root);
+ return NULL;
+ }
+
finalize_document(parser);
if (parser->options & CMARK_OPT_NORMALIZE) {
cmark_consolidate_text_nodes(parser->root);
}
- cmark_strbuf_free(&parser->curline);
-
#if CMARK_DEBUG_NODES
if (cmark_node_check(parser->root, stderr)) {
abort();
@@ -1287,3 +1308,26 @@ cmark_parser_get_first_source_extent(cmark_parser *parser)
{
return parser->source_map->head;
}
+
+cmark_err_type cmark_parser_get_error(cmark_parser *parser) {
+ return parser->error_code;
+}
+
+const char *cmark_parser_get_error_message(cmark_parser *parser) {
+ const char *str = NULL;
+
+ switch (parser->error_code) {
+ case CMARK_ERR_OUT_OF_MEMORY:
+ str = "Out of memory";
+ break;
+ case CMARK_ERR_INPUT_TOO_LARGE:
+ str = "Input too large";
+ break;
+ default:
+ str = "Unknown error";
+ break;
+ }
+
+ return str;
+}
+
diff --git a/src/buffer.c b/src/buffer.c
index a6754b6..9a9e9ad 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -33,6 +33,11 @@ void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf,
}
static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) {
+ // Safety check for overflow.
+ if (add > BUFSIZE_MAX - buf->size) {
+ fprintf(stderr, "Internal cmark_strbuf overflow");
+ abort();
+ }
cmark_strbuf_grow(buf, buf->size + add);
}
@@ -42,18 +47,25 @@ void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) {
if (target_size < buf->asize)
return;
- if (target_size > (bufsize_t)(INT32_MAX / 2))
- abort();
-
- /* Oversize the buffer by 50% to guarantee amortized linear time
- * complexity on append operations. */
- bufsize_t new_size = target_size + target_size / 2;
- new_size += 1;
- new_size = (new_size + 7) & ~7;
+ // Oversize the buffer by 50% to guarantee amortized linear time
+ // complexity on append operations.
+ bufsize_t add = target_size / 2;
+ // Account for terminating NUL byte.
+ add += 1;
+ // Round up to multiple of eight.
+ add = (add + 7) & ~7;
+
+ // Check for overflow but allow an additional NUL byte.
+ if (target_size + add > BUFSIZE_MAX + 1) {
+ target_size = BUFSIZE_MAX + 1;
+ }
+ else {
+ target_size += add;
+ }
buf->ptr = (unsigned char *)buf->mem->realloc(buf->asize ? buf->ptr : NULL,
- new_size);
- buf->asize = new_size;
+ target_size);
+ buf->asize = target_size;
}
bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; }
diff --git a/src/buffer.h b/src/buffer.h
index e878075..7f31a74 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -13,8 +13,28 @@
extern "C" {
#endif
+#ifndef CMARK_HUGE_DOCS
+
+// Maximum strbuf size without terminating NUL byte.
+#define BUFSIZE_MAX (INT32_MAX - 1)
+
typedef int32_t bufsize_t;
+#else // CMARK_HUGE_DOCS
+
+// This is an untested proof of concept of how to handle multi-gigabyte
+// documents on 64-bit platforms at the expense of internal struct sizes.
+
+#ifdef PTRDIFF_MAX
+ #define BUFSIZE_MAX (PTRDIFF_MAX - 1)
+#else
+ #define BUFSIZE_MAX (ptrdiff_t)((size_t)-1 / 2)
+#endif
+
+typedef ptrdiff_t bufsize_t;
+
+#endif // CMARK_HUGE_DOCS
+
typedef struct {
cmark_mem *mem;
unsigned char *ptr;
diff --git a/src/cmark.c b/src/cmark.c
index 2ef6cb4..da93abe 100644
--- a/src/cmark.c
+++ b/src/cmark.c
@@ -36,6 +36,9 @@ char *cmark_markdown_to_html(const char *text, size_t len, int options) {
char *result;
doc = cmark_parse_document(text, len, options);
+ if (doc == NULL) {
+ return NULL;
+ }
result = cmark_render_html(doc, options);
cmark_node_free(doc);
diff --git a/src/cmark.h b/src/cmark.h
index 034f0e6..5ce6d10 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -2,7 +2,6 @@
#define CMARK_H
#include <stdio.h>
-#include <stdint.h>
#include <cmark_export.h>
#include <cmark_version.h>
@@ -23,7 +22,7 @@ extern "C" {
/** Convert 'text' (assumed to be a UTF-8 encoded string with length
* 'len') from CommonMark Markdown to HTML, returning a null-terminated,
* UTF-8-encoded string. It is the caller's responsibility
- * to free the returned buffer.
+ * to free the returned buffer. Returns NULL on error.
*/
CMARK_EXPORT
char *cmark_markdown_to_html(const char *text, size_t len, int options);
@@ -99,6 +98,12 @@ typedef enum {
CMARK_PAREN_DELIM
} cmark_delim_type;
+typedef enum {
+ CMARK_ERR_NONE,
+ CMARK_ERR_OUT_OF_MEMORY,
+ CMARK_ERR_INPUT_TOO_LARGE
+} cmark_err_type;
+
typedef struct cmark_node cmark_node;
typedef struct cmark_parser cmark_parser;
typedef struct cmark_iter cmark_iter;
@@ -489,12 +494,22 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem);
CMARK_EXPORT
void cmark_parser_free(cmark_parser *parser);
+/** Return the error code after a failed operation.
+ */
+CMARK_EXPORT
+cmark_err_type cmark_parser_get_error(cmark_parser *parser);
+
+/** Return the error code after a failed operation.
+ */
+CMARK_EXPORT
+const char *cmark_parser_get_error_message(cmark_parser *parser);
+
/** Feeds a string of length 'len' to 'parser'.
*/
CMARK_EXPORT
void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);
-/** Finish parsing and return a pointer to a tree of nodes.
+/** Finish parsing and return a pointer to a tree of nodes or NULL on error.
*/
CMARK_EXPORT
cmark_node *cmark_parser_finish(cmark_parser *parser);
@@ -507,7 +522,7 @@ cmark_source_extent *cmark_parser_get_first_source_extent(cmark_parser *parser);
/** Parse a CommonMark document in 'buffer' of length 'len'.
* Returns a pointer to a tree of nodes. The memory allocated for
* the node tree should be released using 'cmark_node_free'
- * when it is no longer needed.
+ * when it is no longer needed. Returns NULL on error.
*/
CMARK_EXPORT
cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
@@ -515,22 +530,23 @@ cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
/** Parse a CommonMark document in file 'f', returning a pointer to
* a tree of nodes. The memory allocated for the node tree should be
* released using 'cmark_node_free' when it is no longer needed.
+ * Returns NULL on error.
*/
CMARK_EXPORT
cmark_node *cmark_parse_file(FILE *f, int options);
-/**
+/**
* ## Source map API
*/
/* Return the index, in bytes, of the start of this extent */
CMARK_EXPORT
-uint64_t cmark_source_extent_get_start(cmark_source_extent *extent);
+size_t cmark_source_extent_get_start(cmark_source_extent *extent);
-/* Return the index, in bytes, of the stop of this extent. This
+/* Return the index, in bytes, of the stop of this extent. This
* index is not included in the extent*/
CMARK_EXPORT
-uint64_t cmark_source_extent_get_stop(cmark_source_extent *extent);
+size_t cmark_source_extent_get_stop(cmark_source_extent *extent);
/* Return the extent immediately following 'extent' */
CMARK_EXPORT
diff --git a/src/inlines.c b/src/inlines.c
index 96da28b..099078e 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -1230,7 +1230,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
// Parse inlines from parent's string_content, adding as children of parent.
extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
cmark_reference_map *refmap, int options,
- cmark_source_map *source_map, uint64_t total_length) {
+ cmark_source_map *source_map, bufsize_t total_length) {
subject subj;
subject_from_buf(mem, &subj, &parent->content, refmap, source_map);
bufsize_t initial_len = subj.input.len;
diff --git a/src/inlines.h b/src/inlines.h
index 8de31b1..8459794 100644
--- a/src/inlines.h
+++ b/src/inlines.h
@@ -14,7 +14,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
cmark_reference_map *refmap, int options,
- cmark_source_map *source_map, uint64_t total_length);
+ cmark_source_map *source_map, bufsize_t total_length);
bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
cmark_reference_map *refmap,
diff --git a/src/main.c b/src/main.c
index 42cd8b1..aeb81de 100644
--- a/src/main.c
+++ b/src/main.c
@@ -181,6 +181,11 @@ int main(int argc, char *argv[]) {
document = cmark_parser_finish(parser);
cmark_parser_free(parser);
+ if (document == NULL) {
+ fprintf(stderr, "%s", cmark_parser_get_error_message(parser));
+ exit(1);
+ }
+
print_document(document, writer, options, width);
cmark_node_free(document);
diff --git a/src/parser.h b/src/parser.h
index b28a8a7..7b4fdbc 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -2,6 +2,7 @@
#define CMARK_AST_H
#include <stdio.h>
+#include "cmark.h"
#include "node.h"
#include "buffer.h"
#include "memory.h"
@@ -18,6 +19,8 @@ struct cmark_parser {
struct cmark_reference_map *refmap;
struct cmark_node *root;
struct cmark_node *current;
+ cmark_err_type error_code;
+ bufsize_t total_bytes;
int line_number;
bufsize_t offset;
bufsize_t column;
diff --git a/src/source_map.c b/src/source_map.c
index db01a21..dccbe7c 100644
--- a/src/source_map.c
+++ b/src/source_map.c
@@ -19,7 +19,7 @@ source_map_free(cmark_source_map *self)
}
cmark_source_extent *
-source_map_append_extent(cmark_source_map *self, uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type)
+source_map_append_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop, cmark_node *node, cmark_extent_type type)
{
assert (start <= stop);
assert (!self->tail || self->tail->stop <= start);
@@ -46,7 +46,7 @@ source_map_append_extent(cmark_source_map *self, uint64_t start, uint64_t stop,
cmark_source_extent *
source_map_insert_extent(cmark_source_map *self, cmark_source_extent *previous,
- uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type)
+ bufsize_t start, bufsize_t stop, cmark_node *node, cmark_extent_type type)
{
if (start == stop)
return previous;
@@ -101,7 +101,7 @@ source_map_free_extent(cmark_source_map *self, cmark_source_extent *extent)
cmark_source_extent *
source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent,
- cmark_node *node, uint64_t total_length)
+ cmark_node *node, bufsize_t total_length)
{
cmark_source_extent *next_extent = extent->next;
cmark_source_extent *res;
@@ -135,7 +135,7 @@ source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent,
}
cmark_source_extent *
-source_map_splice_extent(cmark_source_map *self, uint64_t start, uint64_t stop,
+source_map_splice_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop,
cmark_node *node, cmark_extent_type type)
{
if (!self->next_cursor) {
@@ -154,7 +154,7 @@ source_map_splice_extent(cmark_source_map *self, uint64_t start, uint64_t stop,
return self->cursor;
} else if (start + self->cursor_offset < self->next_cursor->start) {
- uint64_t new_start = self->next_cursor->start - self->cursor_offset;
+ bufsize_t new_start = self->next_cursor->start - self->cursor_offset;
self->cursor = source_map_insert_extent(self,
self->cursor,
@@ -196,17 +196,17 @@ source_map_pretty_print(cmark_source_map *self) {
cmark_source_extent *tmp;
for (tmp = self->head; tmp; tmp = tmp->next) {
- printf ("%lu:%lu - %s, %s (%p)\n", tmp->start, tmp->stop,
- cmark_node_get_type_string(tmp->node),
+ printf ("%d:%d - %s, %s (%p)\n", tmp->start, tmp->stop,
+ cmark_node_get_type_string(tmp->node),
cmark_source_extent_get_type_string(tmp),
(void *) tmp->node);
}
}
bool
-source_map_check(cmark_source_map *self, uint64_t total_length)
+source_map_check(cmark_source_map *self, bufsize_t total_length)
{
- uint64_t last_stop = 0;
+ bufsize_t last_stop = 0;
cmark_source_extent *tmp;
for (tmp = self->head; tmp; tmp = tmp->next) {
@@ -224,13 +224,13 @@ source_map_check(cmark_source_map *self, uint64_t total_length)
}
-uint64_t
+size_t
cmark_source_extent_get_start(cmark_source_extent *extent)
{
return extent->start;
}
-uint64_t
+size_t
cmark_source_extent_get_stop(cmark_source_extent *extent)
{
return extent->stop;
diff --git a/src/source_map.h b/src/source_map.h
index 619a073..dca5a9f 100644
--- a/src/source_map.h
+++ b/src/source_map.h
@@ -3,6 +3,7 @@
#include "cmark.h"
#include "config.h"
+#include "buffer.h"
typedef struct _cmark_source_map
{
@@ -10,14 +11,14 @@ typedef struct _cmark_source_map
cmark_source_extent *tail;
cmark_source_extent *cursor;
cmark_source_extent *next_cursor;
- uint64_t cursor_offset;
+ bufsize_t cursor_offset;
cmark_mem *mem;
} cmark_source_map;
struct cmark_source_extent
{
- uint64_t start;
- uint64_t stop;
+ bufsize_t start;
+ bufsize_t stop;
struct cmark_source_extent *next;
struct cmark_source_extent *prev;
cmark_node *node;
@@ -29,20 +30,20 @@ cmark_source_map * source_map_new (cmark_mem *mem);
void source_map_free (cmark_source_map *self);
bool source_map_check (cmark_source_map *self,
- uint64_t total_length);
+ bufsize_t total_length);
void source_map_pretty_print (cmark_source_map *self);
cmark_source_extent * source_map_append_extent(cmark_source_map *self,
- uint64_t start,
- uint64_t stop,
+ bufsize_t start,
+ bufsize_t stop,
cmark_node *node,
cmark_extent_type type);
cmark_source_extent * source_map_insert_extent(cmark_source_map *self,
cmark_source_extent *previous,
- uint64_t start,
- uint64_t stop,
+ bufsize_t start,
+ bufsize_t stop,
cmark_node *node,
cmark_extent_type type);
@@ -52,11 +53,11 @@ cmark_source_extent * source_map_free_extent (cmark_source_map *self,
cmark_source_extent * source_map_stitch_extent(cmark_source_map *self,
cmark_source_extent *extent,
cmark_node *node,
- uint64_t total_length);
+ bufsize_t total_length);
cmark_source_extent * source_map_splice_extent(cmark_source_map *self,
- uint64_t start,
- uint64_t stop,
+ bufsize_t start,
+ bufsize_t stop,
cmark_node *node,
cmark_extent_type type);