From 60b6962db0b0488667180e11cc6cfb1cec1b41ea Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jan 2017 22:08:38 -0800 Subject: Revert "Change types for source map offsets (#174)" This reverts commit 4fbe344df43ed7f60a3d3a53981088334cb709fc. --- api_test/main.c | 37 ------------------------------------- src/blocks.c | 48 ++---------------------------------------------- src/buffer.c | 32 ++++++++++---------------------- src/buffer.h | 20 -------------------- src/cmark.c | 3 --- src/cmark.h | 32 ++++++++------------------------ src/inlines.c | 2 +- src/inlines.h | 2 +- src/main.c | 5 ----- src/parser.h | 3 --- src/source_map.c | 22 +++++++++++----------- src/source_map.h | 23 +++++++++++------------ test/cmark.py | 2 -- 13 files changed, 44 insertions(+), 187 deletions(-) diff --git a/api_test/main.c b/api_test/main.c index 61291dc..17e1582 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -5,7 +5,6 @@ #define CMARK_NO_SHORT_NAMES #include "cmark.h" #include "node.h" -#include "parser.h" #include "harness.h" #include "cplusplus.h" @@ -884,41 +883,6 @@ static void test_feed_across_line_ending(test_batch_runner *runner) { cmark_node_free(document); } -static cmark_node *S_parse_with_fake_total(bufsize_t fake_total, - const char *str, - cmark_err_type *err) { - cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT); - parser->total_bytes = fake_total; - cmark_parser_feed(parser, str, strlen(str)); - cmark_node *doc = cmark_parser_finish(parser); - *err = cmark_parser_get_error(parser); - cmark_parser_free(parser); - return doc; -} - -static void test_bufsize_overflow(test_batch_runner *runner) { - cmark_node *doc; - cmark_err_type err; - - doc = S_parse_with_fake_total(BUFSIZE_MAX, "a", &err); - OK(runner, doc == NULL, "parse 1 byte after BUFSIZE_MAX bytes fails"); - INT_EQ(runner, err, CMARK_ERR_INPUT_TOO_LARGE, - "parse 1 byte after BUFSIZE_MAX bytes error code"); - - doc = S_parse_with_fake_total(BUFSIZE_MAX - 9, "0123456789", &err); - OK(runner, doc == NULL, "parse 10 byte after BUFSIZE_MAX-9 bytes fails"); - INT_EQ(runner, err, CMARK_ERR_INPUT_TOO_LARGE, - "parse 10 byte after BUFSIZE_MAX-9 bytes error code"); - - doc = S_parse_with_fake_total(BUFSIZE_MAX - 1, "a", &err); - OK(runner, doc != NULL, "parse 1 byte after BUFSIZE_MAX-1 bytes"); - cmark_node_free(doc); - - doc = S_parse_with_fake_total(BUFSIZE_MAX - 10, "0123456789", &err); - OK(runner, doc != NULL, "parse 10 byte after BUFSIZE_MAX-10 bytes"); - cmark_node_free(doc); -} - int main() { int retval; test_batch_runner *runner = test_batch_runner_new(); @@ -944,7 +908,6 @@ int main() { test_cplusplus(runner); test_safe(runner); test_feed_across_line_ending(runner); - test_bufsize_overflow(runner); test_print_summary(runner); retval = test_ok(runner) ? 0 : 1; diff --git a/src/blocks.c b/src/blocks.c index c680535..1c1d160 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -96,8 +96,6 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { parser->refmap = cmark_reference_map_new(mem); parser->root = document; parser->current = document; - parser->error_code = CMARK_ERR_NONE; - parser->total_bytes = 0; parser->line_number = 0; parser->line_offset = 0; parser->offset = 0; @@ -552,20 +550,6 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, const unsigned char *skipped; static const uint8_t repl[] = {239, 191, 189}; - if (parser->error_code) { - return; - } - - // Limit maximum document size to BUFSIZE_MAX. This makes sure that we - // never create strbufs larger than BUFSIZE_MAX. Unfortunately, the - // public API doesn't have an error reporting mechanism, so all we can - // do is to abort. - if (len > (size_t)(BUFSIZE_MAX - parser->total_bytes)) { - parser->error_code = CMARK_ERR_INPUT_TOO_LARGE; - return; - } - parser->total_bytes += (bufsize_t)len; - if (parser->last_buffer_ended_with_cr && *buffer == '\n') { // skip NL if last buffer ended with CR ; see #117 buffer++; @@ -1282,19 +1266,14 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) { cmark_strbuf_clear(&parser->linebuf); } - cmark_strbuf_clear(&parser->curline); - - if (parser->error_code) { - cmark_node_free(parser->root); - return NULL; - } - finalize_document(parser); if (parser->options & CMARK_OPT_NORMALIZE) { cmark_consolidate_text_nodes(parser->root); } + cmark_strbuf_free(&parser->curline); + #if CMARK_DEBUG_NODES if (cmark_node_check(parser->root, stderr)) { abort(); @@ -1308,26 +1287,3 @@ cmark_parser_get_first_source_extent(cmark_parser *parser) { return parser->source_map->head; } - -cmark_err_type cmark_parser_get_error(cmark_parser *parser) { - return parser->error_code; -} - -const char *cmark_parser_get_error_message(cmark_parser *parser) { - const char *str = NULL; - - switch (parser->error_code) { - case CMARK_ERR_OUT_OF_MEMORY: - str = "Out of memory"; - break; - case CMARK_ERR_INPUT_TOO_LARGE: - str = "Input too large"; - break; - default: - str = "Unknown error"; - break; - } - - return str; -} - diff --git a/src/buffer.c b/src/buffer.c index 9a9e9ad..a6754b6 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -33,11 +33,6 @@ void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf, } static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) { - // Safety check for overflow. - if (add > BUFSIZE_MAX - buf->size) { - fprintf(stderr, "Internal cmark_strbuf overflow"); - abort(); - } cmark_strbuf_grow(buf, buf->size + add); } @@ -47,25 +42,18 @@ void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) { if (target_size < buf->asize) return; - // Oversize the buffer by 50% to guarantee amortized linear time - // complexity on append operations. - bufsize_t add = target_size / 2; - // Account for terminating NUL byte. - add += 1; - // Round up to multiple of eight. - add = (add + 7) & ~7; - - // Check for overflow but allow an additional NUL byte. - if (target_size + add > BUFSIZE_MAX + 1) { - target_size = BUFSIZE_MAX + 1; - } - else { - target_size += add; - } + if (target_size > (bufsize_t)(INT32_MAX / 2)) + abort(); + + /* Oversize the buffer by 50% to guarantee amortized linear time + * complexity on append operations. */ + bufsize_t new_size = target_size + target_size / 2; + new_size += 1; + new_size = (new_size + 7) & ~7; buf->ptr = (unsigned char *)buf->mem->realloc(buf->asize ? buf->ptr : NULL, - target_size); - buf->asize = target_size; + new_size); + buf->asize = new_size; } bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; } diff --git a/src/buffer.h b/src/buffer.h index 7f31a74..e878075 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -13,28 +13,8 @@ extern "C" { #endif -#ifndef CMARK_HUGE_DOCS - -// Maximum strbuf size without terminating NUL byte. -#define BUFSIZE_MAX (INT32_MAX - 1) - typedef int32_t bufsize_t; -#else // CMARK_HUGE_DOCS - -// This is an untested proof of concept of how to handle multi-gigabyte -// documents on 64-bit platforms at the expense of internal struct sizes. - -#ifdef PTRDIFF_MAX - #define BUFSIZE_MAX (PTRDIFF_MAX - 1) -#else - #define BUFSIZE_MAX (ptrdiff_t)((size_t)-1 / 2) -#endif - -typedef ptrdiff_t bufsize_t; - -#endif // CMARK_HUGE_DOCS - typedef struct { cmark_mem *mem; unsigned char *ptr; diff --git a/src/cmark.c b/src/cmark.c index da93abe..2ef6cb4 100644 --- a/src/cmark.c +++ b/src/cmark.c @@ -36,9 +36,6 @@ char *cmark_markdown_to_html(const char *text, size_t len, int options) { char *result; doc = cmark_parse_document(text, len, options); - if (doc == NULL) { - return NULL; - } result = cmark_render_html(doc, options); cmark_node_free(doc); diff --git a/src/cmark.h b/src/cmark.h index 5ce6d10..034f0e6 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -2,6 +2,7 @@ #define CMARK_H #include +#include #include #include @@ -22,7 +23,7 @@ extern "C" { /** Convert 'text' (assumed to be a UTF-8 encoded string with length * 'len') from CommonMark Markdown to HTML, returning a null-terminated, * UTF-8-encoded string. It is the caller's responsibility - * to free the returned buffer. Returns NULL on error. + * to free the returned buffer. */ CMARK_EXPORT char *cmark_markdown_to_html(const char *text, size_t len, int options); @@ -98,12 +99,6 @@ typedef enum { CMARK_PAREN_DELIM } cmark_delim_type; -typedef enum { - CMARK_ERR_NONE, - CMARK_ERR_OUT_OF_MEMORY, - CMARK_ERR_INPUT_TOO_LARGE -} cmark_err_type; - typedef struct cmark_node cmark_node; typedef struct cmark_parser cmark_parser; typedef struct cmark_iter cmark_iter; @@ -494,22 +489,12 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem); CMARK_EXPORT void cmark_parser_free(cmark_parser *parser); -/** Return the error code after a failed operation. - */ -CMARK_EXPORT -cmark_err_type cmark_parser_get_error(cmark_parser *parser); - -/** Return the error code after a failed operation. - */ -CMARK_EXPORT -const char *cmark_parser_get_error_message(cmark_parser *parser); - /** Feeds a string of length 'len' to 'parser'. */ CMARK_EXPORT void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len); -/** Finish parsing and return a pointer to a tree of nodes or NULL on error. +/** Finish parsing and return a pointer to a tree of nodes. */ CMARK_EXPORT cmark_node *cmark_parser_finish(cmark_parser *parser); @@ -522,7 +507,7 @@ cmark_source_extent *cmark_parser_get_first_source_extent(cmark_parser *parser); /** Parse a CommonMark document in 'buffer' of length 'len'. * Returns a pointer to a tree of nodes. The memory allocated for * the node tree should be released using 'cmark_node_free' - * when it is no longer needed. Returns NULL on error. + * when it is no longer needed. */ CMARK_EXPORT cmark_node *cmark_parse_document(const char *buffer, size_t len, int options); @@ -530,23 +515,22 @@ cmark_node *cmark_parse_document(const char *buffer, size_t len, int options); /** Parse a CommonMark document in file 'f', returning a pointer to * a tree of nodes. The memory allocated for the node tree should be * released using 'cmark_node_free' when it is no longer needed. - * Returns NULL on error. */ CMARK_EXPORT cmark_node *cmark_parse_file(FILE *f, int options); -/** +/** * ## Source map API */ /* Return the index, in bytes, of the start of this extent */ CMARK_EXPORT -size_t cmark_source_extent_get_start(cmark_source_extent *extent); +uint64_t cmark_source_extent_get_start(cmark_source_extent *extent); -/* Return the index, in bytes, of the stop of this extent. This +/* Return the index, in bytes, of the stop of this extent. This * index is not included in the extent*/ CMARK_EXPORT -size_t cmark_source_extent_get_stop(cmark_source_extent *extent); +uint64_t cmark_source_extent_get_stop(cmark_source_extent *extent); /* Return the extent immediately following 'extent' */ CMARK_EXPORT diff --git a/src/inlines.c b/src/inlines.c index 02b4723..9aea865 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1229,7 +1229,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { // Parse inlines from parent's string_content, adding as children of parent. extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, cmark_reference_map *refmap, int options, - cmark_source_map *source_map, bufsize_t total_length) { + cmark_source_map *source_map, uint64_t total_length) { subject subj; subject_from_buf(mem, &subj, &parent->content, refmap, source_map); bufsize_t initial_len = subj.input.len; diff --git a/src/inlines.h b/src/inlines.h index 8459794..8de31b1 100644 --- a/src/inlines.h +++ b/src/inlines.h @@ -14,7 +14,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title); void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, cmark_reference_map *refmap, int options, - cmark_source_map *source_map, bufsize_t total_length); + cmark_source_map *source_map, uint64_t total_length); bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, cmark_reference_map *refmap, diff --git a/src/main.c b/src/main.c index aeb81de..42cd8b1 100644 --- a/src/main.c +++ b/src/main.c @@ -181,11 +181,6 @@ int main(int argc, char *argv[]) { document = cmark_parser_finish(parser); cmark_parser_free(parser); - if (document == NULL) { - fprintf(stderr, "%s", cmark_parser_get_error_message(parser)); - exit(1); - } - print_document(document, writer, options, width); cmark_node_free(document); diff --git a/src/parser.h b/src/parser.h index 7b4fdbc..b28a8a7 100644 --- a/src/parser.h +++ b/src/parser.h @@ -2,7 +2,6 @@ #define CMARK_AST_H #include -#include "cmark.h" #include "node.h" #include "buffer.h" #include "memory.h" @@ -19,8 +18,6 @@ struct cmark_parser { struct cmark_reference_map *refmap; struct cmark_node *root; struct cmark_node *current; - cmark_err_type error_code; - bufsize_t total_bytes; int line_number; bufsize_t offset; bufsize_t column; diff --git a/src/source_map.c b/src/source_map.c index dccbe7c..db01a21 100644 --- a/src/source_map.c +++ b/src/source_map.c @@ -19,7 +19,7 @@ source_map_free(cmark_source_map *self) } cmark_source_extent * -source_map_append_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop, cmark_node *node, cmark_extent_type type) +source_map_append_extent(cmark_source_map *self, uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type) { assert (start <= stop); assert (!self->tail || self->tail->stop <= start); @@ -46,7 +46,7 @@ source_map_append_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop cmark_source_extent * source_map_insert_extent(cmark_source_map *self, cmark_source_extent *previous, - bufsize_t start, bufsize_t stop, cmark_node *node, cmark_extent_type type) + uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type) { if (start == stop) return previous; @@ -101,7 +101,7 @@ source_map_free_extent(cmark_source_map *self, cmark_source_extent *extent) cmark_source_extent * source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent, - cmark_node *node, bufsize_t total_length) + cmark_node *node, uint64_t total_length) { cmark_source_extent *next_extent = extent->next; cmark_source_extent *res; @@ -135,7 +135,7 @@ source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent, } cmark_source_extent * -source_map_splice_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop, +source_map_splice_extent(cmark_source_map *self, uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type) { if (!self->next_cursor) { @@ -154,7 +154,7 @@ source_map_splice_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop return self->cursor; } else if (start + self->cursor_offset < self->next_cursor->start) { - bufsize_t new_start = self->next_cursor->start - self->cursor_offset; + uint64_t new_start = self->next_cursor->start - self->cursor_offset; self->cursor = source_map_insert_extent(self, self->cursor, @@ -196,17 +196,17 @@ source_map_pretty_print(cmark_source_map *self) { cmark_source_extent *tmp; for (tmp = self->head; tmp; tmp = tmp->next) { - printf ("%d:%d - %s, %s (%p)\n", tmp->start, tmp->stop, - cmark_node_get_type_string(tmp->node), + printf ("%lu:%lu - %s, %s (%p)\n", tmp->start, tmp->stop, + cmark_node_get_type_string(tmp->node), cmark_source_extent_get_type_string(tmp), (void *) tmp->node); } } bool -source_map_check(cmark_source_map *self, bufsize_t total_length) +source_map_check(cmark_source_map *self, uint64_t total_length) { - bufsize_t last_stop = 0; + uint64_t last_stop = 0; cmark_source_extent *tmp; for (tmp = self->head; tmp; tmp = tmp->next) { @@ -224,13 +224,13 @@ source_map_check(cmark_source_map *self, bufsize_t total_length) } -size_t +uint64_t cmark_source_extent_get_start(cmark_source_extent *extent) { return extent->start; } -size_t +uint64_t cmark_source_extent_get_stop(cmark_source_extent *extent) { return extent->stop; diff --git a/src/source_map.h b/src/source_map.h index dca5a9f..619a073 100644 --- a/src/source_map.h +++ b/src/source_map.h @@ -3,7 +3,6 @@ #include "cmark.h" #include "config.h" -#include "buffer.h" typedef struct _cmark_source_map { @@ -11,14 +10,14 @@ typedef struct _cmark_source_map cmark_source_extent *tail; cmark_source_extent *cursor; cmark_source_extent *next_cursor; - bufsize_t cursor_offset; + uint64_t cursor_offset; cmark_mem *mem; } cmark_source_map; struct cmark_source_extent { - bufsize_t start; - bufsize_t stop; + uint64_t start; + uint64_t stop; struct cmark_source_extent *next; struct cmark_source_extent *prev; cmark_node *node; @@ -30,20 +29,20 @@ cmark_source_map * source_map_new (cmark_mem *mem); void source_map_free (cmark_source_map *self); bool source_map_check (cmark_source_map *self, - bufsize_t total_length); + uint64_t total_length); void source_map_pretty_print (cmark_source_map *self); cmark_source_extent * source_map_append_extent(cmark_source_map *self, - bufsize_t start, - bufsize_t stop, + uint64_t start, + uint64_t stop, cmark_node *node, cmark_extent_type type); cmark_source_extent * source_map_insert_extent(cmark_source_map *self, cmark_source_extent *previous, - bufsize_t start, - bufsize_t stop, + uint64_t start, + uint64_t stop, cmark_node *node, cmark_extent_type type); @@ -53,11 +52,11 @@ cmark_source_extent * source_map_free_extent (cmark_source_map *self, cmark_source_extent * source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent, cmark_node *node, - bufsize_t total_length); + uint64_t total_length); cmark_source_extent * source_map_splice_extent(cmark_source_map *self, - bufsize_t start, - bufsize_t stop, + uint64_t start, + uint64_t stop, cmark_node *node, cmark_extent_type type); diff --git a/test/cmark.py b/test/cmark.py index f4ff576..4be85a3 100644 --- a/test/cmark.py +++ b/test/cmark.py @@ -30,8 +30,6 @@ def to_commonmark(lib, text): render_commonmark.restype = c_char_p render_commonmark.argtypes = [c_void_p, c_int, c_int] node = parse_document(textbytes, textlen, 0) - if node is None: - raise Exception("parse_document failed") result = render_commonmark(node, 0, 0).decode('utf-8') return [0, result, ''] -- cgit v1.2.3