summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2017-01-03 22:08:38 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2017-01-03 22:08:38 -0800
commit60b6962db0b0488667180e11cc6cfb1cec1b41ea (patch)
tree9f3d399e48b781b5e363b47a2aa04e2b758e331c
parentcc50a3aba3e34dc58ca819a65b907871e2ea6fd9 (diff)
Revert "Change types for source map offsets (#174)"
This reverts commit 4fbe344df43ed7f60a3d3a53981088334cb709fc.
-rw-r--r--api_test/main.c37
-rw-r--r--src/blocks.c48
-rw-r--r--src/buffer.c32
-rw-r--r--src/buffer.h20
-rw-r--r--src/cmark.c3
-rw-r--r--src/cmark.h32
-rw-r--r--src/inlines.c2
-rw-r--r--src/inlines.h2
-rw-r--r--src/main.c5
-rw-r--r--src/parser.h3
-rw-r--r--src/source_map.c22
-rw-r--r--src/source_map.h23
-rw-r--r--test/cmark.py2
13 files changed, 44 insertions, 187 deletions
diff --git a/api_test/main.c b/api_test/main.c
index 61291dc..17e1582 100644
--- a/api_test/main.c
+++ b/api_test/main.c
@@ -5,7 +5,6 @@
#define CMARK_NO_SHORT_NAMES
#include "cmark.h"
#include "node.h"
-#include "parser.h"
#include "harness.h"
#include "cplusplus.h"
@@ -884,41 +883,6 @@ static void test_feed_across_line_ending(test_batch_runner *runner) {
cmark_node_free(document);
}
-static cmark_node *S_parse_with_fake_total(bufsize_t fake_total,
- const char *str,
- cmark_err_type *err) {
- cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT);
- parser->total_bytes = fake_total;
- cmark_parser_feed(parser, str, strlen(str));
- cmark_node *doc = cmark_parser_finish(parser);
- *err = cmark_parser_get_error(parser);
- cmark_parser_free(parser);
- return doc;
-}
-
-static void test_bufsize_overflow(test_batch_runner *runner) {
- cmark_node *doc;
- cmark_err_type err;
-
- doc = S_parse_with_fake_total(BUFSIZE_MAX, "a", &err);
- OK(runner, doc == NULL, "parse 1 byte after BUFSIZE_MAX bytes fails");
- INT_EQ(runner, err, CMARK_ERR_INPUT_TOO_LARGE,
- "parse 1 byte after BUFSIZE_MAX bytes error code");
-
- doc = S_parse_with_fake_total(BUFSIZE_MAX - 9, "0123456789", &err);
- OK(runner, doc == NULL, "parse 10 byte after BUFSIZE_MAX-9 bytes fails");
- INT_EQ(runner, err, CMARK_ERR_INPUT_TOO_LARGE,
- "parse 10 byte after BUFSIZE_MAX-9 bytes error code");
-
- doc = S_parse_with_fake_total(BUFSIZE_MAX - 1, "a", &err);
- OK(runner, doc != NULL, "parse 1 byte after BUFSIZE_MAX-1 bytes");
- cmark_node_free(doc);
-
- doc = S_parse_with_fake_total(BUFSIZE_MAX - 10, "0123456789", &err);
- OK(runner, doc != NULL, "parse 10 byte after BUFSIZE_MAX-10 bytes");
- cmark_node_free(doc);
-}
-
int main() {
int retval;
test_batch_runner *runner = test_batch_runner_new();
@@ -944,7 +908,6 @@ int main() {
test_cplusplus(runner);
test_safe(runner);
test_feed_across_line_ending(runner);
- test_bufsize_overflow(runner);
test_print_summary(runner);
retval = test_ok(runner) ? 0 : 1;
diff --git a/src/blocks.c b/src/blocks.c
index c680535..1c1d160 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -96,8 +96,6 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
parser->refmap = cmark_reference_map_new(mem);
parser->root = document;
parser->current = document;
- parser->error_code = CMARK_ERR_NONE;
- parser->total_bytes = 0;
parser->line_number = 0;
parser->line_offset = 0;
parser->offset = 0;
@@ -552,20 +550,6 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
const unsigned char *skipped;
static const uint8_t repl[] = {239, 191, 189};
- if (parser->error_code) {
- return;
- }
-
- // Limit maximum document size to BUFSIZE_MAX. This makes sure that we
- // never create strbufs larger than BUFSIZE_MAX. Unfortunately, the
- // public API doesn't have an error reporting mechanism, so all we can
- // do is to abort.
- if (len > (size_t)(BUFSIZE_MAX - parser->total_bytes)) {
- parser->error_code = CMARK_ERR_INPUT_TOO_LARGE;
- return;
- }
- parser->total_bytes += (bufsize_t)len;
-
if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
// skip NL if last buffer ended with CR ; see #117
buffer++;
@@ -1282,19 +1266,14 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) {
cmark_strbuf_clear(&parser->linebuf);
}
- cmark_strbuf_clear(&parser->curline);
-
- if (parser->error_code) {
- cmark_node_free(parser->root);
- return NULL;
- }
-
finalize_document(parser);
if (parser->options & CMARK_OPT_NORMALIZE) {
cmark_consolidate_text_nodes(parser->root);
}
+ cmark_strbuf_free(&parser->curline);
+
#if CMARK_DEBUG_NODES
if (cmark_node_check(parser->root, stderr)) {
abort();
@@ -1308,26 +1287,3 @@ cmark_parser_get_first_source_extent(cmark_parser *parser)
{
return parser->source_map->head;
}
-
-cmark_err_type cmark_parser_get_error(cmark_parser *parser) {
- return parser->error_code;
-}
-
-const char *cmark_parser_get_error_message(cmark_parser *parser) {
- const char *str = NULL;
-
- switch (parser->error_code) {
- case CMARK_ERR_OUT_OF_MEMORY:
- str = "Out of memory";
- break;
- case CMARK_ERR_INPUT_TOO_LARGE:
- str = "Input too large";
- break;
- default:
- str = "Unknown error";
- break;
- }
-
- return str;
-}
-
diff --git a/src/buffer.c b/src/buffer.c
index 9a9e9ad..a6754b6 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -33,11 +33,6 @@ void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf,
}
static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) {
- // Safety check for overflow.
- if (add > BUFSIZE_MAX - buf->size) {
- fprintf(stderr, "Internal cmark_strbuf overflow");
- abort();
- }
cmark_strbuf_grow(buf, buf->size + add);
}
@@ -47,25 +42,18 @@ void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) {
if (target_size < buf->asize)
return;
- // Oversize the buffer by 50% to guarantee amortized linear time
- // complexity on append operations.
- bufsize_t add = target_size / 2;
- // Account for terminating NUL byte.
- add += 1;
- // Round up to multiple of eight.
- add = (add + 7) & ~7;
-
- // Check for overflow but allow an additional NUL byte.
- if (target_size + add > BUFSIZE_MAX + 1) {
- target_size = BUFSIZE_MAX + 1;
- }
- else {
- target_size += add;
- }
+ if (target_size > (bufsize_t)(INT32_MAX / 2))
+ abort();
+
+ /* Oversize the buffer by 50% to guarantee amortized linear time
+ * complexity on append operations. */
+ bufsize_t new_size = target_size + target_size / 2;
+ new_size += 1;
+ new_size = (new_size + 7) & ~7;
buf->ptr = (unsigned char *)buf->mem->realloc(buf->asize ? buf->ptr : NULL,
- target_size);
- buf->asize = target_size;
+ new_size);
+ buf->asize = new_size;
}
bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; }
diff --git a/src/buffer.h b/src/buffer.h
index 7f31a74..e878075 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -13,28 +13,8 @@
extern "C" {
#endif
-#ifndef CMARK_HUGE_DOCS
-
-// Maximum strbuf size without terminating NUL byte.
-#define BUFSIZE_MAX (INT32_MAX - 1)
-
typedef int32_t bufsize_t;
-#else // CMARK_HUGE_DOCS
-
-// This is an untested proof of concept of how to handle multi-gigabyte
-// documents on 64-bit platforms at the expense of internal struct sizes.
-
-#ifdef PTRDIFF_MAX
- #define BUFSIZE_MAX (PTRDIFF_MAX - 1)
-#else
- #define BUFSIZE_MAX (ptrdiff_t)((size_t)-1 / 2)
-#endif
-
-typedef ptrdiff_t bufsize_t;
-
-#endif // CMARK_HUGE_DOCS
-
typedef struct {
cmark_mem *mem;
unsigned char *ptr;
diff --git a/src/cmark.c b/src/cmark.c
index da93abe..2ef6cb4 100644
--- a/src/cmark.c
+++ b/src/cmark.c
@@ -36,9 +36,6 @@ char *cmark_markdown_to_html(const char *text, size_t len, int options) {
char *result;
doc = cmark_parse_document(text, len, options);
- if (doc == NULL) {
- return NULL;
- }
result = cmark_render_html(doc, options);
cmark_node_free(doc);
diff --git a/src/cmark.h b/src/cmark.h
index 5ce6d10..034f0e6 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -2,6 +2,7 @@
#define CMARK_H
#include <stdio.h>
+#include <stdint.h>
#include <cmark_export.h>
#include <cmark_version.h>
@@ -22,7 +23,7 @@ extern "C" {
/** Convert 'text' (assumed to be a UTF-8 encoded string with length
* 'len') from CommonMark Markdown to HTML, returning a null-terminated,
* UTF-8-encoded string. It is the caller's responsibility
- * to free the returned buffer. Returns NULL on error.
+ * to free the returned buffer.
*/
CMARK_EXPORT
char *cmark_markdown_to_html(const char *text, size_t len, int options);
@@ -98,12 +99,6 @@ typedef enum {
CMARK_PAREN_DELIM
} cmark_delim_type;
-typedef enum {
- CMARK_ERR_NONE,
- CMARK_ERR_OUT_OF_MEMORY,
- CMARK_ERR_INPUT_TOO_LARGE
-} cmark_err_type;
-
typedef struct cmark_node cmark_node;
typedef struct cmark_parser cmark_parser;
typedef struct cmark_iter cmark_iter;
@@ -494,22 +489,12 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem);
CMARK_EXPORT
void cmark_parser_free(cmark_parser *parser);
-/** Return the error code after a failed operation.
- */
-CMARK_EXPORT
-cmark_err_type cmark_parser_get_error(cmark_parser *parser);
-
-/** Return the error code after a failed operation.
- */
-CMARK_EXPORT
-const char *cmark_parser_get_error_message(cmark_parser *parser);
-
/** Feeds a string of length 'len' to 'parser'.
*/
CMARK_EXPORT
void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);
-/** Finish parsing and return a pointer to a tree of nodes or NULL on error.
+/** Finish parsing and return a pointer to a tree of nodes.
*/
CMARK_EXPORT
cmark_node *cmark_parser_finish(cmark_parser *parser);
@@ -522,7 +507,7 @@ cmark_source_extent *cmark_parser_get_first_source_extent(cmark_parser *parser);
/** Parse a CommonMark document in 'buffer' of length 'len'.
* Returns a pointer to a tree of nodes. The memory allocated for
* the node tree should be released using 'cmark_node_free'
- * when it is no longer needed. Returns NULL on error.
+ * when it is no longer needed.
*/
CMARK_EXPORT
cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
@@ -530,23 +515,22 @@ cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
/** Parse a CommonMark document in file 'f', returning a pointer to
* a tree of nodes. The memory allocated for the node tree should be
* released using 'cmark_node_free' when it is no longer needed.
- * Returns NULL on error.
*/
CMARK_EXPORT
cmark_node *cmark_parse_file(FILE *f, int options);
-/**
+/**
* ## Source map API
*/
/* Return the index, in bytes, of the start of this extent */
CMARK_EXPORT
-size_t cmark_source_extent_get_start(cmark_source_extent *extent);
+uint64_t cmark_source_extent_get_start(cmark_source_extent *extent);
-/* Return the index, in bytes, of the stop of this extent. This
+/* Return the index, in bytes, of the stop of this extent. This
* index is not included in the extent*/
CMARK_EXPORT
-size_t cmark_source_extent_get_stop(cmark_source_extent *extent);
+uint64_t cmark_source_extent_get_stop(cmark_source_extent *extent);
/* Return the extent immediately following 'extent' */
CMARK_EXPORT
diff --git a/src/inlines.c b/src/inlines.c
index 02b4723..9aea865 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -1229,7 +1229,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
// Parse inlines from parent's string_content, adding as children of parent.
extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
cmark_reference_map *refmap, int options,
- cmark_source_map *source_map, bufsize_t total_length) {
+ cmark_source_map *source_map, uint64_t total_length) {
subject subj;
subject_from_buf(mem, &subj, &parent->content, refmap, source_map);
bufsize_t initial_len = subj.input.len;
diff --git a/src/inlines.h b/src/inlines.h
index 8459794..8de31b1 100644
--- a/src/inlines.h
+++ b/src/inlines.h
@@ -14,7 +14,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
cmark_reference_map *refmap, int options,
- cmark_source_map *source_map, bufsize_t total_length);
+ cmark_source_map *source_map, uint64_t total_length);
bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
cmark_reference_map *refmap,
diff --git a/src/main.c b/src/main.c
index aeb81de..42cd8b1 100644
--- a/src/main.c
+++ b/src/main.c
@@ -181,11 +181,6 @@ int main(int argc, char *argv[]) {
document = cmark_parser_finish(parser);
cmark_parser_free(parser);
- if (document == NULL) {
- fprintf(stderr, "%s", cmark_parser_get_error_message(parser));
- exit(1);
- }
-
print_document(document, writer, options, width);
cmark_node_free(document);
diff --git a/src/parser.h b/src/parser.h
index 7b4fdbc..b28a8a7 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -2,7 +2,6 @@
#define CMARK_AST_H
#include <stdio.h>
-#include "cmark.h"
#include "node.h"
#include "buffer.h"
#include "memory.h"
@@ -19,8 +18,6 @@ struct cmark_parser {
struct cmark_reference_map *refmap;
struct cmark_node *root;
struct cmark_node *current;
- cmark_err_type error_code;
- bufsize_t total_bytes;
int line_number;
bufsize_t offset;
bufsize_t column;
diff --git a/src/source_map.c b/src/source_map.c
index dccbe7c..db01a21 100644
--- a/src/source_map.c
+++ b/src/source_map.c
@@ -19,7 +19,7 @@ source_map_free(cmark_source_map *self)
}
cmark_source_extent *
-source_map_append_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop, cmark_node *node, cmark_extent_type type)
+source_map_append_extent(cmark_source_map *self, uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type)
{
assert (start <= stop);
assert (!self->tail || self->tail->stop <= start);
@@ -46,7 +46,7 @@ source_map_append_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop
cmark_source_extent *
source_map_insert_extent(cmark_source_map *self, cmark_source_extent *previous,
- bufsize_t start, bufsize_t stop, cmark_node *node, cmark_extent_type type)
+ uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type)
{
if (start == stop)
return previous;
@@ -101,7 +101,7 @@ source_map_free_extent(cmark_source_map *self, cmark_source_extent *extent)
cmark_source_extent *
source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent,
- cmark_node *node, bufsize_t total_length)
+ cmark_node *node, uint64_t total_length)
{
cmark_source_extent *next_extent = extent->next;
cmark_source_extent *res;
@@ -135,7 +135,7 @@ source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent,
}
cmark_source_extent *
-source_map_splice_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop,
+source_map_splice_extent(cmark_source_map *self, uint64_t start, uint64_t stop,
cmark_node *node, cmark_extent_type type)
{
if (!self->next_cursor) {
@@ -154,7 +154,7 @@ source_map_splice_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop
return self->cursor;
} else if (start + self->cursor_offset < self->next_cursor->start) {
- bufsize_t new_start = self->next_cursor->start - self->cursor_offset;
+ uint64_t new_start = self->next_cursor->start - self->cursor_offset;
self->cursor = source_map_insert_extent(self,
self->cursor,
@@ -196,17 +196,17 @@ source_map_pretty_print(cmark_source_map *self) {
cmark_source_extent *tmp;
for (tmp = self->head; tmp; tmp = tmp->next) {
- printf ("%d:%d - %s, %s (%p)\n", tmp->start, tmp->stop,
- cmark_node_get_type_string(tmp->node),
+ printf ("%lu:%lu - %s, %s (%p)\n", tmp->start, tmp->stop,
+ cmark_node_get_type_string(tmp->node),
cmark_source_extent_get_type_string(tmp),
(void *) tmp->node);
}
}
bool
-source_map_check(cmark_source_map *self, bufsize_t total_length)
+source_map_check(cmark_source_map *self, uint64_t total_length)
{
- bufsize_t last_stop = 0;
+ uint64_t last_stop = 0;
cmark_source_extent *tmp;
for (tmp = self->head; tmp; tmp = tmp->next) {
@@ -224,13 +224,13 @@ source_map_check(cmark_source_map *self, bufsize_t total_length)
}
-size_t
+uint64_t
cmark_source_extent_get_start(cmark_source_extent *extent)
{
return extent->start;
}
-size_t
+uint64_t
cmark_source_extent_get_stop(cmark_source_extent *extent)
{
return extent->stop;
diff --git a/src/source_map.h b/src/source_map.h
index dca5a9f..619a073 100644
--- a/src/source_map.h
+++ b/src/source_map.h
@@ -3,7 +3,6 @@
#include "cmark.h"
#include "config.h"
-#include "buffer.h"
typedef struct _cmark_source_map
{
@@ -11,14 +10,14 @@ typedef struct _cmark_source_map
cmark_source_extent *tail;
cmark_source_extent *cursor;
cmark_source_extent *next_cursor;
- bufsize_t cursor_offset;
+ uint64_t cursor_offset;
cmark_mem *mem;
} cmark_source_map;
struct cmark_source_extent
{
- bufsize_t start;
- bufsize_t stop;
+ uint64_t start;
+ uint64_t stop;
struct cmark_source_extent *next;
struct cmark_source_extent *prev;
cmark_node *node;
@@ -30,20 +29,20 @@ cmark_source_map * source_map_new (cmark_mem *mem);
void source_map_free (cmark_source_map *self);
bool source_map_check (cmark_source_map *self,
- bufsize_t total_length);
+ uint64_t total_length);
void source_map_pretty_print (cmark_source_map *self);
cmark_source_extent * source_map_append_extent(cmark_source_map *self,
- bufsize_t start,
- bufsize_t stop,
+ uint64_t start,
+ uint64_t stop,
cmark_node *node,
cmark_extent_type type);
cmark_source_extent * source_map_insert_extent(cmark_source_map *self,
cmark_source_extent *previous,
- bufsize_t start,
- bufsize_t stop,
+ uint64_t start,
+ uint64_t stop,
cmark_node *node,
cmark_extent_type type);
@@ -53,11 +52,11 @@ cmark_source_extent * source_map_free_extent (cmark_source_map *self,
cmark_source_extent * source_map_stitch_extent(cmark_source_map *self,
cmark_source_extent *extent,
cmark_node *node,
- bufsize_t total_length);
+ uint64_t total_length);
cmark_source_extent * source_map_splice_extent(cmark_source_map *self,
- bufsize_t start,
- bufsize_t stop,
+ uint64_t start,
+ uint64_t stop,
cmark_node *node,
cmark_extent_type type);
diff --git a/test/cmark.py b/test/cmark.py
index f4ff576..4be85a3 100644
--- a/test/cmark.py
+++ b/test/cmark.py
@@ -30,8 +30,6 @@ def to_commonmark(lib, text):
render_commonmark.restype = c_char_p
render_commonmark.argtypes = [c_void_p, c_int, c_int]
node = parse_document(textbytes, textlen, 0)
- if node is None:
- raise Exception("parse_document failed")
result = render_commonmark(node, 0, 0).decode('utf-8')
return [0, result, '']