summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2015-06-07 13:24:26 -0700
committer John MacFarlane <jgm@berkeley.edu> 2015-06-07 13:24:26 -0700
commit 802270f434a72935ba75c725b3cadcae4f478735 (patch)
tree b045b8831ac4c691ce90c41efa8e04b5330918da
parent 3adc586d9d7539e4d33f737110ffd4e236379099 (diff)
parent fdfa1e4bedf95691389efb9991ac8a6a4599c158 (diff)
Merge pull request #56 from nwellnhof/bufsize_t
Safer handling of string buffer sizes and indices
-rw-r--r-- api_test/main.c | 6
-rw-r--r-- man/man3/cmark.3 | 4
-rw-r--r-- src/blocks.c | 31
-rw-r--r-- src/buffer.c | 154
-rw-r--r-- src/buffer.h | 44
-rw-r--r-- src/chunk.h | 15
-rw-r--r-- src/cmark.c | 2
-rw-r--r-- src/cmark.h | 2
-rw-r--r-- src/commonmark.c | 44
-rw-r--r-- src/houdini.h | 19
-rw-r--r-- src/houdini_href_e.c | 4
-rw-r--r-- src/houdini_html_e.c | 6
-rw-r--r-- src/houdini_html_u.c | 14
-rw-r--r-- src/html.c | 25
-rw-r--r-- src/inlines.c | 62
-rw-r--r-- src/inlines.h | 2
-rw-r--r-- src/parser.h | 6
-rw-r--r-- src/scanners.c | 84
-rw-r--r-- src/scanners.h | 30
-rw-r--r-- src/scanners.re | 76
-rw-r--r-- src/utf8.c | 20
-rw-r--r-- src/utf8.h | 6
-rw-r--r-- src/xml.c | 9
23 files changed, 344 insertions, 321 deletions
diff --git a/api_test/main.c b/api_test/main.c
index 01df51d..132d48c 100644
--- a/api_test/main.c
+++ b/api_test/main.c
@@ -643,16 +643,16 @@ test_incomplete_char(test_batch_runner *runner, const char *utf8,
static void
test_continuation_byte(test_batch_runner *runner, const char *utf8)
{
- int len = strlen(utf8);
+ size_t len = strlen(utf8);
- for (int pos = 1; pos < len; ++pos) {
+ for (size_t pos = 1; pos < len; ++pos) {
char buf[20];
sprintf(buf, "((((%s))))", utf8);
buf[4+pos] = '\x20';
char expected[50];
strcpy(expected, "<p>((((" UTF8_REPL "\x20");
- for (int i = pos + 1; i < len; ++i) {
+ for (size_t i = pos + 1; i < len; ++i) {
strcat(expected, UTF8_REPL);
}
strcat(expected, "))))</p>\n");
diff --git a/man/man3/cmark.3 b/man/man3/cmark.3
index 5b68ecb..82c34cd 100644
--- a/man/man3/cmark.3
+++ b/man/man3/cmark.3
@@ -1,4 +1,4 @@
-.TH cmark 3 "March 21, 2015" "LOCAL" "Library Functions Manual"
+.TH cmark 3 "June 07, 2015" "LOCAL" "Library Functions Manual"
.SH
NAME
.PP
@@ -10,7 +10,7 @@ DESCRIPTION
Simple Interface
.PP
-\fIchar *\f[] \fBcmark_markdown_to_html\f[](\fIconst char *text\f[], \fIint len\f[], \fIint options\f[])
+\fIchar *\f[] \fBcmark_markdown_to_html\f[](\fIconst char *text\f[], \fIsize_t len\f[], \fIint options\f[])
.PP
Convert \f[I]text\f[] (assumed to be a UTF\-8 encoded string with length
diff --git a/src/blocks.c b/src/blocks.c
index b72c256..a3ac712 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -30,7 +30,7 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,
static void
S_process_line(cmark_parser *parser, const unsigned char *buffer,
- size_t bytes);
+ bufsize_t bytes);
static cmark_node* make_block(cmark_node_type tag, int start_line, int start_column)
{
@@ -95,7 +95,7 @@ static cmark_node*
finalize(cmark_parser *parser, cmark_node* b);
// Returns true if line has only space characters, else false.
-static bool is_blank(cmark_strbuf *s, int offset)
+static bool is_blank(cmark_strbuf *s, bufsize_t offset)
{
while (offset < s->size) {
switch (s->ptr[offset]) {
@@ -128,7 +128,7 @@ static inline bool accepts_lines(cmark_node_type block_type)
block_type == NODE_CODE_BLOCK);
}
-static void add_line(cmark_node* node, cmark_chunk *ch, int offset)
+static void add_line(cmark_node* node, cmark_chunk *ch, bufsize_t offset)
{
assert(node->open);
cmark_strbuf_put(&node->string_content, ch->data + offset, ch->len - offset);
@@ -136,7 +136,7 @@ static void add_line(cmark_node* node, cmark_chunk *ch, int offset)
static void remove_trailing_blank_lines(cmark_strbuf *ln)
{
- int i;
+ bufsize_t i;
unsigned char c;
for (i = ln->size - 1; i >= 0; --i) {
@@ -204,7 +204,7 @@ static int break_out_of_lists(cmark_parser *parser, cmark_node ** bptr)
static cmark_node*
finalize(cmark_parser *parser, cmark_node* b)
{
- int pos;
+ bufsize_t pos;
cmark_node* item;
cmark_node* subitem;
cmark_node* parent;
@@ -367,10 +367,10 @@ static void process_inlines(cmark_node* root, cmark_reference_map *refmap, int o
// Attempts to parse a list item marker (bullet or enumerated).
// On success, returns length of the marker, and populates
// data with the details. On failure, returns 0.
-static int parse_list_marker(cmark_chunk *input, int pos, cmark_list **dataptr)
+static bufsize_t parse_list_marker(cmark_chunk *input, bufsize_t pos, cmark_list **dataptr)
{
unsigned char c;
- int startpos;
+ bufsize_t startpos;
cmark_list *data;
startpos = pos;
@@ -497,6 +497,7 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,
while (buffer < end) {
const unsigned char *eol;
size_t line_len;
+ bufsize_t bufsize;
for (eol = buffer; eol < end; ++eol) {
if (S_is_line_end_char(*eol))
@@ -514,17 +515,19 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,
} else if (eof) {
line_len = end - buffer;
} else {
- cmark_strbuf_put(parser->linebuf, buffer, end - buffer);
+ bufsize = cmark_strbuf_check_bufsize(end - buffer);
+ cmark_strbuf_put(parser->linebuf, buffer, bufsize);
break;
}
+ bufsize = cmark_strbuf_check_bufsize(line_len);
if (parser->linebuf->size > 0) {
- cmark_strbuf_put(parser->linebuf, buffer, line_len);
+ cmark_strbuf_put(parser->linebuf, buffer, bufsize);
S_process_line(parser, parser->linebuf->ptr,
parser->linebuf->size);
cmark_strbuf_clear(parser->linebuf);
} else {
- S_process_line(parser, buffer, line_len);
+ S_process_line(parser, buffer, bufsize);
}
buffer += line_len;
@@ -533,7 +536,7 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,
static void chop_trailing_hashtags(cmark_chunk *ch)
{
- int n, orig_n;
+ bufsize_t n, orig_n;
cmark_chunk_rtrim(ch);
orig_n = n = ch->len - 1;
@@ -562,10 +565,10 @@ S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input)
}
static void
-S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
+S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t bytes)
{
cmark_node* last_matched_container;
- int matched = 0;
+ bufsize_t matched = 0;
int lev = 0;
int i;
cmark_list *data = NULL;
@@ -712,7 +715,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
parser->offset = parser->first_nonspace + matched;
container = add_child(parser, container, NODE_HEADER, parser->offset + 1);
- int hashpos = cmark_chunk_strchr(&input, '#', parser->first_nonspace);
+ bufsize_t hashpos = cmark_chunk_strchr(&input, '#', parser->first_nonspace);
int level = 0;
while (peek_at(&input, hashpos) == '#') {
diff --git a/src/buffer.c b/src/buffer.c
index e2ebc02..7d16af8 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -4,6 +4,7 @@
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
+#include <stdint.h>
#include "config.h"
#include "cmark_ctype.h"
@@ -14,48 +15,75 @@
*/
unsigned char cmark_strbuf__initbuf[1];
-#define ENSURE_SIZE(b, d) \
- if ((d) > b->asize) \
- cmark_strbuf_grow(b, (d)); \
-
#ifndef MIN
#define MIN(x,y) ((x<y) ? x : y)
#endif
-void cmark_strbuf_init(cmark_strbuf *buf, int initial_size)
+void cmark_strbuf_init(cmark_strbuf *buf, bufsize_t initial_size)
{
buf->asize = 0;
buf->size = 0;
buf->ptr = cmark_strbuf__initbuf;
- if (initial_size)
+ if (initial_size > 0)
cmark_strbuf_grow(buf, initial_size);
}
-void cmark_strbuf_grow(cmark_strbuf *buf, int target_size)
+void cmark_strbuf_overflow_err() {
+ fprintf(stderr, "String buffer overflow");
+ abort();
+}
+
+static inline void
+S_strbuf_grow_by(cmark_strbuf *buf, size_t add) {
+ size_t target_size = (size_t)buf->size + add;
+
+ if (target_size < add /* Integer overflow. */
+ || target_size > BUFSIZE_MAX /* Truncation overflow. */
+ ) {
+ cmark_strbuf_overflow_err();
+ return; /* unreachable */
+ }
+
+ if ((bufsize_t)target_size >= buf->asize)
+ cmark_strbuf_grow(buf, (bufsize_t)target_size);
+}
+
+void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size)
{
unsigned char *new_ptr;
- int new_size;
- if (target_size <= buf->asize)
+ if (target_size < buf->asize)
return;
if (buf->asize == 0) {
- new_size = target_size;
new_ptr = NULL;
} else {
- new_size = buf->asize;
new_ptr = buf->ptr;
}
- /* grow the buffer size by 1.5, until it's big enough
- * to fit our target size */
- while (new_size < target_size)
- new_size = (new_size << 1) - (new_size >> 1);
+ /* Oversize the buffer by 50% to guarantee amortized linear time
+ * complexity on append operations. */
+ size_t new_size = (size_t)target_size + (size_t)target_size / 2;
+
+ /* Account for terminating null byte. */
+ new_size += 1;
/* round allocation up to multiple of 8 */
new_size = (new_size + 7) & ~7;
+ if (new_size < (size_t)target_size /* Integer overflow. */
+ || new_size > BUFSIZE_MAX /* Truncation overflow. */
+ ) {
+ if (target_size >= BUFSIZE_MAX) {
+ /* No space for terminating null byte. */
+ cmark_strbuf_overflow_err();
+ return; /* unreachable */
+ }
+ /* Oversize by the maximum possible amount. */
+ new_size = BUFSIZE_MAX;
+ }
+
new_ptr = (unsigned char *)realloc(new_ptr, new_size);
if (!new_ptr) {
@@ -63,16 +91,11 @@ void cmark_strbuf_grow(cmark_strbuf *buf, int target_size)
abort();
}
- buf->asize = new_size;
+ buf->asize = (bufsize_t)new_size;
buf->ptr = new_ptr;
-
- /* truncate the existing buffer size if necessary */
- if (buf->size >= buf->asize)
- buf->size = buf->asize - 1;
- buf->ptr[buf->size] = '\0';
}
-size_t cmark_strbuf_len(const cmark_strbuf *buf)
+bufsize_t cmark_strbuf_len(const cmark_strbuf *buf)
{
return buf->size;
}
@@ -95,13 +118,14 @@ void cmark_strbuf_clear(cmark_strbuf *buf)
buf->ptr[0] = '\0';
}
-void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len)
+void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t len)
{
if (len <= 0 || data == NULL) {
cmark_strbuf_clear(buf);
} else {
if (data != buf->ptr) {
- ENSURE_SIZE(buf, len + 1);
+ if (len >= buf->asize)
+ cmark_strbuf_grow(buf, len);
memmove(buf->ptr, data, len);
}
buf->size = len;
@@ -112,22 +136,22 @@ void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len)
void cmark_strbuf_sets(cmark_strbuf *buf, const char *string)
{
cmark_strbuf_set(buf, (const unsigned char *)string,
- string ? strlen(string) : 0);
+ string ? cmark_strbuf_safe_strlen(string) : 0);
}
void cmark_strbuf_putc(cmark_strbuf *buf, int c)
{
- ENSURE_SIZE(buf, buf->size + 2);
+ S_strbuf_grow_by(buf, 1);
buf->ptr[buf->size++] = c;
buf->ptr[buf->size] = '\0';
}
-void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len)
+void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t len)
{
if (len <= 0)
return;
- ENSURE_SIZE(buf, buf->size + len + 1);
+ S_strbuf_grow_by(buf, len);
memmove(buf->ptr + buf->size, data, len);
buf->size += len;
buf->ptr[buf->size] = '\0';
@@ -135,21 +159,22 @@ void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len)
void cmark_strbuf_puts(cmark_strbuf *buf, const char *string)
{
- cmark_strbuf_put(buf, (const unsigned char *)string, strlen(string));
+ cmark_strbuf_put(buf, (const unsigned char *)string,
+ cmark_strbuf_safe_strlen(string));
}
void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap)
{
- const int expected_size = buf->size + (strlen(format) * 2);
- int len;
-
- ENSURE_SIZE(buf, expected_size);
+ size_t expected_size = strlen(format);
+ if (expected_size <= SIZE_MAX / 2)
+ expected_size *= 2;
+ S_strbuf_grow_by(buf, expected_size);
while (1) {
va_list args;
va_copy(args, ap);
- len = vsnprintf(
+ int len = vsnprintf(
(char *)buf->ptr + buf->size,
buf->asize - buf->size,
format, args
@@ -168,12 +193,12 @@ void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap)
abort();
}
- if (len + 1 <= buf->asize - buf->size) {
+ if ((size_t)len < (size_t)(buf->asize - buf->size)) {
buf->size += len;
break;
}
- ENSURE_SIZE(buf, buf->size + len + 1);
+ S_strbuf_grow_by(buf, len);
}
}
@@ -186,11 +211,13 @@ void cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...)
va_end(ap);
}
-void cmark_strbuf_copy_cstr(char *data, int datasize, const cmark_strbuf *buf)
+void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf *buf)
{
- int copylen;
+ bufsize_t copylen;
- assert(data && datasize && buf);
+ assert(buf);
+ if (!data || datasize <= 0)
+ return;
data[0] = '\0';
@@ -224,22 +251,6 @@ unsigned char *cmark_strbuf_detach(cmark_strbuf *buf)
return data;
}
-void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, int asize)
-{
- cmark_strbuf_free(buf);
-
- if (ptr) {
- buf->ptr = ptr;
- buf->size = strlen((char *)ptr);
- if (asize)
- buf->asize = (asize < buf->size) ? buf->size + 1 : asize;
- else /* pass 0 to fall back on strlen + 1 */
- buf->asize = buf->size + 1;
- } else {
- cmark_strbuf_grow(buf, asize);
- }
-}
-
int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b)
{
int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size));
@@ -247,20 +258,28 @@ int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b)
(a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0;
}
-int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, int pos)
+bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos)
{
+ if (pos >= buf->size)
+ return -1;
+ if (pos < 0)
+ pos = 0;
+
const unsigned char *p = (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos);
if (!p)
return -1;
- return (int)(p - (const unsigned char *)buf->ptr);
+ return (bufsize_t)(p - (const unsigned char *)buf->ptr);
}
-int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos)
+bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos)
{
- int i;
+ if (pos < 0 || buf->size == 0)
+ return -1;
+ if (pos >= buf->size)
+ pos = buf->size - 1;
- for (i = pos; i >= 0; i--) {
+ for (bufsize_t i = pos; i >= 0; i--) {
if (buf->ptr[i] == (unsigned char) c)
return i;
}
@@ -268,17 +287,22 @@ int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos)
return -1;
}
-void cmark_strbuf_truncate(cmark_strbuf *buf, int len)
+void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len)
{
+ if (len < 0)
+ len = 0;
+
if (len < buf->size) {
buf->size = len;
buf->ptr[buf->size] = '\0';
}
}
-void cmark_strbuf_drop(cmark_strbuf *buf, int n)
+void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n)
{
if (n > 0) {
+ if (n > buf->size)
+ n = buf->size;
buf->size = buf->size - n;
if (buf->size)
memmove(buf->ptr, buf->ptr + n, buf->size);
@@ -304,7 +328,7 @@ void cmark_strbuf_rtrim(cmark_strbuf *buf)
void cmark_strbuf_trim(cmark_strbuf *buf)
{
- int i = 0;
+ bufsize_t i = 0;
if (!buf->size)
return;
@@ -322,7 +346,7 @@ void cmark_strbuf_trim(cmark_strbuf *buf)
void cmark_strbuf_normalize_whitespace(cmark_strbuf *s)
{
bool last_char_was_space = false;
- int r, w;
+ bufsize_t r, w;
for (r = 0, w = 0; r < s->size; ++r) {
switch (s->ptr[r]) {
@@ -347,7 +371,7 @@ void cmark_strbuf_normalize_whitespace(cmark_strbuf *s)
// Destructively unescape a string: remove backslashes before punctuation chars.
extern void cmark_strbuf_unescape(cmark_strbuf *buf)
{
- int r, w;
+ bufsize_t r, w;
for (r = 0, w = 0; r < buf->size; ++r) {
if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1]))
diff --git a/src/buffer.h b/src/buffer.h
index 417df26..babd051 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -3,20 +3,25 @@
#include <stddef.h>
#include <stdarg.h>
+#include <string.h>
+#include <limits.h>
#include "config.h"
#ifdef __cplusplus
extern "C" {
#endif
+typedef int bufsize_t;
+
typedef struct {
unsigned char *ptr;
- int asize, size;
+ bufsize_t asize, size;
} cmark_strbuf;
extern unsigned char cmark_strbuf__initbuf[];
#define GH_BUF_INIT { cmark_strbuf__initbuf, 0, 0 }
+#define BUFSIZE_MAX INT_MAX
/**
* Initialize a cmark_strbuf structure.
@@ -24,23 +29,22 @@ extern unsigned char cmark_strbuf__initbuf[];
* For the cases where GH_BUF_INIT cannot be used to do static
* initialization.
*/
-void cmark_strbuf_init(cmark_strbuf *buf, int initial_size);
+void cmark_strbuf_init(cmark_strbuf *buf, bufsize_t initial_size);
/**
* Grow the buffer to hold at least `target_size` bytes.
*/
-void cmark_strbuf_grow(cmark_strbuf *buf, int target_size);
+void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size);
void cmark_strbuf_free(cmark_strbuf *buf);
void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b);
-size_t cmark_strbuf_len(const cmark_strbuf *buf);
+bufsize_t cmark_strbuf_len(const cmark_strbuf *buf);
int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b);
-void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, int asize);
unsigned char *cmark_strbuf_detach(cmark_strbuf *buf);
-void cmark_strbuf_copy_cstr(char *data, int datasize, const cmark_strbuf *buf);
+void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf *buf);
static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf)
{
@@ -49,25 +53,41 @@ static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf)
#define cmark_strbuf_at(buf, n) ((buf)->ptr[n])
-void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len);
+void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t len);
void cmark_strbuf_sets(cmark_strbuf *buf, const char *string);
void cmark_strbuf_putc(cmark_strbuf *buf, int c);
-void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len);
+void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t len);
void cmark_strbuf_puts(cmark_strbuf *buf, const char *string);
void cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...)
CMARK_ATTRIBUTE((format (printf, 2, 3)));
void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap);
void cmark_strbuf_clear(cmark_strbuf *buf);
-int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, int pos);
-int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos);
-void cmark_strbuf_drop(cmark_strbuf *buf, int n);
-void cmark_strbuf_truncate(cmark_strbuf *buf, int len);
+bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos);
+bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos);
+void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n);
+void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len);
void cmark_strbuf_rtrim(cmark_strbuf *buf);
void cmark_strbuf_trim(cmark_strbuf *buf);
void cmark_strbuf_normalize_whitespace(cmark_strbuf *s);
void cmark_strbuf_unescape(cmark_strbuf *s);
+/* Print error and abort. */
+void cmark_strbuf_overflow_err(void);
+
+static inline bufsize_t
+cmark_strbuf_check_bufsize(size_t size) {
+ if (size > BUFSIZE_MAX) {
+ cmark_strbuf_overflow_err();
+ }
+ return (bufsize_t)size;
+}
+
+static inline bufsize_t
+cmark_strbuf_safe_strlen(const char *str) {
+ return cmark_strbuf_check_bufsize(strlen(str));
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/src/chunk.h b/src/chunk.h
index a246a9d..f23a02d 100644
--- a/src/chunk.h
+++ b/src/chunk.h
@@ -11,8 +11,8 @@
typedef struct {
unsigned char *data;
- int len;
- int alloc; // also implies a NULL-terminated string
+ bufsize_t len;
+ bufsize_t alloc; // also implies a NULL-terminated string
} cmark_chunk;
static inline void cmark_chunk_free(cmark_chunk *c)
@@ -51,10 +51,10 @@ static inline void cmark_chunk_trim(cmark_chunk *c)
cmark_chunk_rtrim(c);
}
-static inline int cmark_chunk_strchr(cmark_chunk *ch, int c, int offset)
+static inline bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c, bufsize_t offset)
{
const unsigned char *p = (unsigned char *)memchr(ch->data + offset, c, ch->len - offset);
- return p ? (int)(p - ch->data) : ch->len;
+ return p ? (bufsize_t)(p - ch->data) : ch->len;
}
static inline const char *cmark_chunk_to_cstr(cmark_chunk *c)
@@ -87,7 +87,7 @@ static inline void cmark_chunk_set_cstr(cmark_chunk *c, const char *str)
c->data = NULL;
c->alloc = 0;
} else {
- c->len = strlen(str);
+ c->len = cmark_strbuf_safe_strlen(str);
c->data = (unsigned char *)malloc(c->len + 1);
c->alloc = 1;
memcpy(c->data, str, c->len + 1);
@@ -96,11 +96,12 @@ static inline void cmark_chunk_set_cstr(cmark_chunk *c, const char *str)
static inline cmark_chunk cmark_chunk_literal(const char *data)
{
- cmark_chunk c = {(unsigned char *)data, data ? strlen(data) : 0, 0};
+ bufsize_t len = data ? cmark_strbuf_safe_strlen(data) : 0;
+ cmark_chunk c = {(unsigned char *)data, len, 0};
return c;
}
-static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, int pos, int len)
+static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, bufsize_t pos, bufsize_t len)
{
cmark_chunk c = {ch->data + pos, len, 0};
return c;
diff --git a/src/cmark.c b/src/cmark.c
index 79ceabf..35765b1 100644
--- a/src/cmark.c
+++ b/src/cmark.c
@@ -9,7 +9,7 @@
const int cmark_version = CMARK_VERSION;
const char cmark_version_string[] = CMARK_VERSION_STRING;
-char *cmark_markdown_to_html(const char *text, int len, int options)
+char *cmark_markdown_to_html(const char *text, size_t len, int options)
{
cmark_node *doc;
char *result;
diff --git a/src/cmark.h b/src/cmark.h
index 84c6f76..d86e13e 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -24,7 +24,7 @@ extern "C" {
* UTF-8-encoded string.
*/
CMARK_EXPORT
-char *cmark_markdown_to_html(const char *text, int len, int options);
+char *cmark_markdown_to_html(const char *text, size_t len, int options);
/** ## Node Structure
*/
diff --git a/src/commonmark.c b/src/commonmark.c
index dba1fcf..4594748 100644
--- a/src/commonmark.c
+++ b/src/commonmark.c
@@ -20,7 +20,7 @@ struct render_state {
int column;
int width;
int need_cr;
- int last_breakable;
+ bufsize_t last_breakable;
bool begin_line;
bool no_wrap;
bool in_tight_list_item;
@@ -237,30 +237,29 @@ shortest_unused_backtick_sequence(cmark_chunk *code)
static bool
is_autolink(cmark_node *node)
{
- const char *title;
- const char *url;
+ cmark_chunk *title;
+ cmark_chunk *url;
cmark_node *link_text;
if (node->type != CMARK_NODE_LINK) {
return false;
}
- url = cmark_node_get_url(node);
- if (url == NULL ||
- _scan_scheme((unsigned char *)url) == 0) {
+ url = &node->as.link.url;
+ if (url->len == 0 || scan_scheme(url, 0) == 0) {
return false;
}
- title = cmark_node_get_title(node);
+ title = &node->as.link.title;
// if it has a title, we can't treat it as an autolink:
- if (title != NULL && strlen(title) > 0) {
+ if (title->len > 0) {
return false;
}
link_text = node->first_child;
cmark_consolidate_text_nodes(link_text);
- return ((int)strlen(url) == link_text->as.literal.len &&
- strncmp(url,
+ return (url->len == link_text->as.literal.len &&
+ strncmp((char*)url->data,
(char*)link_text->as.literal.data,
link_text->as.literal.len) == 0);
}
@@ -289,11 +288,11 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
int numticks;
int i;
bool entering = (ev_type == CMARK_EVENT_ENTER);
- const char *info;
- const char *title;
+ cmark_chunk *info;
+ cmark_chunk *title;
cmark_strbuf listmarker = GH_BUF_INIT;
char *emph_delim;
- int marker_width;
+ bufsize_t marker_width;
// Don't adjust tight list status til we've started the list.
// Otherwise we loose the blank line between a paragraph and
@@ -396,12 +395,12 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
case CMARK_NODE_CODE_BLOCK:
blankline(state);
- info = cmark_node_get_fence_info(node);
+ info = &node->as.code.info;
code = &node->as.code.literal;
// use indented form if no info, and code doesn't
// begin or end with a blank line, and code isn't
// first thing in a list item
- if ((info == NULL || strlen(info) == 0) &&
+ if (info->len == 0 &&
(code->len > 2 &&
!isspace(code->data[0]) &&
!(isspace(code->data[code->len - 1]) &&
@@ -422,7 +421,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
lit(state, "`", false);
}
lit(state, " ", false);
- out(state, cmark_chunk_literal(info), false, LITERAL);
+ out(state, *info, false, LITERAL);
cr(state);
out(state, node->as.code.literal, false, LITERAL);
cr(state);
@@ -542,11 +541,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
out(state,
cmark_chunk_literal(cmark_node_get_url(node)),
false, URL);
- title = cmark_node_get_title(node);
- if (title && strlen(title) > 0) {
+ title = &node->as.link.title;
+ if (title->len > 0) {
lit(state, " \"", true);
- out(state, cmark_chunk_literal(title),
- false, TITLE);
+ out(state, *title, false, TITLE);
lit(state, "\"", false);
}
lit(state, ")", false);
@@ -560,10 +558,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
} else {
lit(state, "](", false);
out(state, cmark_chunk_literal(cmark_node_get_url(node)), false, URL);
- title = cmark_node_get_title(node);
- if (title && strlen(title) > 0) {
+ title = &node->as.link.title;
+ if (title->len > 0) {
lit(state, " \"", true);
- out(state, cmark_chunk_literal(title), false, TITLE);
+ out(state, *title, false, TITLE);
lit(state, "\"", false);
}
lit(state, ")", false);
diff --git a/src/houdini.h b/src/houdini.h
index 9f00f6d..b926cf3 100644
--- a/src/houdini.h
+++ b/src/houdini.h
@@ -31,19 +31,12 @@ extern "C" {
#define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
#define HOUDINI_UNESCAPED_SIZE(x) (x)
-extern size_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secure);
-extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_xml(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_uri(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_url(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_uri(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_url(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_js(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_js(cmark_strbuf *ob, const uint8_t *src, size_t size);
+extern bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, bufsize_t size);
+extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size);
+extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, int secure);
+extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size);
+extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, bufsize_t size);
+extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size);
#ifdef __cplusplus
}
diff --git a/src/houdini_href_e.c b/src/houdini_href_e.c
index 7527780..7fb958a 100644
--- a/src/houdini_href_e.c
+++ b/src/houdini_href_e.c
@@ -49,10 +49,10 @@ static const char HREF_SAFE[] = {
};
int
-houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, size_t size)
+houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size)
{
static const uint8_t hex_chars[] = "0123456789ABCDEF";
- size_t i = 0, org;
+ bufsize_t i = 0, org;
uint8_t hex_str[3];
hex_str[0] = '%';
diff --git a/src/houdini_html_e.c b/src/houdini_html_e.c
index 1a4c3e1..7f4b91f 100644
--- a/src/houdini_html_e.c
+++ b/src/houdini_html_e.c
@@ -45,9 +45,9 @@ static const char *HTML_ESCAPES[] = {
};
int
-houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secure)
+houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, int secure)
{
- size_t i = 0, org, esc = 0;
+ bufsize_t i = 0, org, esc = 0;
while (i < size) {
org = i;
@@ -75,7 +75,7 @@ houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secu
}
int
-houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, size_t size)
+houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size)
{
return houdini_escape_html0(ob, src, size, 1);
}
diff --git a/src/houdini_html_u.c b/src/houdini_html_u.c
index eaf295e..e57894d 100644
--- a/src/houdini_html_u.c
+++ b/src/houdini_html_u.c
@@ -7,10 +7,10 @@
#include "utf8.h"
#include "html_unescape.h"
-size_t
-houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size)
+bufsize_t
+houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, bufsize_t size)
{
- size_t i = 0;
+ bufsize_t i = 0;
if (size >= 3 && src[0] == '#') {
int codepoint = 0;
@@ -68,7 +68,7 @@ houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size)
const struct html_ent *entity = find_entity((char *)src, i);
if (entity != NULL) {
- int len = 0;
+ bufsize_t len = 0;
while (len < 4 && entity->utf8[len] != '\0') {
++len;
}
@@ -85,9 +85,9 @@ houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size)
}
int
-houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size)
+houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size)
{
- size_t i = 0, org, ent;
+ bufsize_t i = 0, org, ent;
while (i < size) {
org = i;
@@ -122,7 +122,7 @@ houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size)
return 1;
}
-void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, size_t size)
+void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, bufsize_t size)
{
if (!houdini_unescape_html(ob, src, size))
cmark_strbuf_put(ob, src, size);
diff --git a/src/html.c b/src/html.c
index d3f9fc7..a30bbca 100644
--- a/src/html.c
+++ b/src/html.c
@@ -11,20 +11,9 @@
// Functions to convert cmark_nodes to HTML strings.
-static void escape_html(cmark_strbuf *dest, const unsigned char *source, int length)
+static void escape_html(cmark_strbuf *dest, const unsigned char *source, bufsize_t length)
{
- if (length < 0)
- length = strlen((char *)source);
-
- houdini_escape_html0(dest, source, (size_t)length, 0);
-}
-
-static void escape_href(cmark_strbuf *dest, const unsigned char *source, int length)
-{
- if (length < 0)
- length = strlen((char *)source);
-
- houdini_escape_href(dest, source, (size_t)length);
+ houdini_escape_html0(dest, source, length, 0);
}
static inline void cr(cmark_strbuf *html)
@@ -165,7 +154,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
S_render_sourcepos(node, html, options);
cmark_strbuf_puts(html, "><code>");
} else {
- int first_tag = 0;
+ bufsize_t first_tag = 0;
while (first_tag < node->as.code.info.len &&
node->as.code.info.data[first_tag] != ' ') {
first_tag += 1;
@@ -261,8 +250,8 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
case CMARK_NODE_LINK:
if (entering) {
cmark_strbuf_puts(html, "<a href=\"");
- escape_href(html, node->as.link.url.data,
- node->as.link.url.len);
+ houdini_escape_href(html, node->as.link.url.data,
+ node->as.link.url.len);
if (node->as.link.title.len) {
cmark_strbuf_puts(html, "\" title=\"");
@@ -279,8 +268,8 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
case CMARK_NODE_IMAGE:
if (entering) {
cmark_strbuf_puts(html, "<img src=\"");
- escape_href(html, node->as.link.url.data,
- node->as.link.url.len);
+ houdini_escape_href(html, node->as.link.url.data,
+ node->as.link.url.len);
cmark_strbuf_puts(html, "\" alt=\"");
state->plain = node;
diff --git a/src/inlines.c b/src/inlines.c
index 8a1ee44..7e8f806 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -36,7 +36,7 @@ typedef struct delimiter {
struct delimiter *previous;
struct delimiter *next;
cmark_node *inl_text;
- int position;
+ bufsize_t position;
unsigned char delim_char;
bool can_open;
bool can_close;
@@ -45,7 +45,7 @@ typedef struct delimiter {
typedef struct {
cmark_chunk input;
- int pos;
+ bufsize_t pos;
cmark_reference_map *refmap;
delimiter *last_delim;
} subject;
@@ -57,7 +57,7 @@ static int parse_inline(subject* subj, cmark_node * parent, int options);
static void subject_from_buf(subject *e, cmark_strbuf *buffer,
cmark_reference_map *refmap);
-static int subject_find_special_char(subject *subj, int options);
+static bufsize_t subject_find_special_char(subject *subj, int options);
static cmark_chunk cmark_clean_autolink(cmark_chunk *url, int is_email)
{
@@ -143,7 +143,7 @@ static inline cmark_node* make_simple(cmark_node_type t)
static cmark_chunk chunk_clone(cmark_chunk *src)
{
cmark_chunk c;
- int len = src->len;
+ bufsize_t len = src->len;
c.len = len;
c.data = (unsigned char *)malloc(len + 1);
@@ -177,7 +177,7 @@ static inline unsigned char peek_char(subject *subj)
return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
}
-static inline unsigned char peek_at(subject *subj, int pos)
+static inline unsigned char peek_at(subject *subj, bufsize_t pos)
{
return subj->input.data[pos];
}
@@ -195,8 +195,8 @@ static inline int is_eof(subject* subj)
static inline cmark_chunk take_while(subject* subj, int (*f)(int))
{
unsigned char c;
- int startpos = subj->pos;
- int len = 0;
+ bufsize_t startpos = subj->pos;
+ bufsize_t len = 0;
while ((c = peek_char(subj)) && (*f)(c)) {
advance(subj);
@@ -211,7 +211,7 @@ static inline cmark_chunk take_while(subject* subj, int (*f)(int))
// parsed). Return 0 if you don't find matching closing
// backticks, otherwise return the position in the subject
// after the closing backticks.
-static int scan_to_closing_backticks(subject* subj, int openticklength)
+static bufsize_t scan_to_closing_backticks(subject* subj, bufsize_t openticklength)
{
// read non backticks
unsigned char c;
@@ -221,7 +221,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
if (is_eof(subj)) {
return 0; // did not find closing ticks, return 0
}
- int numticks = 0;
+ bufsize_t numticks = 0;
while (peek_char(subj) == '`') {
advance(subj);
numticks++;
@@ -237,8 +237,8 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
static cmark_node* handle_backticks(subject *subj)
{
cmark_chunk openticks = take_while(subj, isbacktick);
- int startpos = subj->pos;
- int endpos = scan_to_closing_backticks(subj, openticks.len);
+ bufsize_t startpos = subj->pos;
+ bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
if (endpos == 0) { // not found
subj->pos = startpos; // rewind
@@ -260,7 +260,7 @@ static int
scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
{
int numdelims = 0;
- int before_char_pos;
+ bufsize_t before_char_pos;
int32_t after_char = 0;
int32_t before_char = 0;
int len;
@@ -376,7 +376,7 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open,
// Assumes the subject has a c at the current position.
static cmark_node* handle_delim(subject* subj, unsigned char c, bool smart)
{
- int numdelims;
+ bufsize_t numdelims;
cmark_node * inl_text;
bool can_open, can_close;
cmark_chunk contents;
@@ -500,11 +500,11 @@ static delimiter*
S_insert_emph(subject *subj, delimiter *opener, delimiter *closer)
{
delimiter *delim, *tmp_delim;
- int use_delims;
+ bufsize_t use_delims;
cmark_node *opener_inl = opener->inl_text;
cmark_node *closer_inl = closer->inl_text;
- int opener_num_chars = opener_inl->as.literal.len;
- int closer_num_chars = closer_inl->as.literal.len;
+ bufsize_t opener_num_chars = opener_inl->as.literal.len;
+ bufsize_t closer_num_chars = closer_inl->as.literal.len;
cmark_node *tmp, *emph, *first_child, *last_child;
// calculate the actual number of characters used from this closer
@@ -596,7 +596,7 @@ static cmark_node* handle_backslash(subject *subj)
static cmark_node* handle_entity(subject* subj)
{
cmark_strbuf ent = GH_BUF_INIT;
- size_t len;
+ bufsize_t len;
advance(subj);
@@ -618,7 +618,7 @@ static cmark_node *make_str_with_entities(cmark_chunk *content)
{
cmark_strbuf unescaped = GH_BUF_INIT;
- if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) {
+ if (houdini_unescape_html(&unescaped, content->data, content->len)) {
return make_str(cmark_chunk_buf_detach(&unescaped));
} else {
return make_str(*content);
@@ -678,7 +678,7 @@ cmark_chunk cmark_clean_title(cmark_chunk *title)
// Assumes the subject has a '<' character at the current position.
static cmark_node* handle_pointy_brace(subject* subj)
{
- int matchlen = 0;
+ bufsize_t matchlen = 0;
cmark_chunk contents;
advance(subj); // advance past first <
@@ -725,7 +725,7 @@ static cmark_node* handle_pointy_brace(subject* subj)
// encountered. Backticks in labels do not start code spans.
static int link_label(subject* subj, cmark_chunk *raw_label)
{
- int startpos = subj->pos;
+ bufsize_t startpos = subj->pos;
int length = 0;
unsigned char c;
@@ -769,10 +769,10 @@ noMatch:
// Return a link, an image, or a literal close bracket.
static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
{
- int initial_pos;
- int starturl, endurl, starttitle, endtitle, endall;
- int n;
- int sps;
+ bufsize_t initial_pos;
+ bufsize_t starturl, endurl, starttitle, endtitle, endall;
+ bufsize_t n;
+ bufsize_t sps;
cmark_reference *ref;
bool is_image = false;
cmark_chunk url_chunk, title_chunk;
@@ -922,7 +922,7 @@ match:
// Assumes the subject has a newline at the current position.
static cmark_node* handle_newline(subject *subj)
{
- int nlpos = subj->pos;
+ bufsize_t nlpos = subj->pos;
// skip over newline
advance(subj);
// skip spaces at beginning of line
@@ -938,7 +938,7 @@ static cmark_node* handle_newline(subject *subj)
}
}
-static int subject_find_special_char(subject *subj, int options)
+static bufsize_t subject_find_special_char(subject *subj, int options)
{
// "\r\n\\`&_*[]<!"
static const int8_t SPECIAL_CHARS[256] = {
@@ -980,7 +980,7 @@ static int subject_find_special_char(subject *subj, int options)
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
- int n = subj->pos + 1;
+ bufsize_t n = subj->pos + 1;
while (n < subj->input.len) {
if (SPECIAL_CHARS[subj->input.data[n]])
@@ -1001,7 +1001,7 @@ static int parse_inline(subject* subj, cmark_node * parent, int options)
cmark_node* new_inl = NULL;
cmark_chunk contents;
unsigned char c;
- int endpos;
+ bufsize_t endpos;
c = peek_char(subj);
if (c == 0) {
return 0;
@@ -1098,7 +1098,7 @@ static void spnl(subject* subj)
// Modify refmap if a reference is encountered.
// Return 0 if no reference found, otherwise position of subject
// after reference is parsed.
-int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap)
+bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap)
{
subject subj;
@@ -1106,8 +1106,8 @@ int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refma
cmark_chunk url;
cmark_chunk title;
- int matchlen = 0;
- int beforetitle;
+ bufsize_t matchlen = 0;
+ bufsize_t beforetitle;
subject_from_buf(&subj, input, NULL);
diff --git a/src/inlines.h b/src/inlines.h
index 534588e..f8847fc 100644
--- a/src/inlines.h
+++ b/src/inlines.h
@@ -10,7 +10,7 @@ cmark_chunk cmark_clean_title(cmark_chunk *title);
void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap, int options);
-int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap);
+bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap);
#ifdef __cplusplus
}
diff --git a/src/parser.h b/src/parser.h
index ccdf84b..6e18c67 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -16,12 +16,12 @@ struct cmark_parser {
struct cmark_node* root;
struct cmark_node* current;
int line_number;
- int offset;
- int first_nonspace;
+ bufsize_t offset;
+ bufsize_t first_nonspace;
int indent;
bool blank;
cmark_strbuf *curline;
- int last_line_length;
+ bufsize_t last_line_length;
cmark_strbuf *linebuf;
int options;
};
diff --git a/src/scanners.c b/src/scanners.c
index 7f9ed2e..3f4ddac 100644
--- a/src/scanners.c
+++ b/src/scanners.c
@@ -1,11 +1,11 @@
-/* Generated by re2c 0.13.6 */
+/* Generated by re2c 0.13.5 */
#include <stdlib.h>
#include "chunk.h"
#include "scanners.h"
-int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset)
+bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset)
{
- int res;
+ bufsize_t res;
unsigned char *ptr = (unsigned char *)c->data;
unsigned char lim = ptr[c->len];
@@ -19,7 +19,7 @@ int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset)
// Try to match a scheme including colon.
-int _scan_scheme(const unsigned char *p)
+bufsize_t _scan_scheme(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
@@ -578,7 +578,7 @@ yy34:
if (yych != ':') goto yy31;
yy35:
++p;
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy37:
yych = *++p;
if (yych == 'E') goto yy38;
@@ -2919,7 +2919,7 @@ yy484:
}
// Try to match URI autolink after first <, returning number of chars matched.
-int _scan_autolink_uri(const unsigned char *p)
+bufsize_t _scan_autolink_uri(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
@@ -3517,7 +3517,7 @@ yy520:
}
if (yych <= '=') goto yy516;
++p;
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy524:
yych = *++p;
if (yych == 'E') goto yy525;
@@ -5858,7 +5858,7 @@ yy971:
}
// Try to match email autolink after first <, returning num of chars matched.
-int _scan_autolink_email(const unsigned char *p)
+bufsize_t _scan_autolink_email(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
@@ -6060,7 +6060,7 @@ yy984:
}
yy985:
++p;
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy987:
++p;
yych = *p;
@@ -10803,7 +10803,7 @@ yy1230:
}
// Try to match an HTML tag after first <, returning num of chars matched.
-int _scan_html_tag(const unsigned char *p)
+bufsize_t _scan_html_tag(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
@@ -10964,7 +10964,7 @@ yy1242:
if (yych != '>') goto yy1239;
yy1243:
++p;
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy1245:
yych = *++p;
if (yych == 'C') goto yy1260;
@@ -11455,7 +11455,7 @@ yy1297:
// Try to match an HTML block tag including first <,
// returning num of chars matched.
-int _scan_html_block_tag(const unsigned char *p)
+bufsize_t _scan_html_block_tag(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
@@ -11513,7 +11513,7 @@ yy1303:
goto yy1301;
yy1304:
++p;
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy1306:
yych = *++p;
if (yych <= '/') {
@@ -12022,7 +12022,7 @@ yy1343:
}
yy1344:
++p;
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy1346:
yych = *++p;
if (yych <= 'R') {
@@ -12639,7 +12639,7 @@ yy1466:
}
yy1467:
++p;
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy1469:
yych = *++p;
if (yych <= 'R') {
@@ -13243,7 +13243,7 @@ yy1585:
// This may optionally be contained in <..>; otherwise
// whitespace and unbalanced right parentheses aren't allowed.
// Newlines aren't ever allowed.
-int _scan_link_url(const unsigned char *p)
+bufsize_t _scan_link_url(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
@@ -13308,7 +13308,7 @@ int _scan_link_url(const unsigned char *p)
}
}
yy1588:
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy1589:
yyaccept = 0;
marker = ++p;
@@ -13402,7 +13402,7 @@ yy1599:
yy1600:
p = marker;
if (yyaccept <= 1) {
- if (yyaccept == 0) {
+ if (yyaccept <= 0) {
goto yy1588;
} else {
goto yy1595;
@@ -13490,7 +13490,7 @@ yy1607:
if (yych <= ' ') goto yy1608;
if (yych != ')') goto yy1603;
yy1608:
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy1609:
++p;
yych = *p;
@@ -13732,7 +13732,7 @@ yy1623:
// Try to match a link title (in single quotes, in double quotes, or
// in parentheses), returning number of chars matched. Allow one
// level of internal nesting (quotes within quotes).
-int _scan_link_title(const unsigned char *p)
+bufsize_t _scan_link_title(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
@@ -13818,13 +13818,13 @@ yy1632:
yy1633:
p = marker;
if (yyaccept <= 1) {
- if (yyaccept == 0) {
+ if (yyaccept <= 0) {
goto yy1626;
} else {
goto yy1637;
}
} else {
- if (yyaccept == 2) {
+ if (yyaccept <= 2) {
goto yy1644;
} else {
goto yy1651;
@@ -13842,7 +13842,7 @@ yy1634:
yy1636:
++p;
yy1637:
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy1638:
yyaccept = 1;
marker = ++p;
@@ -13874,7 +13874,7 @@ yy1641:
yy1643:
++p;
yy1644:
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy1645:
yyaccept = 2;
marker = ++p;
@@ -13906,7 +13906,7 @@ yy1648:
yy1650:
++p;
yy1651:
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy1652:
yyaccept = 3;
marker = ++p;
@@ -13922,7 +13922,7 @@ yy1652:
}
// Match space characters, including newlines.
-int _scan_spacechars(const unsigned char *p)
+bufsize_t _scan_spacechars(const unsigned char *p)
{
const unsigned char *start = p; \
@@ -13973,7 +13973,7 @@ int _scan_spacechars(const unsigned char *p)
goto yy1659;
}
yy1655:
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy1656:
yych = *++p;
goto yy1658;
@@ -13993,7 +13993,7 @@ yy1659:
}
// Match ATX header start.
-int _scan_atx_header_start(const unsigned char *p)
+bufsize_t _scan_atx_header_start(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
@@ -14059,7 +14059,7 @@ yy1665:
yy1666:
++p;
yy1667:
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy1668:
++p;
yych = *p;
@@ -14128,7 +14128,7 @@ yy1672:
// Match setext header line. Return 1 for level-1 header,
// 2 for level-2, 0 for no match.
-int _scan_setext_header_line(const unsigned char *p)
+bufsize_t _scan_setext_header_line(const unsigned char *p)
{
const unsigned char *marker = NULL;
@@ -14269,7 +14269,7 @@ yy1693:
// Scan a horizontal rule line: "...three or more hyphens, asterisks,
// or underscores on a line by themselves. If you wish, you may use
// spaces between the hyphens or asterisks."
-int _scan_hrule(const unsigned char *p)
+bufsize_t _scan_hrule(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
@@ -14384,7 +14384,7 @@ yy1709:
if (yych != '\r') goto yy1704;
yy1711:
++p;
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy1713:
++p;
yych = *p;
@@ -14422,7 +14422,7 @@ yy1719:
}
yy1721:
++p;
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy1723:
++p;
yych = *p;
@@ -14460,13 +14460,13 @@ yy1729:
}
yy1731:
++p;
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
}
}
// Scan an opening code fence.
-int _scan_open_code_fence(const unsigned char *p)
+bufsize_t _scan_open_code_fence(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
@@ -14557,7 +14557,7 @@ yy1743:
yy1745:
++p;
p = marker;
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy1747:
yych = *++p;
if (yybm[0+yych] & 64) {
@@ -14585,13 +14585,13 @@ yy1750:
yy1752:
++p;
p = marker;
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
}
}
// Scan a closing code fence with length at least len.
-int _scan_close_code_fence(const unsigned char *p)
+bufsize_t _scan_close_code_fence(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
@@ -14687,7 +14687,7 @@ yy1764:
yy1766:
++p;
p = marker;
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy1768:
yych = *++p;
if (yybm[0+yych] & 128) {
@@ -14725,14 +14725,14 @@ yy1771:
yy1773:
++p;
p = marker;
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
}
}
// Scans an entity.
// Returns number of chars matched.
-int _scan_entity(const unsigned char *p)
+bufsize_t _scan_entity(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
@@ -14799,7 +14799,7 @@ yy1783:
}
yy1784:
++p;
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
yy1786:
yych = *++p;
if (yych <= ';') {
diff --git a/src/scanners.h b/src/scanners.h
index 1353f3b..bc5134e 100644
--- a/src/scanners.h
+++ b/src/scanners.h
@@ -5,21 +5,21 @@
extern "C" {
#endif
-int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset);
-int _scan_scheme(const unsigned char *p);
-int _scan_autolink_uri(const unsigned char *p);
-int _scan_autolink_email(const unsigned char *p);
-int _scan_html_tag(const unsigned char *p);
-int _scan_html_block_tag(const unsigned char *p);
-int _scan_link_url(const unsigned char *p);
-int _scan_link_title(const unsigned char *p);
-int _scan_spacechars(const unsigned char *p);
-int _scan_atx_header_start(const unsigned char *p);
-int _scan_setext_header_line(const unsigned char *p);
-int _scan_hrule(const unsigned char *p);
-int _scan_open_code_fence(const unsigned char *p);
-int _scan_close_code_fence(const unsigned char *p);
-int _scan_entity(const unsigned char *p);
+bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset);
+bufsize_t _scan_scheme(const unsigned char *p);
+bufsize_t _scan_autolink_uri(const unsigned char *p);
+bufsize_t _scan_autolink_email(const unsigned char *p);
+bufsize_t _scan_html_tag(const unsigned char *p);
+bufsize_t _scan_html_block_tag(const unsigned char *p);
+bufsize_t _scan_link_url(const unsigned char *p);
+bufsize_t _scan_link_title(const unsigned char *p);
+bufsize_t _scan_spacechars(const unsigned char *p);
+bufsize_t _scan_atx_header_start(const unsigned char *p);
+bufsize_t _scan_setext_header_line(const unsigned char *p);
+bufsize_t _scan_hrule(const unsigned char *p);
+bufsize_t _scan_open_code_fence(const unsigned char *p);
+bufsize_t _scan_close_code_fence(const unsigned char *p);
+bufsize_t _scan_entity(const unsigned char *p);
#define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n)
#define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n)
diff --git a/src/scanners.re b/src/scanners.re
index 9411018..3722a99 100644
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -2,9 +2,9 @@
#include "chunk.h"
#include "scanners.h"
-int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset)
+bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset)
{
- int res;
+ bufsize_t res;
unsigned char *ptr = (unsigned char *)c->data;
unsigned char lim = ptr[c->len];
@@ -70,29 +70,29 @@ int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset)
*/
// Try to match a scheme including colon.
-int _scan_scheme(const unsigned char *p)
+bufsize_t _scan_scheme(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
- scheme [:] { return (p - start); }
+ scheme [:] { return (bufsize_t)(p - start); }
.? { return 0; }
*/
}
// Try to match URI autolink after first <, returning number of chars matched.
-int _scan_autolink_uri(const unsigned char *p)
+bufsize_t _scan_autolink_uri(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
- scheme [:][^\x00-\x20<>]*[>] { return (p - start); }
+ scheme [:][^\x00-\x20<>]*[>] { return (bufsize_t)(p - start); }
.? { return 0; }
*/
}
// Try to match email autolink after first <, returning num of chars matched.
-int _scan_autolink_email(const unsigned char *p)
+bufsize_t _scan_autolink_email(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
@@ -101,32 +101,32 @@ int _scan_autolink_email(const unsigned char *p)
[@]
[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*
- [>] { return (p - start); }
+ [>] { return (bufsize_t)(p - start); }
.? { return 0; }
*/
}
// Try to match an HTML tag after first <, returning num of chars matched.
-int _scan_html_tag(const unsigned char *p)
+bufsize_t _scan_html_tag(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
- htmltag { return (p - start); }
+ htmltag { return (bufsize_t)(p - start); }
.? { return 0; }
*/
}
// Try to match an HTML block tag including first <,
// returning num of chars matched.
-int _scan_html_block_tag(const unsigned char *p)
+bufsize_t _scan_html_block_tag(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
- [<] [/] blocktagname (spacechar | [>]) { return (p - start); }
- [<] blocktagname (spacechar | [/>]) { return (p - start); }
- [<] [!?] { return (p - start); }
+ [<] [/] blocktagname (spacechar | [>]) { return (bufsize_t)(p - start); }
+ [<] blocktagname (spacechar | [/>]) { return (bufsize_t)(p - start); }
+ [<] [!?] { return (bufsize_t)(p - start); }
.? { return 0; }
*/
}
@@ -135,13 +135,13 @@ int _scan_html_block_tag(const unsigned char *p)
// This may optionally be contained in <..>; otherwise
// whitespace and unbalanced right parentheses aren't allowed.
// Newlines aren't ever allowed.
-int _scan_link_url(const unsigned char *p)
+bufsize_t _scan_link_url(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
- [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
- [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }
+ [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (bufsize_t)(p - start); }
+ [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (bufsize_t)(p - start); }
.? { return 0; }
*/
}
@@ -149,42 +149,42 @@ int _scan_link_url(const unsigned char *p)
// Try to match a link title (in single quotes, in double quotes, or
// in parentheses), returning number of chars matched. Allow one
// level of internal nesting (quotes within quotes).
-int _scan_link_title(const unsigned char *p)
+bufsize_t _scan_link_title(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
- ["] (escaped_char|[^"\x00])* ["] { return (p - start); }
- ['] (escaped_char|[^'\x00])* ['] { return (p - start); }
- [(] (escaped_char|[^)\x00])* [)] { return (p - start); }
+ ["] (escaped_char|[^"\x00])* ["] { return (bufsize_t)(p - start); }
+ ['] (escaped_char|[^'\x00])* ['] { return (bufsize_t)(p - start); }
+ [(] (escaped_char|[^)\x00])* [)] { return (bufsize_t)(p - start); }
.? { return 0; }
*/
}
// Match space characters, including newlines.
-int _scan_spacechars(const unsigned char *p)
+bufsize_t _scan_spacechars(const unsigned char *p)
{
const unsigned char *start = p; \
/*!re2c
- [ \t\v\f\r\n]* { return (p - start); }
+ [ \t\v\f\r\n]* { return (bufsize_t)(p - start); }
. { return 0; }
*/
}
// Match ATX header start.
-int _scan_atx_header_start(const unsigned char *p)
+bufsize_t _scan_atx_header_start(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
- [#]{1,6} ([ ]+|[\r\n]) { return (p - start); }
+ [#]{1,6} ([ ]+|[\r\n]) { return (bufsize_t)(p - start); }
.? { return 0; }
*/
}
// Match setext header line. Return 1 for level-1 header,
// 2 for level-2, 0 for no match.
-int _scan_setext_header_line(const unsigned char *p)
+bufsize_t _scan_setext_header_line(const unsigned char *p)
{
const unsigned char *marker = NULL;
/*!re2c
@@ -197,51 +197,51 @@ int _scan_setext_header_line(const unsigned char *p)
// Scan a horizontal rule line: "...three or more hyphens, asterisks,
// or underscores on a line by themselves. If you wish, you may use
// spaces between the hyphens or asterisks."
-int _scan_hrule(const unsigned char *p)
+bufsize_t _scan_hrule(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
- ([*][ ]*){3,} [ \t]* [\r\n] { return (p - start); }
- ([_][ ]*){3,} [ \t]* [\r\n] { return (p - start); }
- ([-][ ]*){3,} [ \t]* [\r\n] { return (p - start); }
+ ([*][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); }
+ ([_][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); }
+ ([-][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); }
.? { return 0; }
*/
}
// Scan an opening code fence.
-int _scan_open_code_fence(const unsigned char *p)
+bufsize_t _scan_open_code_fence(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
- [`]{3,} / [^`\r\n\x00]*[\r\n] { return (p - start); }
- [~]{3,} / [^~\r\n\x00]*[\r\n] { return (p - start); }
+ [`]{3,} / [^`\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); }
+ [~]{3,} / [^~\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); }
.? { return 0; }
*/
}
// Scan a closing code fence with length at least len.
-int _scan_close_code_fence(const unsigned char *p)
+bufsize_t _scan_close_code_fence(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
- [`]{3,} / [ \t]*[\r\n] { return (p - start); }
- [~]{3,} / [ \t]*[\r\n] { return (p - start); }
+ [`]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); }
+ [~]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); }
.? { return 0; }
*/
}
// Scans an entity.
// Returns number of chars matched.
-int _scan_entity(const unsigned char *p)
+bufsize_t _scan_entity(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;]
- { return (p - start); }
+ { return (bufsize_t)(p - start); }
.? { return 0; }
*/
}
diff --git a/src/utf8.c b/src/utf8.c
index b83c2a5..ba1d873 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -30,7 +30,7 @@ static void encode_unknown(cmark_strbuf *buf)
cmark_strbuf_put(buf, repl, 3);
}
-static int utf8proc_charlen(const uint8_t *str, int str_len)
+static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len)
{
int length, i;
@@ -42,7 +42,7 @@ static int utf8proc_charlen(const uint8_t *str, int str_len)
if (!length)
return -1;
- if (str_len >= 0 && length > str_len)
+ if (str_len >= 0 && (bufsize_t)length > str_len)
return -str_len;
for (i = 1; i < length; i++) {
@@ -54,7 +54,7 @@ static int utf8proc_charlen(const uint8_t *str, int str_len)
}
// Validate a single UTF-8 character according to RFC 3629.
-static int utf8proc_valid(const uint8_t *str, int str_len)
+static int utf8proc_valid(const uint8_t *str, bufsize_t str_len)
{
int length = utf8proc_charlen(str, str_len);
@@ -109,14 +109,14 @@ static int utf8proc_valid(const uint8_t *str, int str_len)
return length;
}
-void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, size_t size)
+void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, bufsize_t size)
{
static const uint8_t whitespace[] = " ";
- size_t i = 0, tab = 0;
+ bufsize_t i = 0, tab = 0;
while (i < size) {
- size_t org = i;
+ bufsize_t org = i;
while (i < size && line[i] != '\t' && line[i] != '\0'
&& line[i] < 0x80) {
@@ -151,7 +151,7 @@ void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, size_t size)
}
}
-int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
+int utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst)
{
int length;
int32_t uc = -1;
@@ -191,7 +191,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf)
{
uint8_t dst[4];
- int len = 0;
+ bufsize_t len = 0;
assert(uc >= 0);
@@ -227,7 +227,7 @@ void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf)
cmark_strbuf_put(buf, dst, len);
}
-void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len)
+void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len)
{
int32_t c;
@@ -235,7 +235,7 @@ void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len)
utf8proc_encode_char(x, dest)
while (len > 0) {
- int char_len = utf8proc_iterate(str, len, &c);
+ bufsize_t char_len = utf8proc_iterate(str, len, &c);
if (char_len >= 0) {
#include "case_fold_switch.inc"
diff --git a/src/utf8.h b/src/utf8.h
index 7df1573..ed1d7ee 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -8,10 +8,10 @@
extern "C" {
#endif
-void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len);
+void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len);
void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf);
-int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst);
-void utf8proc_detab(cmark_strbuf *dest, const uint8_t *line, size_t size);
+int utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst);
+void utf8proc_detab(cmark_strbuf *dest, const uint8_t *line, bufsize_t size);
int utf8proc_is_space(int32_t uc);
int utf8proc_is_punctuation(int32_t uc);
diff --git a/src/xml.c b/src/xml.c
index 14f6d67..7eec5a6 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -11,14 +11,9 @@
// Functions to convert cmark_nodes to XML strings.
-static void escape_xml(cmark_strbuf *dest, const unsigned char *source, int length)
+static void escape_xml(cmark_strbuf *dest, const unsigned char *source, bufsize_t length)
{
- if (source != NULL) {
- if (length < 0)
- length = strlen((char *)source);
-
- houdini_escape_html0(dest, source, (size_t)length, 0);
- }
+ houdini_escape_html0(dest, source, length, 0);
}
struct render_state {