From 7382fd5eba48107a8190bd2d6232cc3b6e20d8fc Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Sun, 7 Jun 2015 16:52:44 +0200
Subject: Convert code base to bufsize_t

A few places that still use int or size_t were probably missed. Those
leftovers only matter if bufsize_t ever becomes a 64-bit type, in which
case we'll get warnings from -Wshorten-64-to-32.
---
 src/blocks.c         | 22 +++++++-------
 src/chunk.h          | 10 +++----
 src/commonmark.c     |  4 +--
 src/houdini.h        | 12 ++++----
 src/houdini_href_e.c |  4 +--
 src/houdini_html_e.c |  6 ++--
 src/houdini_html_u.c | 14 ++++-----
 src/html.c           | 10 +++----
 src/inlines.c        | 62 +++++++++++++++++++-------------------
 src/inlines.h        |  2 +-
 src/parser.h         |  6 ++--
 src/scanners.c       | 84 ++++++++++++++++++++++++++--------------------------
 src/scanners.h       | 30 +++++++++----------
 src/scanners.re      | 76 +++++++++++++++++++++++------------------------
 src/utf8.c           | 20 ++++++-------
 src/utf8.h           |  6 ++--
 src/xml.c            |  4 +--
 17 files changed, 186 insertions(+), 186 deletions(-)

diff --git a/src/blocks.c b/src/blocks.c
index b72c256..72b1ca5 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -30,7 +30,7 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,
 
 static void
 S_process_line(cmark_parser *parser, const unsigned char *buffer,
-               size_t bytes);
+               bufsize_t bytes);
 
 static cmark_node* make_block(cmark_node_type tag, int start_line, int start_column)
 {
@@ -95,7 +95,7 @@ static cmark_node*
 finalize(cmark_parser *parser, cmark_node* b);
 
 // Returns true if line has only space characters, else false.
-static bool is_blank(cmark_strbuf *s, int offset)
+static bool is_blank(cmark_strbuf *s, bufsize_t offset)
 {
 	while (offset < s->size) {
 		switch (s->ptr[offset]) {
@@ -128,7 +128,7 @@ static inline bool accepts_lines(cmark_node_type block_type)
 	        block_type == NODE_CODE_BLOCK);
 }
 
-static void add_line(cmark_node* node, cmark_chunk *ch, int offset)
+static void add_line(cmark_node* node, cmark_chunk *ch, bufsize_t offset)
 {
 	assert(node->open);
 	cmark_strbuf_put(&node->string_content, ch->data + offset, ch->len - offset);
@@ -136,7 +136,7 @@ static void add_line(cmark_node* node, cmark_chunk *ch, int offset)
 
 static void remove_trailing_blank_lines(cmark_strbuf *ln)
 {
-	int i;
+	bufsize_t i;
 	unsigned char c;
 
 	for (i = ln->size - 1; i >= 0; --i) {
@@ -204,7 +204,7 @@ static int break_out_of_lists(cmark_parser *parser, cmark_node ** bptr)
 static cmark_node*
 finalize(cmark_parser *parser, cmark_node* b)
 {
-	int pos;
+	bufsize_t pos;
 	cmark_node* item;
 	cmark_node* subitem;
 	cmark_node* parent;
@@ -367,10 +367,10 @@ static void process_inlines(cmark_node* root, cmark_reference_map *refmap, int o
 // Attempts to parse a list item marker (bullet or enumerated).
 // On success, returns length of the marker, and populates
 // data with the details.  On failure, returns 0.
-static int parse_list_marker(cmark_chunk *input, int pos, cmark_list **dataptr)
+static bufsize_t parse_list_marker(cmark_chunk *input, bufsize_t pos, cmark_list **dataptr)
 {
 	unsigned char c;
-	int startpos;
+	bufsize_t startpos;
 	cmark_list *data;
 
 	startpos = pos;
@@ -533,7 +533,7 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,
 
 static void chop_trailing_hashtags(cmark_chunk *ch)
 {
-	int n, orig_n;
+	bufsize_t n, orig_n;
 
 	cmark_chunk_rtrim(ch);
 	orig_n = n = ch->len - 1;
@@ -562,10 +562,10 @@ S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input)
 }
 
 static void
-S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
+S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t bytes)
 {
 	cmark_node* last_matched_container;
-	int matched = 0;
+	bufsize_t matched = 0;
 	int lev = 0;
 	int i;
 	cmark_list *data = NULL;
@@ -712,7 +712,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 			parser->offset = parser->first_nonspace + matched;
 			container = add_child(parser, container, NODE_HEADER, parser->offset + 1);
 
-			int hashpos = cmark_chunk_strchr(&input, '#', parser->first_nonspace);
+			bufsize_t hashpos = cmark_chunk_strchr(&input, '#', parser->first_nonspace);
 			int level = 0;
 
 			while (peek_at(&input, hashpos) == '#') {
diff --git a/src/chunk.h b/src/chunk.h
index a246a9d..364918d 100644
--- a/src/chunk.h
+++ b/src/chunk.h
@@ -11,8 +11,8 @@
 
 typedef struct {
 	unsigned char *data;
-	int len;
-	int alloc;  // also implies a NULL-terminated string
+	bufsize_t len;
+	bufsize_t alloc;  // also implies a NULL-terminated string
 } cmark_chunk;
 
 static inline void cmark_chunk_free(cmark_chunk *c)
@@ -51,10 +51,10 @@ static inline void cmark_chunk_trim(cmark_chunk *c)
 	cmark_chunk_rtrim(c);
 }
 
-static inline int cmark_chunk_strchr(cmark_chunk *ch, int c, int offset)
+static inline bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c, bufsize_t offset)
 {
 	const unsigned char *p = (unsigned char *)memchr(ch->data + offset, c, ch->len - offset);
-	return p ? (int)(p - ch->data) : ch->len;
+	return p ? (bufsize_t)(p - ch->data) : ch->len;
 }
 
 static inline const char *cmark_chunk_to_cstr(cmark_chunk *c)
@@ -100,7 +100,7 @@ static inline cmark_chunk cmark_chunk_literal(const char *data)
 	return c;
 }
 
-static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, int pos, int len)
+static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, bufsize_t pos, bufsize_t len)
 {
 	cmark_chunk c = {ch->data + pos, len, 0};
 	return c;
diff --git a/src/commonmark.c b/src/commonmark.c
index dba1fcf..2022fd5 100644
--- a/src/commonmark.c
+++ b/src/commonmark.c
@@ -20,7 +20,7 @@ struct render_state {
 	int column;
 	int width;
 	int need_cr;
-	int last_breakable;
+	bufsize_t last_breakable;
 	bool begin_line;
 	bool no_wrap;
 	bool in_tight_list_item;
@@ -293,7 +293,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 	const char *title;
 	cmark_strbuf listmarker = GH_BUF_INIT;
 	char *emph_delim;
-	int marker_width;
+	bufsize_t marker_width;
 
 	// Don't adjust tight list status til we've started the list.
 	// Otherwise we loose the blank line between a paragraph and
diff --git a/src/houdini.h b/src/houdini.h
index 9f00f6d..2e7a354 100644
--- a/src/houdini.h
+++ b/src/houdini.h
@@ -31,15 +31,15 @@ extern "C" {
 #define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
 #define HOUDINI_UNESCAPED_SIZE(x) (x)
 
-extern size_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secure);
-extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, size_t size);
+extern bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, bufsize_t size);
+extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size);
+extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, int secure);
+extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size);
+extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, bufsize_t size);
 extern int houdini_escape_xml(cmark_strbuf *ob, const uint8_t *src, size_t size);
 extern int houdini_escape_uri(cmark_strbuf *ob, const uint8_t *src, size_t size);
 extern int houdini_escape_url(cmark_strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size);
 extern int houdini_unescape_uri(cmark_strbuf *ob, const uint8_t *src, size_t size);
 extern int houdini_unescape_url(cmark_strbuf *ob, const uint8_t *src, size_t size);
 extern int houdini_escape_js(cmark_strbuf *ob, const uint8_t *src, size_t size);
diff --git a/src/houdini_href_e.c b/src/houdini_href_e.c
index 7527780..7fb958a 100644
--- a/src/houdini_href_e.c
+++ b/src/houdini_href_e.c
@@ -49,10 +49,10 @@ static const char HREF_SAFE[] = {
 };
 
 int
-houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, size_t size)
+houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size)
 {
 	static const uint8_t hex_chars[] = "0123456789ABCDEF";
-	size_t  i = 0, org;
+	bufsize_t i = 0, org;
 	uint8_t hex_str[3];
 
 	hex_str[0] = '%';
diff --git a/src/houdini_html_e.c b/src/houdini_html_e.c
index 1a4c3e1..7f4b91f 100644
--- a/src/houdini_html_e.c
+++ b/src/houdini_html_e.c
@@ -45,9 +45,9 @@ static const char *HTML_ESCAPES[] = {
 };
 
 int
-houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secure)
+houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, int secure)
 {
-	size_t  i = 0, org, esc = 0;
+	bufsize_t i = 0, org, esc = 0;
 
 	while (i < size) {
 		org = i;
@@ -75,7 +75,7 @@ houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secu
 }
 
 int
-houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, size_t size)
+houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size)
 {
 	return houdini_escape_html0(ob, src, size, 1);
 }
diff --git a/src/houdini_html_u.c b/src/houdini_html_u.c
index eaf295e..e57894d 100644
--- a/src/houdini_html_u.c
+++ b/src/houdini_html_u.c
@@ -7,10 +7,10 @@
 #include "utf8.h"
 #include "html_unescape.h"
 
-size_t
-houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size)
+bufsize_t
+houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, bufsize_t size)
 {
-	size_t i = 0;
+	bufsize_t i = 0;
 
 	if (size >= 3 && src[0] == '#') {
 		int codepoint  = 0;
@@ -68,7 +68,7 @@ houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size)
 				const struct html_ent *entity = find_entity((char *)src, i);
 
 				if (entity != NULL) {
-					int len = 0;
+					bufsize_t len = 0;
 					while (len < 4 && entity->utf8[len] != '\0') {
 						++len;
 					}
@@ -85,9 +85,9 @@ houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size)
 }
 
 int
-houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size)
+houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size)
 {
-	size_t  i = 0, org, ent;
+	bufsize_t i = 0, org, ent;
 
 	while (i < size) {
 		org = i;
@@ -122,7 +122,7 @@ houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size)
 	return 1;
 }
 
-void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, size_t size)
+void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, bufsize_t size)
 {
 	if (!houdini_unescape_html(ob, src, size))
 		cmark_strbuf_put(ob, src, size);
diff --git a/src/html.c b/src/html.c
index d3f9fc7..317eb45 100644
--- a/src/html.c
+++ b/src/html.c
@@ -11,20 +11,20 @@
 
 // Functions to convert cmark_nodes to HTML strings.
 
-static void escape_html(cmark_strbuf *dest, const unsigned char *source, int length)
+static void escape_html(cmark_strbuf *dest, const unsigned char *source, bufsize_t length)
 {
 	if (length < 0)
 		length = strlen((char *)source);
 
-	houdini_escape_html0(dest, source, (size_t)length, 0);
+	houdini_escape_html0(dest, source, length, 0);
 }
 
-static void escape_href(cmark_strbuf *dest, const unsigned char *source, int length)
+static void escape_href(cmark_strbuf *dest, const unsigned char *source, bufsize_t length)
 {
 	if (length < 0)
 		length = strlen((char *)source);
 
-	houdini_escape_href(dest, source, (size_t)length);
+	houdini_escape_href(dest, source, length);
 }
 
 static inline void cr(cmark_strbuf *html)
@@ -165,7 +165,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 			S_render_sourcepos(node, html, options);
 			cmark_strbuf_puts(html, "><code>");
 		} else {
-			int first_tag = 0;
+			bufsize_t first_tag = 0;
 			while (first_tag < node->as.code.info.len &&
 			       node->as.code.info.data[first_tag] != ' ') {
 				first_tag += 1;
diff --git a/src/inlines.c b/src/inlines.c
index 8a1ee44..7e8f806 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -36,7 +36,7 @@ typedef struct delimiter {
 	struct delimiter *previous;
 	struct delimiter *next;
 	cmark_node *inl_text;
-	int position;
+	bufsize_t position;
 	unsigned char delim_char;
 	bool can_open;
 	bool can_close;
@@ -45,7 +45,7 @@ typedef struct delimiter {
 
 typedef struct {
 	cmark_chunk input;
-	int pos;
+	bufsize_t pos;
 	cmark_reference_map *refmap;
 	delimiter *last_delim;
 } subject;
@@ -57,7 +57,7 @@ static int parse_inline(subject* subj, cmark_node * parent, int options);
 
 static void subject_from_buf(subject *e, cmark_strbuf *buffer,
                              cmark_reference_map *refmap);
-static int subject_find_special_char(subject *subj, int options);
+static bufsize_t subject_find_special_char(subject *subj, int options);
 
 static cmark_chunk cmark_clean_autolink(cmark_chunk *url, int is_email)
 {
@@ -143,7 +143,7 @@ static inline cmark_node* make_simple(cmark_node_type t)
 static cmark_chunk chunk_clone(cmark_chunk *src)
 {
 	cmark_chunk c;
-	int len = src->len;
+	bufsize_t len = src->len;
 
 	c.len   = len;
 	c.data  = (unsigned char *)malloc(len + 1);
@@ -177,7 +177,7 @@ static inline unsigned char peek_char(subject *subj)
 	return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
 }
 
-static inline unsigned char peek_at(subject *subj, int pos)
+static inline unsigned char peek_at(subject *subj, bufsize_t pos)
 {
 	return subj->input.data[pos];
 }
@@ -195,8 +195,8 @@ static inline int is_eof(subject* subj)
 static inline cmark_chunk take_while(subject* subj, int (*f)(int))
 {
 	unsigned char c;
-	int startpos = subj->pos;
-	int len = 0;
+	bufsize_t startpos = subj->pos;
+	bufsize_t len = 0;
 
 	while ((c = peek_char(subj)) && (*f)(c)) {
 		advance(subj);
@@ -211,7 +211,7 @@ static inline cmark_chunk take_while(subject* subj, int (*f)(int))
 // parsed).  Return 0 if you don't find matching closing
 // backticks, otherwise return the position in the subject
 // after the closing backticks.
-static int scan_to_closing_backticks(subject* subj, int openticklength)
+static bufsize_t scan_to_closing_backticks(subject* subj, bufsize_t openticklength)
 {
 	// read non backticks
 	unsigned char c;
@@ -221,7 +221,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
 	if (is_eof(subj)) {
 		return 0;  // did not find closing ticks, return 0
 	}
-	int numticks = 0;
+	bufsize_t numticks = 0;
 	while (peek_char(subj) == '`') {
 		advance(subj);
 		numticks++;
@@ -237,8 +237,8 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
 static cmark_node* handle_backticks(subject *subj)
 {
 	cmark_chunk openticks = take_while(subj, isbacktick);
-	int startpos = subj->pos;
-	int endpos = scan_to_closing_backticks(subj, openticks.len);
+	bufsize_t startpos = subj->pos;
+	bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
 
 	if (endpos == 0) { // not found
 		subj->pos = startpos; // rewind
@@ -260,7 +260,7 @@ static int
 scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
 {
 	int numdelims = 0;
-	int before_char_pos;
+	bufsize_t before_char_pos;
 	int32_t after_char = 0;
 	int32_t before_char = 0;
 	int len;
@@ -376,7 +376,7 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open,
 // Assumes the subject has a c at the current position.
 static cmark_node* handle_delim(subject* subj, unsigned char c, bool smart)
 {
-	int numdelims;
+	bufsize_t numdelims;
 	cmark_node * inl_text;
 	bool can_open, can_close;
 	cmark_chunk contents;
@@ -500,11 +500,11 @@ static delimiter*
 S_insert_emph(subject *subj, delimiter *opener, delimiter *closer)
 {
 	delimiter *delim, *tmp_delim;
-	int use_delims;
+	bufsize_t use_delims;
 	cmark_node *opener_inl = opener->inl_text;
 	cmark_node *closer_inl = closer->inl_text;
-	int opener_num_chars = opener_inl->as.literal.len;
-	int closer_num_chars = closer_inl->as.literal.len;
+	bufsize_t opener_num_chars = opener_inl->as.literal.len;
+	bufsize_t closer_num_chars = closer_inl->as.literal.len;
 	cmark_node *tmp, *emph, *first_child, *last_child;
 
 	// calculate the actual number of characters used from this closer
@@ -596,7 +596,7 @@ static cmark_node* handle_backslash(subject *subj)
 static cmark_node* handle_entity(subject* subj)
 {
 	cmark_strbuf ent = GH_BUF_INIT;
-	size_t len;
+	bufsize_t len;
 
 	advance(subj);
 
@@ -618,7 +618,7 @@ static cmark_node *make_str_with_entities(cmark_chunk *content)
 {
 	cmark_strbuf unescaped = GH_BUF_INIT;
 
-	if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) {
+	if (houdini_unescape_html(&unescaped, content->data, content->len)) {
 		return make_str(cmark_chunk_buf_detach(&unescaped));
 	} else {
 		return make_str(*content);
@@ -678,7 +678,7 @@ cmark_chunk cmark_clean_title(cmark_chunk *title)
 // Assumes the subject has a '<' character at the current position.
 static cmark_node* handle_pointy_brace(subject* subj)
 {
-	int matchlen = 0;
+	bufsize_t matchlen = 0;
 	cmark_chunk contents;
 
 	advance(subj);  // advance past first <
@@ -725,7 +725,7 @@ static cmark_node* handle_pointy_brace(subject* subj)
 // encountered.  Backticks in labels do not start code spans.
 static int link_label(subject* subj, cmark_chunk *raw_label)
 {
-	int startpos = subj->pos;
+	bufsize_t startpos = subj->pos;
 	int length = 0;
 	unsigned char c;
 
@@ -769,10 +769,10 @@ noMatch:
 // Return a link, an image, or a literal close bracket.
 static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
 {
-	int initial_pos;
-	int starturl, endurl, starttitle, endtitle, endall;
-	int n;
-	int sps;
+	bufsize_t initial_pos;
+	bufsize_t starturl, endurl, starttitle, endtitle, endall;
+	bufsize_t n;
+	bufsize_t sps;
 	cmark_reference *ref;
 	bool is_image = false;
 	cmark_chunk url_chunk, title_chunk;
@@ -922,7 +922,7 @@ match:
 // Assumes the subject has a newline at the current position.
 static cmark_node* handle_newline(subject *subj)
 {
-	int nlpos = subj->pos;
+	bufsize_t nlpos = subj->pos;
 	// skip over newline
 	advance(subj);
 	// skip spaces at beginning of line
@@ -938,7 +938,7 @@ static cmark_node* handle_newline(subject *subj)
 	}
 }
 
-static int subject_find_special_char(subject *subj, int options)
+static bufsize_t subject_find_special_char(subject *subj, int options)
 {
 	// "\r\n\\`&_*[]<!"
 	static const int8_t SPECIAL_CHARS[256] = {
@@ -980,7 +980,7 @@ static int subject_find_special_char(subject *subj, int options)
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	};
 
-	int n = subj->pos + 1;
+	bufsize_t n = subj->pos + 1;
 
 	while (n < subj->input.len) {
 		if (SPECIAL_CHARS[subj->input.data[n]])
@@ -1001,7 +1001,7 @@ static int parse_inline(subject* subj, cmark_node * parent, int options)
 	cmark_node* new_inl = NULL;
 	cmark_chunk contents;
 	unsigned char c;
-	int endpos;
+	bufsize_t endpos;
 	c = peek_char(subj);
 	if (c == 0) {
 		return 0;
@@ -1098,7 +1098,7 @@ static void spnl(subject* subj)
 // Modify refmap if a reference is encountered.
 // Return 0 if no reference found, otherwise position of subject
 // after reference is parsed.
-int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap)
+bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap)
 {
 	subject subj;
 
@@ -1106,8 +1106,8 @@ int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refma
 	cmark_chunk url;
 	cmark_chunk title;
 
-	int matchlen = 0;
-	int beforetitle;
+	bufsize_t matchlen = 0;
+	bufsize_t beforetitle;
 
 	subject_from_buf(&subj, input, NULL);
 
diff --git a/src/inlines.h b/src/inlines.h
index 534588e..f8847fc 100644
--- a/src/inlines.h
+++ b/src/inlines.h
@@ -10,7 +10,7 @@ cmark_chunk cmark_clean_title(cmark_chunk *title);
 
 void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap, int options);
 
-int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap);
+bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap);
 
 #ifdef __cplusplus
 }
diff --git a/src/parser.h b/src/parser.h
index ccdf84b..6e18c67 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -16,12 +16,12 @@ struct cmark_parser {
 	struct cmark_node* root;
 	struct cmark_node* current;
 	int line_number;
-	int offset;
-	int first_nonspace;
+	bufsize_t offset;
+	bufsize_t first_nonspace;
 	int indent;
 	bool blank;
 	cmark_strbuf *curline;
-	int last_line_length;
+	bufsize_t last_line_length;
 	cmark_strbuf *linebuf;
 	int options;
 };
diff --git a/src/scanners.c b/src/scanners.c
index 7f9ed2e..3f4ddac 100644
--- a/src/scanners.c
+++ b/src/scanners.c
@@ -1,11 +1,11 @@
-/* Generated by re2c 0.13.6 */
+/* Generated by re2c 0.13.5 */
 #include <stdlib.h>
 #include "chunk.h"
 #include "scanners.h"
 
-int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset)
+bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset)
 {
-	int res;
+	bufsize_t res;
 	unsigned char *ptr = (unsigned char *)c->data;
 	unsigned char lim = ptr[c->len];
 
@@ -19,7 +19,7 @@ int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset)
 
 
 // Try to match a scheme including colon.
-int _scan_scheme(const unsigned char *p)
+bufsize_t _scan_scheme(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -578,7 +578,7 @@ yy34:
 	if (yych != ':') goto yy31;
 yy35:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy37:
 	yych = *++p;
 	if (yych == 'E') goto yy38;
@@ -2919,7 +2919,7 @@ yy484:
 }
 
 // Try to match URI autolink after first <, returning number of chars matched.
-int _scan_autolink_uri(const unsigned char *p)
+bufsize_t _scan_autolink_uri(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -3517,7 +3517,7 @@ yy520:
 	}
 	if (yych <= '=') goto yy516;
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy524:
 	yych = *++p;
 	if (yych == 'E') goto yy525;
@@ -5858,7 +5858,7 @@ yy971:
 }
 
 // Try to match email autolink after first <, returning num of chars matched.
-int _scan_autolink_email(const unsigned char *p)
+bufsize_t _scan_autolink_email(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -6060,7 +6060,7 @@ yy984:
 	}
 yy985:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy987:
 	++p;
 	yych = *p;
@@ -10803,7 +10803,7 @@ yy1230:
 }
 
 // Try to match an HTML tag after first <, returning num of chars matched.
-int _scan_html_tag(const unsigned char *p)
+bufsize_t _scan_html_tag(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -10964,7 +10964,7 @@ yy1242:
 	if (yych != '>') goto yy1239;
 yy1243:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1245:
 	yych = *++p;
 	if (yych == 'C') goto yy1260;
@@ -11455,7 +11455,7 @@ yy1297:
 
 // Try to match an HTML block tag including first <,
 // returning num of chars matched.
-int _scan_html_block_tag(const unsigned char *p)
+bufsize_t _scan_html_block_tag(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -11513,7 +11513,7 @@ yy1303:
 	goto yy1301;
 yy1304:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1306:
 	yych = *++p;
 	if (yych <= '/') {
@@ -12022,7 +12022,7 @@ yy1343:
 	}
 yy1344:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1346:
 	yych = *++p;
 	if (yych <= 'R') {
@@ -12639,7 +12639,7 @@ yy1466:
 	}
 yy1467:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1469:
 	yych = *++p;
 	if (yych <= 'R') {
@@ -13243,7 +13243,7 @@ yy1585:
 // This may optionally be contained in <..>; otherwise
 // whitespace and unbalanced right parentheses aren't allowed.
 // Newlines aren't ever allowed.
-int _scan_link_url(const unsigned char *p)
+bufsize_t _scan_link_url(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -13308,7 +13308,7 @@ int _scan_link_url(const unsigned char *p)
 		}
 	}
 yy1588:
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1589:
 	yyaccept = 0;
 	marker = ++p;
@@ -13402,7 +13402,7 @@ yy1599:
 yy1600:
 	p = marker;
 	if (yyaccept <= 1) {
-		if (yyaccept == 0) {
+		if (yyaccept <= 0) {
 			goto yy1588;
 		} else {
 			goto yy1595;
@@ -13490,7 +13490,7 @@ yy1607:
 	if (yych <= ' ') goto yy1608;
 	if (yych != ')') goto yy1603;
 yy1608:
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1609:
 	++p;
 	yych = *p;
@@ -13732,7 +13732,7 @@ yy1623:
 // Try to match a link title (in single quotes, in double quotes, or
 // in parentheses), returning number of chars matched.  Allow one
 // level of internal nesting (quotes within quotes).
-int _scan_link_title(const unsigned char *p)
+bufsize_t _scan_link_title(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -13818,13 +13818,13 @@ yy1632:
 yy1633:
 	p = marker;
 	if (yyaccept <= 1) {
-		if (yyaccept == 0) {
+		if (yyaccept <= 0) {
 			goto yy1626;
 		} else {
 			goto yy1637;
 		}
 	} else {
-		if (yyaccept == 2) {
+		if (yyaccept <= 2) {
 			goto yy1644;
 		} else {
 			goto yy1651;
@@ -13842,7 +13842,7 @@ yy1634:
 yy1636:
 	++p;
 yy1637:
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1638:
 	yyaccept = 1;
 	marker = ++p;
@@ -13874,7 +13874,7 @@ yy1641:
 yy1643:
 	++p;
 yy1644:
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1645:
 	yyaccept = 2;
 	marker = ++p;
@@ -13906,7 +13906,7 @@ yy1648:
 yy1650:
 	++p;
 yy1651:
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1652:
 	yyaccept = 3;
 	marker = ++p;
@@ -13922,7 +13922,7 @@ yy1652:
 }
 
 // Match space characters, including newlines.
-int _scan_spacechars(const unsigned char *p)
+bufsize_t _scan_spacechars(const unsigned char *p)
 {
   const unsigned char *start = p; \
 
@@ -13973,7 +13973,7 @@ int _scan_spacechars(const unsigned char *p)
 		goto yy1659;
 	}
 yy1655:
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1656:
 	yych = *++p;
 	goto yy1658;
@@ -13993,7 +13993,7 @@ yy1659:
 }
 
 // Match ATX header start.
-int _scan_atx_header_start(const unsigned char *p)
+bufsize_t _scan_atx_header_start(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -14059,7 +14059,7 @@ yy1665:
 yy1666:
 	++p;
 yy1667:
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1668:
 	++p;
 	yych = *p;
@@ -14128,7 +14128,7 @@ yy1672:
 
 // Match setext header line.  Return 1 for level-1 header,
 // 2 for level-2, 0 for no match.
-int _scan_setext_header_line(const unsigned char *p)
+bufsize_t _scan_setext_header_line(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
 
@@ -14269,7 +14269,7 @@ yy1693:
 // Scan a horizontal rule line: "...three or more hyphens, asterisks,
 // or underscores on a line by themselves. If you wish, you may use
 // spaces between the hyphens or asterisks."
-int _scan_hrule(const unsigned char *p)
+bufsize_t _scan_hrule(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -14384,7 +14384,7 @@ yy1709:
 	if (yych != '\r') goto yy1704;
 yy1711:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1713:
 	++p;
 	yych = *p;
@@ -14422,7 +14422,7 @@ yy1719:
 	}
 yy1721:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1723:
 	++p;
 	yych = *p;
@@ -14460,13 +14460,13 @@ yy1729:
 	}
 yy1731:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 }
 
 }
 
 // Scan an opening code fence.
-int _scan_open_code_fence(const unsigned char *p)
+bufsize_t _scan_open_code_fence(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -14557,7 +14557,7 @@ yy1743:
 yy1745:
 	++p;
 	p = marker;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1747:
 	yych = *++p;
 	if (yybm[0+yych] & 64) {
@@ -14585,13 +14585,13 @@ yy1750:
 yy1752:
 	++p;
 	p = marker;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 }
 
 }
 
 // Scan a closing code fence with length at least len.
-int _scan_close_code_fence(const unsigned char *p)
+bufsize_t _scan_close_code_fence(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -14687,7 +14687,7 @@ yy1764:
 yy1766:
 	++p;
 	p = marker;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1768:
 	yych = *++p;
 	if (yybm[0+yych] & 128) {
@@ -14725,14 +14725,14 @@ yy1771:
 yy1773:
 	++p;
 	p = marker;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 }
 
 }
 
 // Scans an entity.
 // Returns number of chars matched.
-int _scan_entity(const unsigned char *p)
+bufsize_t _scan_entity(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -14799,7 +14799,7 @@ yy1783:
 	}
 yy1784:
 	++p;
-	{ return (p - start); }
+	{ return (bufsize_t)(p - start); }
 yy1786:
 	yych = *++p;
 	if (yych <= ';') {
diff --git a/src/scanners.h b/src/scanners.h
index 1353f3b..bc5134e 100644
--- a/src/scanners.h
+++ b/src/scanners.h
@@ -5,21 +5,21 @@
 extern "C" {
 #endif
 
-int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset);
-int _scan_scheme(const unsigned char *p);
-int _scan_autolink_uri(const unsigned char *p);
-int _scan_autolink_email(const unsigned char *p);
-int _scan_html_tag(const unsigned char *p);
-int _scan_html_block_tag(const unsigned char *p);
-int _scan_link_url(const unsigned char *p);
-int _scan_link_title(const unsigned char *p);
-int _scan_spacechars(const unsigned char *p);
-int _scan_atx_header_start(const unsigned char *p);
-int _scan_setext_header_line(const unsigned char *p);
-int _scan_hrule(const unsigned char *p);
-int _scan_open_code_fence(const unsigned char *p);
-int _scan_close_code_fence(const unsigned char *p);
-int _scan_entity(const unsigned char *p);
+bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset);
+bufsize_t _scan_scheme(const unsigned char *p);
+bufsize_t _scan_autolink_uri(const unsigned char *p);
+bufsize_t _scan_autolink_email(const unsigned char *p);
+bufsize_t _scan_html_tag(const unsigned char *p);
+bufsize_t _scan_html_block_tag(const unsigned char *p);
+bufsize_t _scan_link_url(const unsigned char *p);
+bufsize_t _scan_link_title(const unsigned char *p);
+bufsize_t _scan_spacechars(const unsigned char *p);
+bufsize_t _scan_atx_header_start(const unsigned char *p);
+bufsize_t _scan_setext_header_line(const unsigned char *p);
+bufsize_t _scan_hrule(const unsigned char *p);
+bufsize_t _scan_open_code_fence(const unsigned char *p);
+bufsize_t _scan_close_code_fence(const unsigned char *p);
+bufsize_t _scan_entity(const unsigned char *p);
 
 #define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n)
 #define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n)
diff --git a/src/scanners.re b/src/scanners.re
index 9411018..3722a99 100644
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -2,9 +2,9 @@
 #include "chunk.h"
 #include "scanners.h"
 
-int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset)
+bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset)
 {
-	int res;
+	bufsize_t res;
 	unsigned char *ptr = (unsigned char *)c->data;
 	unsigned char lim = ptr[c->len];
 
@@ -70,29 +70,29 @@ int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset)
 */
 
 // Try to match a scheme including colon.
-int _scan_scheme(const unsigned char *p)
+bufsize_t _scan_scheme(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  scheme [:] { return (p - start); }
+  scheme [:] { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
 
 // Try to match URI autolink after first <, returning number of chars matched.
-int _scan_autolink_uri(const unsigned char *p)
+bufsize_t _scan_autolink_uri(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  scheme [:][^\x00-\x20<>]*[>]  { return (p - start); }
+  scheme [:][^\x00-\x20<>]*[>]  { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
 
 // Try to match email autolink after first <, returning num of chars matched.
-int _scan_autolink_email(const unsigned char *p)
+bufsize_t _scan_autolink_email(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
@@ -101,32 +101,32 @@ int _scan_autolink_email(const unsigned char *p)
     [@]
     [a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
     ([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*
-    [>] { return (p - start); }
+    [>] { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
 
 // Try to match an HTML tag after first <, returning num of chars matched.
-int _scan_html_tag(const unsigned char *p)
+bufsize_t _scan_html_tag(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  htmltag { return (p - start); }
+  htmltag { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
 
 // Try to match an HTML block tag including first <,
 // returning num of chars matched.
-int _scan_html_block_tag(const unsigned char *p)
+bufsize_t _scan_html_block_tag(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  [<] [/] blocktagname (spacechar | [>])  { return (p - start); }
-  [<] blocktagname (spacechar | [/>]) { return (p - start); }
-  [<] [!?] { return (p - start); }
+  [<] [/] blocktagname (spacechar | [>])  { return (bufsize_t)(p - start); }
+  [<] blocktagname (spacechar | [/>]) { return (bufsize_t)(p - start); }
+  [<] [!?] { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
@@ -135,13 +135,13 @@ int _scan_html_block_tag(const unsigned char *p)
 // This may optionally be contained in <..>; otherwise
 // whitespace and unbalanced right parentheses aren't allowed.
 // Newlines aren't ever allowed.
-int _scan_link_url(const unsigned char *p)
+bufsize_t _scan_link_url(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
-  [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }
+  [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (bufsize_t)(p - start); }
+  [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
@@ -149,42 +149,42 @@ int _scan_link_url(const unsigned char *p)
 // Try to match a link title (in single quotes, in double quotes, or
 // in parentheses), returning number of chars matched.  Allow one
 // level of internal nesting (quotes within quotes).
-int _scan_link_title(const unsigned char *p)
+bufsize_t _scan_link_title(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  ["] (escaped_char|[^"\x00])* ["]   { return (p - start); }
-  ['] (escaped_char|[^'\x00])* ['] { return (p - start); }
-  [(] (escaped_char|[^)\x00])* [)]  { return (p - start); }
+  ["] (escaped_char|[^"\x00])* ["]   { return (bufsize_t)(p - start); }
+  ['] (escaped_char|[^'\x00])* ['] { return (bufsize_t)(p - start); }
+  [(] (escaped_char|[^)\x00])* [)]  { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
 
 // Match space characters, including newlines.
-int _scan_spacechars(const unsigned char *p)
+bufsize_t _scan_spacechars(const unsigned char *p)
 {
   const unsigned char *start = p; \
 /*!re2c
-  [ \t\v\f\r\n]* { return (p - start); }
+  [ \t\v\f\r\n]* { return (bufsize_t)(p - start); }
   . { return 0; }
 */
 }
 
 // Match ATX header start.
-int _scan_atx_header_start(const unsigned char *p)
+bufsize_t _scan_atx_header_start(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  [#]{1,6} ([ ]+|[\r\n])  { return (p - start); }
+  [#]{1,6} ([ ]+|[\r\n])  { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
 
 // Match setext header line.  Return 1 for level-1 header,
 // 2 for level-2, 0 for no match.
-int _scan_setext_header_line(const unsigned char *p)
+bufsize_t _scan_setext_header_line(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
 /*!re2c
@@ -197,51 +197,51 @@ int _scan_setext_header_line(const unsigned char *p)
 // Scan a horizontal rule line: "...three or more hyphens, asterisks,
 // or underscores on a line by themselves. If you wish, you may use
 // spaces between the hyphens or asterisks."
-int _scan_hrule(const unsigned char *p)
+bufsize_t _scan_hrule(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  ([*][ ]*){3,} [ \t]* [\r\n] { return (p - start); }
-  ([_][ ]*){3,} [ \t]* [\r\n] { return (p - start); }
-  ([-][ ]*){3,} [ \t]* [\r\n] { return (p - start); }
+  ([*][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); }
+  ([_][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); }
+  ([-][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
 
 // Scan an opening code fence.
-int _scan_open_code_fence(const unsigned char *p)
+bufsize_t _scan_open_code_fence(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  [`]{3,} / [^`\r\n\x00]*[\r\n] { return (p - start); }
-  [~]{3,} / [^~\r\n\x00]*[\r\n] { return (p - start); }
+  [`]{3,} / [^`\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); }
+  [~]{3,} / [^~\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); }
   .?                        { return 0; }
 */
 }
 
 // Scan a closing code fence with length at least len.
-int _scan_close_code_fence(const unsigned char *p)
+bufsize_t _scan_close_code_fence(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  [`]{3,} / [ \t]*[\r\n] { return (p - start); }
-  [~]{3,} / [ \t]*[\r\n] { return (p - start); }
+  [`]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); }
+  [~]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
 
 // Scans an entity.
 // Returns number of chars matched.
-int _scan_entity(const unsigned char *p)
+bufsize_t _scan_entity(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
   [&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;]
-     { return (p - start); }
+     { return (bufsize_t)(p - start); }
   .? { return 0; }
 */
 }
diff --git a/src/utf8.c b/src/utf8.c
index b83c2a5..ba1d873 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -30,7 +30,7 @@ static void encode_unknown(cmark_strbuf *buf)
 	cmark_strbuf_put(buf, repl, 3);
 }
 
-static int utf8proc_charlen(const uint8_t *str, int str_len)
+static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len)
 {
 	int length, i;
 
@@ -42,7 +42,7 @@ static int utf8proc_charlen(const uint8_t *str, int str_len)
 	if (!length)
 		return -1;
 
-	if (str_len >= 0 && length > str_len)
+	if (str_len >= 0 && (bufsize_t)length > str_len)
 		return -str_len;
 
 	for (i = 1; i < length; i++) {
@@ -54,7 +54,7 @@ static int utf8proc_charlen(const uint8_t *str, int str_len)
 }
 
 // Validate a single UTF-8 character according to RFC 3629.
-static int utf8proc_valid(const uint8_t *str, int str_len)
+static int utf8proc_valid(const uint8_t *str, bufsize_t str_len)
 {
 	int length = utf8proc_charlen(str, str_len);
 
@@ -109,14 +109,14 @@ static int utf8proc_valid(const uint8_t *str, int str_len)
 	return length;
 }
 
-void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, size_t size)
+void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, bufsize_t size)
 {
 	static const uint8_t whitespace[] = "    ";
 
-	size_t i = 0, tab = 0;
+	bufsize_t i = 0, tab = 0;
 
 	while (i < size) {
-		size_t org = i;
+		bufsize_t org = i;
 
 		while (i < size && line[i] != '\t' && line[i] != '\0'
 		       && line[i] < 0x80) {
@@ -151,7 +151,7 @@ void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, size_t size)
 	}
 }
 
-int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
+int utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst)
 {
 	int length;
 	int32_t uc = -1;
@@ -191,7 +191,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
 void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf)
 {
 	uint8_t dst[4];
-	int len = 0;
+	bufsize_t len = 0;
 
 	assert(uc >= 0);
 
@@ -227,7 +227,7 @@ void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf)
 	cmark_strbuf_put(buf, dst, len);
 }
 
-void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len)
+void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len)
 {
 	int32_t c;
 
@@ -235,7 +235,7 @@ void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len)
 	utf8proc_encode_char(x, dest)
 
 	while (len > 0) {
-		int char_len = utf8proc_iterate(str, len, &c);
+		bufsize_t char_len = utf8proc_iterate(str, len, &c);
 
 		if (char_len >= 0) {
 #include "case_fold_switch.inc"
diff --git a/src/utf8.h b/src/utf8.h
index 7df1573..ed1d7ee 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -8,10 +8,10 @@
 extern "C" {
 #endif
 
-void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len);
+void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len);
 void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf);
-int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst);
-void utf8proc_detab(cmark_strbuf *dest, const uint8_t *line, size_t size);
+int utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst);
+void utf8proc_detab(cmark_strbuf *dest, const uint8_t *line, bufsize_t size);
 int utf8proc_is_space(int32_t uc);
 int utf8proc_is_punctuation(int32_t uc);
 
diff --git a/src/xml.c b/src/xml.c
index 14f6d67..688dd36 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -11,13 +11,13 @@
 
 // Functions to convert cmark_nodes to XML strings.
 
-static void escape_xml(cmark_strbuf *dest, const unsigned char *source, int length)
+static void escape_xml(cmark_strbuf *dest, const unsigned char *source, bufsize_t length)
 {
 	if (source != NULL) {
 		if (length < 0)
 			length = strlen((char *)source);
 
-		houdini_escape_html0(dest, source, (size_t)length, 0);
+		houdini_escape_html0(dest, source, length, 0);
 	}
 }
 
-- 
cgit v1.2.3