From 019128a8e7f74344cc03e001e8b7286b42a03002 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 13:15:07 +0200 Subject: Switch cmark_markdown_to_html over to size_t --- man/man3/cmark.3 | 4 ++-- src/cmark.c | 2 +- src/cmark.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/man/man3/cmark.3 b/man/man3/cmark.3 index 5b68ecb..82c34cd 100644 --- a/man/man3/cmark.3 +++ b/man/man3/cmark.3 @@ -1,4 +1,4 @@ -.TH cmark 3 "March 21, 2015" "LOCAL" "Library Functions Manual" +.TH cmark 3 "June 07, 2015" "LOCAL" "Library Functions Manual" .SH NAME .PP @@ -10,7 +10,7 @@ DESCRIPTION Simple Interface .PP -\fIchar *\f[] \fBcmark_markdown_to_html\f[](\fIconst char *text\f[], \fIint len\f[], \fIint options\f[]) +\fIchar *\f[] \fBcmark_markdown_to_html\f[](\fIconst char *text\f[], \fIsize_t len\f[], \fIint options\f[]) .PP Convert \f[I]text\f[] (assumed to be a UTF\-8 encoded string with length diff --git a/src/cmark.c b/src/cmark.c index 79ceabf..35765b1 100644 --- a/src/cmark.c +++ b/src/cmark.c @@ -9,7 +9,7 @@ const int cmark_version = CMARK_VERSION; const char cmark_version_string[] = CMARK_VERSION_STRING; -char *cmark_markdown_to_html(const char *text, int len, int options) +char *cmark_markdown_to_html(const char *text, size_t len, int options) { cmark_node *doc; char *result; diff --git a/src/cmark.h b/src/cmark.h index 84c6f76..d86e13e 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -24,7 +24,7 @@ extern "C" { * UTF-8-encoded string. */ CMARK_EXPORT -char *cmark_markdown_to_html(const char *text, int len, int options); +char *cmark_markdown_to_html(const char *text, size_t len, int options); /** ## Node Structure */ -- cgit v1.2.3 From 1193050109dee6be85c82bd29a1c817532dde912 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 13:16:13 +0200 Subject: Use custom type bufsize_t for string buffer sizes This makes it easier to change the type later. No functional change. 
The rest of the code base still has to be adjusted to use the new type. Also add some TODO comments in buffer.c. --- src/buffer.c | 56 ++++++++++++++++++++++++++++++++++++-------------------- src/buffer.h | 26 +++++++++++++++----------- 2 files changed, 51 insertions(+), 31 deletions(-) diff --git a/src/buffer.c b/src/buffer.c index e2ebc02..78d0a00 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -22,20 +22,21 @@ unsigned char cmark_strbuf__initbuf[1]; #define MIN(x,y) ((xasize = 0; buf->size = 0; buf->ptr = cmark_strbuf__initbuf; + // TODO: Check for negative initial_size. if (initial_size) cmark_strbuf_grow(buf, initial_size); } -void cmark_strbuf_grow(cmark_strbuf *buf, int target_size) +void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) { unsigned char *new_ptr; - int new_size; + bufsize_t new_size; if (target_size <= buf->asize) return; @@ -51,9 +52,11 @@ void cmark_strbuf_grow(cmark_strbuf *buf, int target_size) /* grow the buffer size by 1.5, until it's big enough * to fit our target size */ while (new_size < target_size) + // TODO: Check for overflow. new_size = (new_size << 1) - (new_size >> 1); /* round allocation up to multiple of 8 */ + // TODO: Check for overflow. new_size = (new_size + 7) & ~7; new_ptr = (unsigned char *)realloc(new_ptr, new_size); @@ -95,12 +98,13 @@ void cmark_strbuf_clear(cmark_strbuf *buf) buf->ptr[0] = '\0'; } -void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len) +void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t len) { if (len <= 0 || data == NULL) { cmark_strbuf_clear(buf); } else { if (data != buf->ptr) { + // TODO: Check for overflow. ENSURE_SIZE(buf, len + 1); memmove(buf->ptr, data, len); } @@ -117,16 +121,18 @@ void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) void cmark_strbuf_putc(cmark_strbuf *buf, int c) { + // TODO: Check for overflow. 
ENSURE_SIZE(buf, buf->size + 2); buf->ptr[buf->size++] = c; buf->ptr[buf->size] = '\0'; } -void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len) +void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t len) { if (len <= 0) return; + // TODO: Check for overflow. ENSURE_SIZE(buf, buf->size + len + 1); memmove(buf->ptr + buf->size, data, len); buf->size += len; @@ -140,8 +146,8 @@ void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap) { - const int expected_size = buf->size + (strlen(format) * 2); - int len; + // TODO: Check for overflow. + const bufsize_t expected_size = buf->size + (strlen(format) * 2); ENSURE_SIZE(buf, expected_size); @@ -149,7 +155,7 @@ void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap) va_list args; va_copy(args, ap); - len = vsnprintf( + int len = vsnprintf( (char *)buf->ptr + buf->size, buf->asize - buf->size, format, args @@ -168,11 +174,13 @@ void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap) abort(); } + // TODO: Check for overflow. if (len + 1 <= buf->asize - buf->size) { buf->size += len; break; } + // TODO: Check for overflow. ENSURE_SIZE(buf, buf->size + len + 1); } } @@ -186,10 +194,11 @@ void cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...) va_end(ap); } -void cmark_strbuf_copy_cstr(char *data, int datasize, const cmark_strbuf *buf) +void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf *buf) { - int copylen; + bufsize_t copylen; + // TODO: Check negative datasize. 
assert(data && datasize && buf); data[0] = '\0'; @@ -224,16 +233,19 @@ unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) return data; } -void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, int asize) +void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, bufsize_t asize) { cmark_strbuf_free(buf); if (ptr) { buf->ptr = ptr; buf->size = strlen((char *)ptr); + // TODO: Check for negative asize. if (asize) + // TODO: Check for overflow. buf->asize = (asize < buf->size) ? buf->size + 1 : asize; else /* pass 0 to fall back on strlen + 1 */ + // TODO: Check for overflow. buf->asize = buf->size + 1; } else { cmark_strbuf_grow(buf, asize); @@ -247,19 +259,21 @@ int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) (a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0; } -int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, int pos) +bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos) { + // TODO: Bounds check. const unsigned char *p = (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos); if (!p) return -1; - return (int)(p - (const unsigned char *)buf->ptr); + return (bufsize_t)(p - (const unsigned char *)buf->ptr); } -int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos) +bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos) { - int i; + bufsize_t i; + // TODO: Bounds check. for (i = pos; i >= 0; i--) { if (buf->ptr[i] == (unsigned char) c) return i; @@ -268,17 +282,19 @@ int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos) return -1; } -void cmark_strbuf_truncate(cmark_strbuf *buf, int len) +void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) { + // TODO: Check for negative len. if (len < buf->size) { buf->size = len; buf->ptr[buf->size] = '\0'; } } -void cmark_strbuf_drop(cmark_strbuf *buf, int n) +void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n) { if (n > 0) { + // TODO: Bounds check. 
buf->size = buf->size - n; if (buf->size) memmove(buf->ptr, buf->ptr + n, buf->size); @@ -304,7 +320,7 @@ void cmark_strbuf_rtrim(cmark_strbuf *buf) void cmark_strbuf_trim(cmark_strbuf *buf) { - int i = 0; + bufsize_t i = 0; if (!buf->size) return; @@ -322,7 +338,7 @@ void cmark_strbuf_trim(cmark_strbuf *buf) void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) { bool last_char_was_space = false; - int r, w; + bufsize_t r, w; for (r = 0, w = 0; r < s->size; ++r) { switch (s->ptr[r]) { @@ -347,7 +363,7 @@ void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) // Destructively unescape a string: remove backslashes before punctuation chars. extern void cmark_strbuf_unescape(cmark_strbuf *buf) { - int r, w; + bufsize_t r, w; for (r = 0, w = 0; r < buf->size; ++r) { if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1])) diff --git a/src/buffer.h b/src/buffer.h index 417df26..99e6feb 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -3,20 +3,24 @@ #include #include +#include #include "config.h" #ifdef __cplusplus extern "C" { #endif +typedef int bufsize_t; + typedef struct { unsigned char *ptr; - int asize, size; + bufsize_t asize, size; } cmark_strbuf; extern unsigned char cmark_strbuf__initbuf[]; #define GH_BUF_INIT { cmark_strbuf__initbuf, 0, 0 } +#define BUFSIZE_MAX INT_MAX /** * Initialize a cmark_strbuf structure. @@ -24,12 +28,12 @@ extern unsigned char cmark_strbuf__initbuf[]; * For the cases where GH_BUF_INIT cannot be used to do static * initialization. */ -void cmark_strbuf_init(cmark_strbuf *buf, int initial_size); +void cmark_strbuf_init(cmark_strbuf *buf, bufsize_t initial_size); /** * Grow the buffer to hold at least `target_size` bytes. 
*/ -void cmark_strbuf_grow(cmark_strbuf *buf, int target_size); +void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size); void cmark_strbuf_free(cmark_strbuf *buf); void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b); @@ -38,9 +42,9 @@ size_t cmark_strbuf_len(const cmark_strbuf *buf); int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b); -void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, int asize); +void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, bufsize_t asize); unsigned char *cmark_strbuf_detach(cmark_strbuf *buf); -void cmark_strbuf_copy_cstr(char *data, int datasize, const cmark_strbuf *buf); +void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf *buf); static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf) { @@ -49,20 +53,20 @@ static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf) #define cmark_strbuf_at(buf, n) ((buf)->ptr[n]) -void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len); +void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t len); void cmark_strbuf_sets(cmark_strbuf *buf, const char *string); void cmark_strbuf_putc(cmark_strbuf *buf, int c); -void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len); +void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t len); void cmark_strbuf_puts(cmark_strbuf *buf, const char *string); void cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...) 
CMARK_ATTRIBUTE((format (printf, 2, 3))); void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap); void cmark_strbuf_clear(cmark_strbuf *buf); -int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, int pos); -int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos); -void cmark_strbuf_drop(cmark_strbuf *buf, int n); -void cmark_strbuf_truncate(cmark_strbuf *buf, int len); +bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos); +bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos); +void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n); +void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len); void cmark_strbuf_rtrim(cmark_strbuf *buf); void cmark_strbuf_trim(cmark_strbuf *buf); void cmark_strbuf_normalize_whitespace(cmark_strbuf *s); -- cgit v1.2.3 From 1a38daeb81db11ef2acd57690aad36b4ce3fe3da Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sat, 30 May 2015 00:38:20 +0200 Subject: Simplify oversizing of strbufs Always add 50% on top of target size. No need for a loop. --- src/buffer.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/buffer.c b/src/buffer.c index 78d0a00..8ec38b0 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -42,18 +42,15 @@ void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) return; if (buf->asize == 0) { - new_size = target_size; new_ptr = NULL; } else { - new_size = buf->asize; new_ptr = buf->ptr; } - /* grow the buffer size by 1.5, until it's big enough - * to fit our target size */ - while (new_size < target_size) - // TODO: Check for overflow. - new_size = (new_size << 1) - (new_size >> 1); + /* Oversize the buffer by 50% to guarantee amortized linear time + * complexity on append operations. */ + // TODO: Check for overflow. + new_size = target_size + target_size / 2; /* round allocation up to multiple of 8 */ // TODO: Check for overflow. 
-- cgit v1.2.3 From ea23e2b42f05ac4a04154e572fc5ce8c84c29c10 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 13:41:13 +0200 Subject: Check for overflow in cmark_strbuf_grow --- src/buffer.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/buffer.c b/src/buffer.c index 8ec38b0..8fb2652 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -36,7 +36,6 @@ void cmark_strbuf_init(cmark_strbuf *buf, bufsize_t initial_size) void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) { unsigned char *new_ptr; - bufsize_t new_size; if (target_size <= buf->asize) return; @@ -49,13 +48,18 @@ void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) /* Oversize the buffer by 50% to guarantee amortized linear time * complexity on append operations. */ - // TODO: Check for overflow. - new_size = target_size + target_size / 2; + size_t new_size = (size_t)target_size + (size_t)target_size / 2; /* round allocation up to multiple of 8 */ - // TODO: Check for overflow. new_size = (new_size + 7) & ~7; + if (new_size < (size_t)target_size /* Integer overflow. */ + || new_size > BUFSIZE_MAX /* Truncation overflow. */ + ) { + /* Oversize by the maximum possible amount. */ + new_size = BUFSIZE_MAX; + } + new_ptr = (unsigned char *)realloc(new_ptr, new_size); if (!new_ptr) { @@ -63,7 +67,7 @@ void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) abort(); } - buf->asize = new_size; + buf->asize = (bufsize_t)new_size; buf->ptr = new_ptr; /* truncate the existing buffer size if necessary */ -- cgit v1.2.3 From a87b62a8eb42ee5bf6307f6c140bb400c860bcdd Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 14:01:09 +0200 Subject: Account for null terminator in cmark_strbuf_grow This simplifies overflow checks. 
--- src/buffer.c | 26 +++++++++++++++++++------- src/buffer.h | 3 +++ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/buffer.c b/src/buffer.c index 8fb2652..d39828e 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -15,7 +15,7 @@ unsigned char cmark_strbuf__initbuf[1]; #define ENSURE_SIZE(b, d) \ - if ((d) > b->asize) \ + if ((d) >= b->asize) \ cmark_strbuf_grow(b, (d)); \ #ifndef MIN @@ -33,11 +33,16 @@ void cmark_strbuf_init(cmark_strbuf *buf, bufsize_t initial_size) cmark_strbuf_grow(buf, initial_size); } +void cmark_strbuf_overflow_err() { + fprintf(stderr, "String buffer overflow"); + abort(); +} + void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) { unsigned char *new_ptr; - if (target_size <= buf->asize) + if (target_size < buf->asize) return; if (buf->asize == 0) { @@ -50,12 +55,20 @@ void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) * complexity on append operations. */ size_t new_size = (size_t)target_size + (size_t)target_size / 2; + /* Account for terminating null byte. */ + new_size += 1; + /* round allocation up to multiple of 8 */ new_size = (new_size + 7) & ~7; if (new_size < (size_t)target_size /* Integer overflow. */ || new_size > BUFSIZE_MAX /* Truncation overflow. */ ) { + if (target_size >= BUFSIZE_MAX) { + /* No space for terminating null byte. */ + cmark_strbuf_overflow_err(); + return; /* unreachable */ + } /* Oversize by the maximum possible amount. */ new_size = BUFSIZE_MAX; } @@ -105,8 +118,7 @@ void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t le cmark_strbuf_clear(buf); } else { if (data != buf->ptr) { - // TODO: Check for overflow. - ENSURE_SIZE(buf, len + 1); + ENSURE_SIZE(buf, len); memmove(buf->ptr, data, len); } buf->size = len; @@ -123,7 +135,7 @@ void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) void cmark_strbuf_putc(cmark_strbuf *buf, int c) { // TODO: Check for overflow. 
- ENSURE_SIZE(buf, buf->size + 2); + ENSURE_SIZE(buf, buf->size + 1); buf->ptr[buf->size++] = c; buf->ptr[buf->size] = '\0'; } @@ -134,7 +146,7 @@ void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t le return; // TODO: Check for overflow. - ENSURE_SIZE(buf, buf->size + len + 1); + ENSURE_SIZE(buf, buf->size + len); memmove(buf->ptr + buf->size, data, len); buf->size += len; buf->ptr[buf->size] = '\0'; @@ -182,7 +194,7 @@ void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap) } // TODO: Check for overflow. - ENSURE_SIZE(buf, buf->size + len + 1); + ENSURE_SIZE(buf, buf->size + len); } } diff --git a/src/buffer.h b/src/buffer.h index 99e6feb..526276d 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -72,6 +72,9 @@ void cmark_strbuf_trim(cmark_strbuf *buf); void cmark_strbuf_normalize_whitespace(cmark_strbuf *s); void cmark_strbuf_unescape(cmark_strbuf *s); +/* Print error and abort. */ +void cmark_strbuf_overflow_err(void); + #ifdef __cplusplus } #endif -- cgit v1.2.3 From dbfd2eba6427893cb48587d914719402252855b2 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 14:04:20 +0200 Subject: Remove useless code in cmark_strbuf_grow cmark_strbuf_grow will never truncate a buffer. 
--- src/buffer.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/buffer.c b/src/buffer.c index d39828e..f74c8c6 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -82,11 +82,6 @@ void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) buf->asize = (bufsize_t)new_size; buf->ptr = new_ptr; - - /* truncate the existing buffer size if necessary */ - if (buf->size >= buf->asize) - buf->size = buf->asize - 1; - buf->ptr[buf->size] = '\0'; } size_t cmark_strbuf_len(const cmark_strbuf *buf) -- cgit v1.2.3 From 996bcfb2eee238cf9a07d26b90e673f2496cbbec Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 14:33:24 +0200 Subject: Check for overflow when growing strbufs Replace macro ENSURE_SIZE with inline function S_strbuf_grow_by that checks for overflow. --- src/buffer.c | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/src/buffer.c b/src/buffer.c index f74c8c6..78be967 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "config.h" #include "cmark_ctype.h" @@ -14,10 +15,6 @@ */ unsigned char cmark_strbuf__initbuf[1]; -#define ENSURE_SIZE(b, d) \ - if ((d) >= b->asize) \ - cmark_strbuf_grow(b, (d)); \ - #ifndef MIN #define MIN(x,y) ((xsize + add; + + if (target_size < add /* Integer overflow. */ + || target_size > BUFSIZE_MAX /* Truncation overflow. 
*/ + ) { + cmark_strbuf_overflow_err(); + return; /* unreachable */ + } + + if ((bufsize_t)target_size >= buf->asize) + cmark_strbuf_grow(buf, (bufsize_t)target_size); +} + void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) { unsigned char *new_ptr; @@ -113,7 +125,8 @@ void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t le cmark_strbuf_clear(buf); } else { if (data != buf->ptr) { - ENSURE_SIZE(buf, len); + if (len >= buf->asize) + cmark_strbuf_grow(buf, len); memmove(buf->ptr, data, len); } buf->size = len; @@ -129,8 +142,7 @@ void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) void cmark_strbuf_putc(cmark_strbuf *buf, int c) { - // TODO: Check for overflow. - ENSURE_SIZE(buf, buf->size + 1); + S_strbuf_grow_by(buf, 1); buf->ptr[buf->size++] = c; buf->ptr[buf->size] = '\0'; } @@ -140,8 +152,7 @@ void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t le if (len <= 0) return; - // TODO: Check for overflow. - ENSURE_SIZE(buf, buf->size + len); + S_strbuf_grow_by(buf, len); memmove(buf->ptr + buf->size, data, len); buf->size += len; buf->ptr[buf->size] = '\0'; @@ -154,10 +165,10 @@ void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap) { - // TODO: Check for overflow. - const bufsize_t expected_size = buf->size + (strlen(format) * 2); - - ENSURE_SIZE(buf, expected_size); + size_t expected_size = strlen(format); + if (expected_size <= SIZE_MAX / 2) + expected_size *= 2; + S_strbuf_grow_by(buf, expected_size); while (1) { va_list args; @@ -188,8 +199,7 @@ void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap) break; } - // TODO: Check for overflow. 
- ENSURE_SIZE(buf, buf->size + len); + S_strbuf_grow_by(buf, len); } } -- cgit v1.2.3 From ad94c00083285703144e4874642fb89dac2f5446 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 14:42:38 +0200 Subject: Check for negative lengths in buffer.c --- src/buffer.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/buffer.c b/src/buffer.c index 78be967..db575ed 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -25,8 +25,7 @@ void cmark_strbuf_init(cmark_strbuf *buf, bufsize_t initial_size) buf->size = 0; buf->ptr = cmark_strbuf__initbuf; - // TODO: Check for negative initial_size. - if (initial_size) + if (initial_size > 0) cmark_strbuf_grow(buf, initial_size); } @@ -216,8 +215,9 @@ void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf * { bufsize_t copylen; - // TODO: Check negative datasize. - assert(data && datasize && buf); + assert(buf); + if (!data || datasize <= 0) + return; data[0] = '\0'; @@ -258,8 +258,7 @@ void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, bufsize_t asize) if (ptr) { buf->ptr = ptr; buf->size = strlen((char *)ptr); - // TODO: Check for negative asize. - if (asize) + if (asize > 0) // TODO: Check for overflow. buf->asize = (asize < buf->size) ? buf->size + 1 : asize; else /* pass 0 to fall back on strlen + 1 */ @@ -302,7 +301,9 @@ bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos) void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) { - // TODO: Check for negative len. + if (len < 0) + len = 0; + if (len < buf->size) { buf->size = len; buf->ptr[buf->size] = '\0'; -- cgit v1.2.3 From 09cee35add8c8686a3982f3f31de283450b625eb Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 14:49:18 +0200 Subject: Fix check in cmark_strbuf_vprintf Avoid potential overflow and allow for different bufsize types. 
--- src/buffer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/buffer.c b/src/buffer.c index db575ed..08192da 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -192,8 +192,7 @@ void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap) abort(); } - // TODO: Check for overflow. - if (len + 1 <= buf->asize - buf->size) { + if ((size_t)len < (size_t)(buf->asize - buf->size)) { buf->size += len; break; } -- cgit v1.2.3 From b0a0cabbee704740169c9e493d61fcf786251601 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 15:01:43 +0200 Subject: Remove unused function cmark_strbuf_attach This function was missing a couple of range checks that I'm too lazy to fix. --- src/buffer.c | 18 ------------------ src/buffer.h | 1 - 2 files changed, 19 deletions(-) diff --git a/src/buffer.c b/src/buffer.c index 08192da..bc2e38f 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -250,24 +250,6 @@ unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) return data; } -void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, bufsize_t asize) -{ - cmark_strbuf_free(buf); - - if (ptr) { - buf->ptr = ptr; - buf->size = strlen((char *)ptr); - if (asize > 0) - // TODO: Check for overflow. - buf->asize = (asize < buf->size) ? buf->size + 1 : asize; - else /* pass 0 to fall back on strlen + 1 */ - // TODO: Check for overflow. 
- buf->asize = buf->size + 1; - } else { - cmark_strbuf_grow(buf, asize); - } -} - int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) { int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size)); diff --git a/src/buffer.h b/src/buffer.h index 526276d..d125207 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -42,7 +42,6 @@ size_t cmark_strbuf_len(const cmark_strbuf *buf); int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b); -void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, bufsize_t asize); unsigned char *cmark_strbuf_detach(cmark_strbuf *buf); void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf *buf); -- cgit v1.2.3 From e2b4500209821198413fb7fb6127060034b64c87 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 15:11:07 +0200 Subject: Missing bounds checks in buffer.c --- src/buffer.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/buffer.c b/src/buffer.c index bc2e38f..19ea677 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -259,7 +259,11 @@ int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos) { - // TODO: Bounds check. + if (pos >= buf->size) + return -1; + if (pos < 0) + pos = 0; + const unsigned char *p = (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos); if (!p) return -1; @@ -269,10 +273,12 @@ bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos) bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos) { - bufsize_t i; + if (pos < 0 || buf->size == 0) + return -1; + if (pos >= buf->size) + pos = buf->size - 1; - // TODO: Bounds check. 
- for (i = pos; i >= 0; i--) { + for (bufsize_t i = pos; i >= 0; i--) { if (buf->ptr[i] == (unsigned char) c) return i; } @@ -294,7 +300,8 @@ void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n) { if (n > 0) { - // TODO: Bounds check. + if (n > buf->size) + n = buf->size; buf->size = buf->size - n; if (buf->size) memmove(buf->ptr, buf->ptr + n, buf->size); -- cgit v1.2.3 From d49d3fd7bab4a8734e5f22318e3fb538bfe20dbb Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 16:48:01 +0200 Subject: Change return type of cmark_strbuf_len --- src/buffer.c | 2 +- src/buffer.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/buffer.c b/src/buffer.c index 19ea677..efee41d 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -95,7 +95,7 @@ void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) buf->ptr = new_ptr; } -size_t cmark_strbuf_len(const cmark_strbuf *buf) +bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; } diff --git a/src/buffer.h b/src/buffer.h index d125207..9c850e4 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -38,7 +38,7 @@ void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size); void cmark_strbuf_free(cmark_strbuf *buf); void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b); -size_t cmark_strbuf_len(const cmark_strbuf *buf); +bufsize_t cmark_strbuf_len(const cmark_strbuf *buf); int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b); -- cgit v1.2.3 From 7382fd5eba48107a8190bd2d6232cc3b6e20d8fc Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 16:52:44 +0200 Subject: Convert code base to bufsize_t There are probably a couple of places I missed. But this will only be a problem if we use a 64-bit bufsize_t at some point. Then, we'll get warnings from -Wshorten-64-to-32. 
--- src/blocks.c | 22 +++++++------- src/chunk.h | 10 +++---- src/commonmark.c | 4 +-- src/houdini.h | 12 ++++---- src/houdini_href_e.c | 4 +-- src/houdini_html_e.c | 6 ++-- src/houdini_html_u.c | 14 ++++----- src/html.c | 10 +++---- src/inlines.c | 62 +++++++++++++++++++------------------- src/inlines.h | 2 +- src/parser.h | 6 ++-- src/scanners.c | 84 ++++++++++++++++++++++++++-------------------------- src/scanners.h | 30 +++++++++---------- src/scanners.re | 76 +++++++++++++++++++++++------------------------ src/utf8.c | 20 ++++++------- src/utf8.h | 6 ++-- src/xml.c | 4 +-- 17 files changed, 186 insertions(+), 186 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index b72c256..72b1ca5 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -30,7 +30,7 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, static void S_process_line(cmark_parser *parser, const unsigned char *buffer, - size_t bytes); + bufsize_t bytes); static cmark_node* make_block(cmark_node_type tag, int start_line, int start_column) { @@ -95,7 +95,7 @@ static cmark_node* finalize(cmark_parser *parser, cmark_node* b); // Returns true if line has only space characters, else false. 
-static bool is_blank(cmark_strbuf *s, int offset) +static bool is_blank(cmark_strbuf *s, bufsize_t offset) { while (offset < s->size) { switch (s->ptr[offset]) { @@ -128,7 +128,7 @@ static inline bool accepts_lines(cmark_node_type block_type) block_type == NODE_CODE_BLOCK); } -static void add_line(cmark_node* node, cmark_chunk *ch, int offset) +static void add_line(cmark_node* node, cmark_chunk *ch, bufsize_t offset) { assert(node->open); cmark_strbuf_put(&node->string_content, ch->data + offset, ch->len - offset); @@ -136,7 +136,7 @@ static void add_line(cmark_node* node, cmark_chunk *ch, int offset) static void remove_trailing_blank_lines(cmark_strbuf *ln) { - int i; + bufsize_t i; unsigned char c; for (i = ln->size - 1; i >= 0; --i) { @@ -204,7 +204,7 @@ static int break_out_of_lists(cmark_parser *parser, cmark_node ** bptr) static cmark_node* finalize(cmark_parser *parser, cmark_node* b) { - int pos; + bufsize_t pos; cmark_node* item; cmark_node* subitem; cmark_node* parent; @@ -367,10 +367,10 @@ static void process_inlines(cmark_node* root, cmark_reference_map *refmap, int o // Attempts to parse a list item marker (bullet or enumerated). // On success, returns length of the marker, and populates // data with the details. On failure, returns 0. 
-static int parse_list_marker(cmark_chunk *input, int pos, cmark_list **dataptr) +static bufsize_t parse_list_marker(cmark_chunk *input, bufsize_t pos, cmark_list **dataptr) { unsigned char c; - int startpos; + bufsize_t startpos; cmark_list *data; startpos = pos; @@ -533,7 +533,7 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, static void chop_trailing_hashtags(cmark_chunk *ch) { - int n, orig_n; + bufsize_t n, orig_n; cmark_chunk_rtrim(ch); orig_n = n = ch->len - 1; @@ -562,10 +562,10 @@ S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) } static void -S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) +S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t bytes) { cmark_node* last_matched_container; - int matched = 0; + bufsize_t matched = 0; int lev = 0; int i; cmark_list *data = NULL; @@ -712,7 +712,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) parser->offset = parser->first_nonspace + matched; container = add_child(parser, container, NODE_HEADER, parser->offset + 1); - int hashpos = cmark_chunk_strchr(&input, '#', parser->first_nonspace); + bufsize_t hashpos = cmark_chunk_strchr(&input, '#', parser->first_nonspace); int level = 0; while (peek_at(&input, hashpos) == '#') { diff --git a/src/chunk.h b/src/chunk.h index a246a9d..364918d 100644 --- a/src/chunk.h +++ b/src/chunk.h @@ -11,8 +11,8 @@ typedef struct { unsigned char *data; - int len; - int alloc; // also implies a NULL-terminated string + bufsize_t len; + bufsize_t alloc; // also implies a NULL-terminated string } cmark_chunk; static inline void cmark_chunk_free(cmark_chunk *c) @@ -51,10 +51,10 @@ static inline void cmark_chunk_trim(cmark_chunk *c) cmark_chunk_rtrim(c); } -static inline int cmark_chunk_strchr(cmark_chunk *ch, int c, int offset) +static inline bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c, bufsize_t offset) { const unsigned char *p = (unsigned char 
*)memchr(ch->data + offset, c, ch->len - offset); - return p ? (int)(p - ch->data) : ch->len; + return p ? (bufsize_t)(p - ch->data) : ch->len; } static inline const char *cmark_chunk_to_cstr(cmark_chunk *c) @@ -100,7 +100,7 @@ static inline cmark_chunk cmark_chunk_literal(const char *data) return c; } -static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, int pos, int len) +static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, bufsize_t pos, bufsize_t len) { cmark_chunk c = {ch->data + pos, len, 0}; return c; diff --git a/src/commonmark.c b/src/commonmark.c index dba1fcf..2022fd5 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -20,7 +20,7 @@ struct render_state { int column; int width; int need_cr; - int last_breakable; + bufsize_t last_breakable; bool begin_line; bool no_wrap; bool in_tight_list_item; @@ -293,7 +293,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, const char *title; cmark_strbuf listmarker = GH_BUF_INIT; char *emph_delim; - int marker_width; + bufsize_t marker_width; // Don't adjust tight list status til we've started the list. 
// Otherwise we loose the blank line between a paragraph and diff --git a/src/houdini.h b/src/houdini.h index 9f00f6d..2e7a354 100644 --- a/src/houdini.h +++ b/src/houdini.h @@ -31,15 +31,15 @@ extern "C" { #define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10) #define HOUDINI_UNESCAPED_SIZE(x) (x) -extern size_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secure); -extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, size_t size); +extern bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); +extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); +extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, int secure); +extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); +extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); extern int houdini_escape_xml(cmark_strbuf *ob, const uint8_t *src, size_t size); extern int houdini_escape_uri(cmark_strbuf *ob, const uint8_t *src, size_t size); extern int houdini_escape_url(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, size_t size); +extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); extern int houdini_unescape_uri(cmark_strbuf *ob, const uint8_t *src, size_t size); extern int houdini_unescape_url(cmark_strbuf *ob, const uint8_t *src, size_t size); extern int houdini_escape_js(cmark_strbuf *ob, const uint8_t *src, size_t size); diff --git a/src/houdini_href_e.c b/src/houdini_href_e.c index 7527780..7fb958a 100644 --- a/src/houdini_href_e.c +++ 
b/src/houdini_href_e.c @@ -49,10 +49,10 @@ static const char HREF_SAFE[] = { }; int -houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, size_t size) +houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { static const uint8_t hex_chars[] = "0123456789ABCDEF"; - size_t i = 0, org; + bufsize_t i = 0, org; uint8_t hex_str[3]; hex_str[0] = '%'; diff --git a/src/houdini_html_e.c b/src/houdini_html_e.c index 1a4c3e1..7f4b91f 100644 --- a/src/houdini_html_e.c +++ b/src/houdini_html_e.c @@ -45,9 +45,9 @@ static const char *HTML_ESCAPES[] = { }; int -houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secure) +houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, int secure) { - size_t i = 0, org, esc = 0; + bufsize_t i = 0, org, esc = 0; while (i < size) { org = i; @@ -75,7 +75,7 @@ houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secu } int -houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, size_t size) +houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { return houdini_escape_html0(ob, src, size, 1); } diff --git a/src/houdini_html_u.c b/src/houdini_html_u.c index eaf295e..e57894d 100644 --- a/src/houdini_html_u.c +++ b/src/houdini_html_u.c @@ -7,10 +7,10 @@ #include "utf8.h" #include "html_unescape.h" -size_t -houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size) +bufsize_t +houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { - size_t i = 0; + bufsize_t i = 0; if (size >= 3 && src[0] == '#') { int codepoint = 0; @@ -68,7 +68,7 @@ houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size) const struct html_ent *entity = find_entity((char *)src, i); if (entity != NULL) { - int len = 0; + bufsize_t len = 0; while (len < 4 && entity->utf8[len] != '\0') { ++len; } @@ -85,9 +85,9 @@ houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size) } int 
-houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size) +houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { - size_t i = 0, org, ent; + bufsize_t i = 0, org, ent; while (i < size) { org = i; @@ -122,7 +122,7 @@ houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size) return 1; } -void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, size_t size) +void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { if (!houdini_unescape_html(ob, src, size)) cmark_strbuf_put(ob, src, size); diff --git a/src/html.c b/src/html.c index d3f9fc7..317eb45 100644 --- a/src/html.c +++ b/src/html.c @@ -11,20 +11,20 @@ // Functions to convert cmark_nodes to HTML strings. -static void escape_html(cmark_strbuf *dest, const unsigned char *source, int length) +static void escape_html(cmark_strbuf *dest, const unsigned char *source, bufsize_t length) { if (length < 0) length = strlen((char *)source); - houdini_escape_html0(dest, source, (size_t)length, 0); + houdini_escape_html0(dest, source, length, 0); } -static void escape_href(cmark_strbuf *dest, const unsigned char *source, int length) +static void escape_href(cmark_strbuf *dest, const unsigned char *source, bufsize_t length) { if (length < 0) length = strlen((char *)source); - houdini_escape_href(dest, source, (size_t)length); + houdini_escape_href(dest, source, length); } static inline void cr(cmark_strbuf *html) @@ -165,7 +165,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, S_render_sourcepos(node, html, options); cmark_strbuf_puts(html, ">"); } else { - int first_tag = 0; + bufsize_t first_tag = 0; while (first_tag < node->as.code.info.len && node->as.code.info.data[first_tag] != ' ') { first_tag += 1; diff --git a/src/inlines.c b/src/inlines.c index 8a1ee44..7e8f806 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -36,7 +36,7 @@ typedef struct delimiter { struct delimiter *previous; struct delimiter *next; cmark_node 
*inl_text; - int position; + bufsize_t position; unsigned char delim_char; bool can_open; bool can_close; @@ -45,7 +45,7 @@ typedef struct delimiter { typedef struct { cmark_chunk input; - int pos; + bufsize_t pos; cmark_reference_map *refmap; delimiter *last_delim; } subject; @@ -57,7 +57,7 @@ static int parse_inline(subject* subj, cmark_node * parent, int options); static void subject_from_buf(subject *e, cmark_strbuf *buffer, cmark_reference_map *refmap); -static int subject_find_special_char(subject *subj, int options); +static bufsize_t subject_find_special_char(subject *subj, int options); static cmark_chunk cmark_clean_autolink(cmark_chunk *url, int is_email) { @@ -143,7 +143,7 @@ static inline cmark_node* make_simple(cmark_node_type t) static cmark_chunk chunk_clone(cmark_chunk *src) { cmark_chunk c; - int len = src->len; + bufsize_t len = src->len; c.len = len; c.data = (unsigned char *)malloc(len + 1); @@ -177,7 +177,7 @@ static inline unsigned char peek_char(subject *subj) return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0; } -static inline unsigned char peek_at(subject *subj, int pos) +static inline unsigned char peek_at(subject *subj, bufsize_t pos) { return subj->input.data[pos]; } @@ -195,8 +195,8 @@ static inline int is_eof(subject* subj) static inline cmark_chunk take_while(subject* subj, int (*f)(int)) { unsigned char c; - int startpos = subj->pos; - int len = 0; + bufsize_t startpos = subj->pos; + bufsize_t len = 0; while ((c = peek_char(subj)) && (*f)(c)) { advance(subj); @@ -211,7 +211,7 @@ static inline cmark_chunk take_while(subject* subj, int (*f)(int)) // parsed). Return 0 if you don't find matching closing // backticks, otherwise return the position in the subject // after the closing backticks. 
-static int scan_to_closing_backticks(subject* subj, int openticklength) +static bufsize_t scan_to_closing_backticks(subject* subj, bufsize_t openticklength) { // read non backticks unsigned char c; @@ -221,7 +221,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength) if (is_eof(subj)) { return 0; // did not find closing ticks, return 0 } - int numticks = 0; + bufsize_t numticks = 0; while (peek_char(subj) == '`') { advance(subj); numticks++; @@ -237,8 +237,8 @@ static int scan_to_closing_backticks(subject* subj, int openticklength) static cmark_node* handle_backticks(subject *subj) { cmark_chunk openticks = take_while(subj, isbacktick); - int startpos = subj->pos; - int endpos = scan_to_closing_backticks(subj, openticks.len); + bufsize_t startpos = subj->pos; + bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len); if (endpos == 0) { // not found subj->pos = startpos; // rewind @@ -260,7 +260,7 @@ static int scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close) { int numdelims = 0; - int before_char_pos; + bufsize_t before_char_pos; int32_t after_char = 0; int32_t before_char = 0; int len; @@ -376,7 +376,7 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open, // Assumes the subject has a c at the current position. 
static cmark_node* handle_delim(subject* subj, unsigned char c, bool smart) { - int numdelims; + bufsize_t numdelims; cmark_node * inl_text; bool can_open, can_close; cmark_chunk contents; @@ -500,11 +500,11 @@ static delimiter* S_insert_emph(subject *subj, delimiter *opener, delimiter *closer) { delimiter *delim, *tmp_delim; - int use_delims; + bufsize_t use_delims; cmark_node *opener_inl = opener->inl_text; cmark_node *closer_inl = closer->inl_text; - int opener_num_chars = opener_inl->as.literal.len; - int closer_num_chars = closer_inl->as.literal.len; + bufsize_t opener_num_chars = opener_inl->as.literal.len; + bufsize_t closer_num_chars = closer_inl->as.literal.len; cmark_node *tmp, *emph, *first_child, *last_child; // calculate the actual number of characters used from this closer @@ -596,7 +596,7 @@ static cmark_node* handle_backslash(subject *subj) static cmark_node* handle_entity(subject* subj) { cmark_strbuf ent = GH_BUF_INIT; - size_t len; + bufsize_t len; advance(subj); @@ -618,7 +618,7 @@ static cmark_node *make_str_with_entities(cmark_chunk *content) { cmark_strbuf unescaped = GH_BUF_INIT; - if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) { + if (houdini_unescape_html(&unescaped, content->data, content->len)) { return make_str(cmark_chunk_buf_detach(&unescaped)); } else { return make_str(*content); @@ -678,7 +678,7 @@ cmark_chunk cmark_clean_title(cmark_chunk *title) // Assumes the subject has a '<' character at the current position. static cmark_node* handle_pointy_brace(subject* subj) { - int matchlen = 0; + bufsize_t matchlen = 0; cmark_chunk contents; advance(subj); // advance past first < @@ -725,7 +725,7 @@ static cmark_node* handle_pointy_brace(subject* subj) // encountered. Backticks in labels do not start code spans. 
static int link_label(subject* subj, cmark_chunk *raw_label) { - int startpos = subj->pos; + bufsize_t startpos = subj->pos; int length = 0; unsigned char c; @@ -769,10 +769,10 @@ noMatch: // Return a link, an image, or a literal close bracket. static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent) { - int initial_pos; - int starturl, endurl, starttitle, endtitle, endall; - int n; - int sps; + bufsize_t initial_pos; + bufsize_t starturl, endurl, starttitle, endtitle, endall; + bufsize_t n; + bufsize_t sps; cmark_reference *ref; bool is_image = false; cmark_chunk url_chunk, title_chunk; @@ -922,7 +922,7 @@ match: // Assumes the subject has a newline at the current position. static cmark_node* handle_newline(subject *subj) { - int nlpos = subj->pos; + bufsize_t nlpos = subj->pos; // skip over newline advance(subj); // skip spaces at beginning of line @@ -938,7 +938,7 @@ static cmark_node* handle_newline(subject *subj) } } -static int subject_find_special_char(subject *subj, int options) +static bufsize_t subject_find_special_char(subject *subj, int options) { // "\r\n\\`&_*[]pos + 1; + bufsize_t n = subj->pos + 1; while (n < subj->input.len) { if (SPECIAL_CHARS[subj->input.data[n]]) @@ -1001,7 +1001,7 @@ static int parse_inline(subject* subj, cmark_node * parent, int options) cmark_node* new_inl = NULL; cmark_chunk contents; unsigned char c; - int endpos; + bufsize_t endpos; c = peek_char(subj); if (c == 0) { return 0; @@ -1098,7 +1098,7 @@ static void spnl(subject* subj) // Modify refmap if a reference is encountered. // Return 0 if no reference found, otherwise position of subject // after reference is parsed. 
-int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap) +bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap) { subject subj; @@ -1106,8 +1106,8 @@ int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refma cmark_chunk url; cmark_chunk title; - int matchlen = 0; - int beforetitle; + bufsize_t matchlen = 0; + bufsize_t beforetitle; subject_from_buf(&subj, input, NULL); diff --git a/src/inlines.h b/src/inlines.h index 534588e..f8847fc 100644 --- a/src/inlines.h +++ b/src/inlines.h @@ -10,7 +10,7 @@ cmark_chunk cmark_clean_title(cmark_chunk *title); void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap, int options); -int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap); +bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap); #ifdef __cplusplus } diff --git a/src/parser.h b/src/parser.h index ccdf84b..6e18c67 100644 --- a/src/parser.h +++ b/src/parser.h @@ -16,12 +16,12 @@ struct cmark_parser { struct cmark_node* root; struct cmark_node* current; int line_number; - int offset; - int first_nonspace; + bufsize_t offset; + bufsize_t first_nonspace; int indent; bool blank; cmark_strbuf *curline; - int last_line_length; + bufsize_t last_line_length; cmark_strbuf *linebuf; int options; }; diff --git a/src/scanners.c b/src/scanners.c index 7f9ed2e..3f4ddac 100644 --- a/src/scanners.c +++ b/src/scanners.c @@ -1,11 +1,11 @@ -/* Generated by re2c 0.13.6 */ +/* Generated by re2c 0.13.5 */ #include #include "chunk.h" #include "scanners.h" -int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset) +bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset) { - int res; + bufsize_t res; unsigned char *ptr = (unsigned char *)c->data; unsigned char lim = ptr[c->len]; @@ -19,7 +19,7 @@ int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int 
offset) // Try to match a scheme including colon. -int _scan_scheme(const unsigned char *p) +bufsize_t _scan_scheme(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -578,7 +578,7 @@ yy34: if (yych != ':') goto yy31; yy35: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy37: yych = *++p; if (yych == 'E') goto yy38; @@ -2919,7 +2919,7 @@ yy484: } // Try to match URI autolink after first <, returning number of chars matched. -int _scan_autolink_uri(const unsigned char *p) +bufsize_t _scan_autolink_uri(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -3517,7 +3517,7 @@ yy520: } if (yych <= '=') goto yy516; ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy524: yych = *++p; if (yych == 'E') goto yy525; @@ -5858,7 +5858,7 @@ yy971: } // Try to match email autolink after first <, returning num of chars matched. -int _scan_autolink_email(const unsigned char *p) +bufsize_t _scan_autolink_email(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -6060,7 +6060,7 @@ yy984: } yy985: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy987: ++p; yych = *p; @@ -10803,7 +10803,7 @@ yy1230: } // Try to match an HTML tag after first <, returning num of chars matched. -int _scan_html_tag(const unsigned char *p) +bufsize_t _scan_html_tag(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -10964,7 +10964,7 @@ yy1242: if (yych != '>') goto yy1239; yy1243: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1245: yych = *++p; if (yych == 'C') goto yy1260; @@ -11455,7 +11455,7 @@ yy1297: // Try to match an HTML block tag including first <, // returning num of chars matched. 
-int _scan_html_block_tag(const unsigned char *p) +bufsize_t _scan_html_block_tag(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -11513,7 +11513,7 @@ yy1303: goto yy1301; yy1304: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1306: yych = *++p; if (yych <= '/') { @@ -12022,7 +12022,7 @@ yy1343: } yy1344: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1346: yych = *++p; if (yych <= 'R') { @@ -12639,7 +12639,7 @@ yy1466: } yy1467: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1469: yych = *++p; if (yych <= 'R') { @@ -13243,7 +13243,7 @@ yy1585: // This may optionally be contained in <..>; otherwise // whitespace and unbalanced right parentheses aren't allowed. // Newlines aren't ever allowed. -int _scan_link_url(const unsigned char *p) +bufsize_t _scan_link_url(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -13308,7 +13308,7 @@ int _scan_link_url(const unsigned char *p) } } yy1588: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1589: yyaccept = 0; marker = ++p; @@ -13402,7 +13402,7 @@ yy1599: yy1600: p = marker; if (yyaccept <= 1) { - if (yyaccept == 0) { + if (yyaccept <= 0) { goto yy1588; } else { goto yy1595; @@ -13490,7 +13490,7 @@ yy1607: if (yych <= ' ') goto yy1608; if (yych != ')') goto yy1603; yy1608: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1609: ++p; yych = *p; @@ -13732,7 +13732,7 @@ yy1623: // Try to match a link title (in single quotes, in double quotes, or // in parentheses), returning number of chars matched. Allow one // level of internal nesting (quotes within quotes). 
-int _scan_link_title(const unsigned char *p) +bufsize_t _scan_link_title(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -13818,13 +13818,13 @@ yy1632: yy1633: p = marker; if (yyaccept <= 1) { - if (yyaccept == 0) { + if (yyaccept <= 0) { goto yy1626; } else { goto yy1637; } } else { - if (yyaccept == 2) { + if (yyaccept <= 2) { goto yy1644; } else { goto yy1651; @@ -13842,7 +13842,7 @@ yy1634: yy1636: ++p; yy1637: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1638: yyaccept = 1; marker = ++p; @@ -13874,7 +13874,7 @@ yy1641: yy1643: ++p; yy1644: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1645: yyaccept = 2; marker = ++p; @@ -13906,7 +13906,7 @@ yy1648: yy1650: ++p; yy1651: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1652: yyaccept = 3; marker = ++p; @@ -13922,7 +13922,7 @@ yy1652: } // Match space characters, including newlines. -int _scan_spacechars(const unsigned char *p) +bufsize_t _scan_spacechars(const unsigned char *p) { const unsigned char *start = p; \ @@ -13973,7 +13973,7 @@ int _scan_spacechars(const unsigned char *p) goto yy1659; } yy1655: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1656: yych = *++p; goto yy1658; @@ -13993,7 +13993,7 @@ yy1659: } // Match ATX header start. -int _scan_atx_header_start(const unsigned char *p) +bufsize_t _scan_atx_header_start(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14059,7 +14059,7 @@ yy1665: yy1666: ++p; yy1667: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1668: ++p; yych = *p; @@ -14128,7 +14128,7 @@ yy1672: // Match setext header line. Return 1 for level-1 header, // 2 for level-2, 0 for no match. 
-int _scan_setext_header_line(const unsigned char *p) +bufsize_t _scan_setext_header_line(const unsigned char *p) { const unsigned char *marker = NULL; @@ -14269,7 +14269,7 @@ yy1693: // Scan a horizontal rule line: "...three or more hyphens, asterisks, // or underscores on a line by themselves. If you wish, you may use // spaces between the hyphens or asterisks." -int _scan_hrule(const unsigned char *p) +bufsize_t _scan_hrule(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14384,7 +14384,7 @@ yy1709: if (yych != '\r') goto yy1704; yy1711: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1713: ++p; yych = *p; @@ -14422,7 +14422,7 @@ yy1719: } yy1721: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1723: ++p; yych = *p; @@ -14460,13 +14460,13 @@ yy1729: } yy1731: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } } } // Scan an opening code fence. -int _scan_open_code_fence(const unsigned char *p) +bufsize_t _scan_open_code_fence(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14557,7 +14557,7 @@ yy1743: yy1745: ++p; p = marker; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1747: yych = *++p; if (yybm[0+yych] & 64) { @@ -14585,13 +14585,13 @@ yy1750: yy1752: ++p; p = marker; - { return (p - start); } + { return (bufsize_t)(p - start); } } } // Scan a closing code fence with length at least len. -int _scan_close_code_fence(const unsigned char *p) +bufsize_t _scan_close_code_fence(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14687,7 +14687,7 @@ yy1764: yy1766: ++p; p = marker; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1768: yych = *++p; if (yybm[0+yych] & 128) { @@ -14725,14 +14725,14 @@ yy1771: yy1773: ++p; p = marker; - { return (p - start); } + { return (bufsize_t)(p - start); } } } // Scans an entity. 
// Returns number of chars matched. -int _scan_entity(const unsigned char *p) +bufsize_t _scan_entity(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14799,7 +14799,7 @@ yy1783: } yy1784: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1786: yych = *++p; if (yych <= ';') { diff --git a/src/scanners.h b/src/scanners.h index 1353f3b..bc5134e 100644 --- a/src/scanners.h +++ b/src/scanners.h @@ -5,21 +5,21 @@ extern "C" { #endif -int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset); -int _scan_scheme(const unsigned char *p); -int _scan_autolink_uri(const unsigned char *p); -int _scan_autolink_email(const unsigned char *p); -int _scan_html_tag(const unsigned char *p); -int _scan_html_block_tag(const unsigned char *p); -int _scan_link_url(const unsigned char *p); -int _scan_link_title(const unsigned char *p); -int _scan_spacechars(const unsigned char *p); -int _scan_atx_header_start(const unsigned char *p); -int _scan_setext_header_line(const unsigned char *p); -int _scan_hrule(const unsigned char *p); -int _scan_open_code_fence(const unsigned char *p); -int _scan_close_code_fence(const unsigned char *p); -int _scan_entity(const unsigned char *p); +bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset); +bufsize_t _scan_scheme(const unsigned char *p); +bufsize_t _scan_autolink_uri(const unsigned char *p); +bufsize_t _scan_autolink_email(const unsigned char *p); +bufsize_t _scan_html_tag(const unsigned char *p); +bufsize_t _scan_html_block_tag(const unsigned char *p); +bufsize_t _scan_link_url(const unsigned char *p); +bufsize_t _scan_link_title(const unsigned char *p); +bufsize_t _scan_spacechars(const unsigned char *p); +bufsize_t _scan_atx_header_start(const unsigned char *p); +bufsize_t _scan_setext_header_line(const unsigned char *p); +bufsize_t _scan_hrule(const unsigned char *p); +bufsize_t _scan_open_code_fence(const 
unsigned char *p); +bufsize_t _scan_close_code_fence(const unsigned char *p); +bufsize_t _scan_entity(const unsigned char *p); #define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n) #define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n) diff --git a/src/scanners.re b/src/scanners.re index 9411018..3722a99 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -2,9 +2,9 @@ #include "chunk.h" #include "scanners.h" -int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset) +bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset) { - int res; + bufsize_t res; unsigned char *ptr = (unsigned char *)c->data; unsigned char lim = ptr[c->len]; @@ -70,29 +70,29 @@ int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset) */ // Try to match a scheme including colon. -int _scan_scheme(const unsigned char *p) +bufsize_t _scan_scheme(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - scheme [:] { return (p - start); } + scheme [:] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Try to match URI autolink after first <, returning number of chars matched. -int _scan_autolink_uri(const unsigned char *p) +bufsize_t _scan_autolink_uri(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - scheme [:][^\x00-\x20<>]*[>] { return (p - start); } + scheme [:][^\x00-\x20<>]*[>] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Try to match email autolink after first <, returning num of chars matched. -int _scan_autolink_email(const unsigned char *p) +bufsize_t _scan_autolink_email(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -101,32 +101,32 @@ int _scan_autolink_email(const unsigned char *p) [@] [a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? 
([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)* - [>] { return (p - start); } + [>] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Try to match an HTML tag after first <, returning num of chars matched. -int _scan_html_tag(const unsigned char *p) +bufsize_t _scan_html_tag(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - htmltag { return (p - start); } + htmltag { return (bufsize_t)(p - start); } .? { return 0; } */ } // Try to match an HTML block tag including first <, // returning num of chars matched. -int _scan_html_block_tag(const unsigned char *p) +bufsize_t _scan_html_block_tag(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [<] [/] blocktagname (spacechar | [>]) { return (p - start); } - [<] blocktagname (spacechar | [/>]) { return (p - start); } - [<] [!?] { return (p - start); } + [<] [/] blocktagname (spacechar | [>]) { return (bufsize_t)(p - start); } + [<] blocktagname (spacechar | [/>]) { return (bufsize_t)(p - start); } + [<] [!?] { return (bufsize_t)(p - start); } .? { return 0; } */ } @@ -135,13 +135,13 @@ int _scan_html_block_tag(const unsigned char *p) // This may optionally be contained in <..>; otherwise // whitespace and unbalanced right parentheses aren't allowed. // Newlines aren't ever allowed. -int _scan_link_url(const unsigned char *p) +bufsize_t _scan_link_url(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); } - [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); } + [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (bufsize_t)(p - start); } + [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (bufsize_t)(p - start); } .? 
{ return 0; } */ } @@ -149,42 +149,42 @@ int _scan_link_url(const unsigned char *p) // Try to match a link title (in single quotes, in double quotes, or // in parentheses), returning number of chars matched. Allow one // level of internal nesting (quotes within quotes). -int _scan_link_title(const unsigned char *p) +bufsize_t _scan_link_title(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - ["] (escaped_char|[^"\x00])* ["] { return (p - start); } - ['] (escaped_char|[^'\x00])* ['] { return (p - start); } - [(] (escaped_char|[^)\x00])* [)] { return (p - start); } + ["] (escaped_char|[^"\x00])* ["] { return (bufsize_t)(p - start); } + ['] (escaped_char|[^'\x00])* ['] { return (bufsize_t)(p - start); } + [(] (escaped_char|[^)\x00])* [)] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Match space characters, including newlines. -int _scan_spacechars(const unsigned char *p) +bufsize_t _scan_spacechars(const unsigned char *p) { const unsigned char *start = p; \ /*!re2c - [ \t\v\f\r\n]* { return (p - start); } + [ \t\v\f\r\n]* { return (bufsize_t)(p - start); } . { return 0; } */ } // Match ATX header start. -int _scan_atx_header_start(const unsigned char *p) +bufsize_t _scan_atx_header_start(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [#]{1,6} ([ ]+|[\r\n]) { return (p - start); } + [#]{1,6} ([ ]+|[\r\n]) { return (bufsize_t)(p - start); } .? { return 0; } */ } // Match setext header line. Return 1 for level-1 header, // 2 for level-2, 0 for no match. -int _scan_setext_header_line(const unsigned char *p) +bufsize_t _scan_setext_header_line(const unsigned char *p) { const unsigned char *marker = NULL; /*!re2c @@ -197,51 +197,51 @@ int _scan_setext_header_line(const unsigned char *p) // Scan a horizontal rule line: "...three or more hyphens, asterisks, // or underscores on a line by themselves. 
If you wish, you may use // spaces between the hyphens or asterisks." -int _scan_hrule(const unsigned char *p) +bufsize_t _scan_hrule(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - ([*][ ]*){3,} [ \t]* [\r\n] { return (p - start); } - ([_][ ]*){3,} [ \t]* [\r\n] { return (p - start); } - ([-][ ]*){3,} [ \t]* [\r\n] { return (p - start); } + ([*][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); } + ([_][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); } + ([-][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Scan an opening code fence. -int _scan_open_code_fence(const unsigned char *p) +bufsize_t _scan_open_code_fence(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [`]{3,} / [^`\r\n\x00]*[\r\n] { return (p - start); } - [~]{3,} / [^~\r\n\x00]*[\r\n] { return (p - start); } + [`]{3,} / [^`\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); } + [~]{3,} / [^~\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Scan a closing code fence with length at least len. -int _scan_close_code_fence(const unsigned char *p) +bufsize_t _scan_close_code_fence(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [`]{3,} / [ \t]*[\r\n] { return (p - start); } - [~]{3,} / [ \t]*[\r\n] { return (p - start); } + [`]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); } + [~]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Scans an entity. // Returns number of chars matched. -int _scan_entity(const unsigned char *p) +bufsize_t _scan_entity(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c [&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;] - { return (p - start); } + { return (bufsize_t)(p - start); } .? 
{ return 0; } */ } diff --git a/src/utf8.c b/src/utf8.c index b83c2a5..ba1d873 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -30,7 +30,7 @@ static void encode_unknown(cmark_strbuf *buf) cmark_strbuf_put(buf, repl, 3); } -static int utf8proc_charlen(const uint8_t *str, int str_len) +static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) { int length, i; @@ -42,7 +42,7 @@ static int utf8proc_charlen(const uint8_t *str, int str_len) if (!length) return -1; - if (str_len >= 0 && length > str_len) + if (str_len >= 0 && (bufsize_t)length > str_len) return -str_len; for (i = 1; i < length; i++) { @@ -54,7 +54,7 @@ static int utf8proc_charlen(const uint8_t *str, int str_len) } // Validate a single UTF-8 character according to RFC 3629. -static int utf8proc_valid(const uint8_t *str, int str_len) +static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) { int length = utf8proc_charlen(str, str_len); @@ -109,14 +109,14 @@ static int utf8proc_valid(const uint8_t *str, int str_len) return length; } -void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, size_t size) +void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, bufsize_t size) { static const uint8_t whitespace[] = " "; - size_t i = 0, tab = 0; + bufsize_t i = 0, tab = 0; while (i < size) { - size_t org = i; + bufsize_t org = i; while (i < size && line[i] != '\t' && line[i] != '\0' && line[i] < 0x80) { @@ -151,7 +151,7 @@ void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, size_t size) } } -int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst) +int utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst) { int length; int32_t uc = -1; @@ -191,7 +191,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst) void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) { uint8_t dst[4]; - int len = 0; + bufsize_t len = 0; assert(uc >= 0); @@ -227,7 +227,7 @@ void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) cmark_strbuf_put(buf, dst, len); } 
-void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len) +void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len) { int32_t c; @@ -235,7 +235,7 @@ void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len) utf8proc_encode_char(x, dest) while (len > 0) { - int char_len = utf8proc_iterate(str, len, &c); + bufsize_t char_len = utf8proc_iterate(str, len, &c); if (char_len >= 0) { #include "case_fold_switch.inc" diff --git a/src/utf8.h b/src/utf8.h index 7df1573..ed1d7ee 100644 --- a/src/utf8.h +++ b/src/utf8.h @@ -8,10 +8,10 @@ extern "C" { #endif -void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len); +void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len); void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf); -int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst); -void utf8proc_detab(cmark_strbuf *dest, const uint8_t *line, size_t size); +int utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst); +void utf8proc_detab(cmark_strbuf *dest, const uint8_t *line, bufsize_t size); int utf8proc_is_space(int32_t uc); int utf8proc_is_punctuation(int32_t uc); diff --git a/src/xml.c b/src/xml.c index 14f6d67..688dd36 100644 --- a/src/xml.c +++ b/src/xml.c @@ -11,13 +11,13 @@ // Functions to convert cmark_nodes to XML strings. -static void escape_xml(cmark_strbuf *dest, const unsigned char *source, int length) +static void escape_xml(cmark_strbuf *dest, const unsigned char *source, bufsize_t length) { if (source != NULL) { if (length < 0) length = strlen((char *)source); - houdini_escape_html0(dest, source, (size_t)length, 0); + houdini_escape_html0(dest, source, length, 0); } } -- cgit v1.2.3 From bef240d45b5eda3a584ca1a495f54cb17ff8895f Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 16:54:37 +0200 Subject: Check for overflow in S_parser_feed Guard against too large chunks passed via the API. 
--- src/blocks.c | 9 ++++++--- src/buffer.h | 8 ++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index 72b1ca5..a3ac712 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -497,6 +497,7 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, while (buffer < end) { const unsigned char *eol; size_t line_len; + bufsize_t bufsize; for (eol = buffer; eol < end; ++eol) { if (S_is_line_end_char(*eol)) @@ -514,17 +515,19 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, } else if (eof) { line_len = end - buffer; } else { - cmark_strbuf_put(parser->linebuf, buffer, end - buffer); + bufsize = cmark_strbuf_check_bufsize(end - buffer); + cmark_strbuf_put(parser->linebuf, buffer, bufsize); break; } + bufsize = cmark_strbuf_check_bufsize(line_len); if (parser->linebuf->size > 0) { - cmark_strbuf_put(parser->linebuf, buffer, line_len); + cmark_strbuf_put(parser->linebuf, buffer, bufsize); S_process_line(parser, parser->linebuf->ptr, parser->linebuf->size); cmark_strbuf_clear(parser->linebuf); } else { - S_process_line(parser, buffer, line_len); + S_process_line(parser, buffer, bufsize); } buffer += line_len; diff --git a/src/buffer.h b/src/buffer.h index 9c850e4..f9696e0 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -74,6 +74,14 @@ void cmark_strbuf_unescape(cmark_strbuf *s); /* Print error and abort. 
*/ void cmark_strbuf_overflow_err(void); +static inline bufsize_t +cmark_strbuf_check_bufsize(size_t size) { + if (size > BUFSIZE_MAX) { + cmark_strbuf_overflow_err(); + } + return (bufsize_t)size; +} + #ifdef __cplusplus } #endif -- cgit v1.2.3 From abda63215d69b6245be853566481c09c4f015c14 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 17:32:51 +0200 Subject: Avoid strlen in commonmark.c --- src/commonmark.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/src/commonmark.c b/src/commonmark.c index 2022fd5..4594748 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -237,30 +237,29 @@ shortest_unused_backtick_sequence(cmark_chunk *code) static bool is_autolink(cmark_node *node) { - const char *title; - const char *url; + cmark_chunk *title; + cmark_chunk *url; cmark_node *link_text; if (node->type != CMARK_NODE_LINK) { return false; } - url = cmark_node_get_url(node); - if (url == NULL || - _scan_scheme((unsigned char *)url) == 0) { + url = &node->as.link.url; + if (url->len == 0 || scan_scheme(url, 0) == 0) { return false; } - title = cmark_node_get_title(node); + title = &node->as.link.title; // if it has a title, we can't treat it as an autolink: - if (title != NULL && strlen(title) > 0) { + if (title->len > 0) { return false; } link_text = node->first_child; cmark_consolidate_text_nodes(link_text); - return ((int)strlen(url) == link_text->as.literal.len && - strncmp(url, + return (url->len == link_text->as.literal.len && + strncmp((char*)url->data, (char*)link_text->as.literal.data, link_text->as.literal.len) == 0); } @@ -289,8 +288,8 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, int numticks; int i; bool entering = (ev_type == CMARK_EVENT_ENTER); - const char *info; - const char *title; + cmark_chunk *info; + cmark_chunk *title; cmark_strbuf listmarker = GH_BUF_INIT; char *emph_delim; bufsize_t marker_width; @@ -396,12 +395,12 @@ S_render_node(cmark_node *node, 
cmark_event_type ev_type, case CMARK_NODE_CODE_BLOCK: blankline(state); - info = cmark_node_get_fence_info(node); + info = &node->as.code.info; code = &node->as.code.literal; // use indented form if no info, and code doesn't // begin or end with a blank line, and code isn't // first thing in a list item - if ((info == NULL || strlen(info) == 0) && + if (info->len == 0 && (code->len > 2 && !isspace(code->data[0]) && !(isspace(code->data[code->len - 1]) && @@ -422,7 +421,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, lit(state, "`", false); } lit(state, " ", false); - out(state, cmark_chunk_literal(info), false, LITERAL); + out(state, *info, false, LITERAL); cr(state); out(state, node->as.code.literal, false, LITERAL); cr(state); @@ -542,11 +541,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, out(state, cmark_chunk_literal(cmark_node_get_url(node)), false, URL); - title = cmark_node_get_title(node); - if (title && strlen(title) > 0) { + title = &node->as.link.title; + if (title->len > 0) { lit(state, " \"", true); - out(state, cmark_chunk_literal(title), - false, TITLE); + out(state, *title, false, TITLE); lit(state, "\"", false); } lit(state, ")", false); @@ -560,10 +558,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, } else { lit(state, "](", false); out(state, cmark_chunk_literal(cmark_node_get_url(node)), false, URL); - title = cmark_node_get_title(node); - if (title && strlen(title) > 0) { + title = &node->as.link.title; + if (title->len > 0) { lit(state, " \"", true); - out(state, cmark_chunk_literal(title), false, TITLE); + out(state, *title, false, TITLE); lit(state, "\"", false); } lit(state, ")", false); -- cgit v1.2.3 From e477618d40cb3db39667f06085c67cdfa33710a2 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 17:33:25 +0200 Subject: Avoid strlen in xml.c --- src/xml.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/xml.c b/src/xml.c index 688dd36..7eec5a6 
100644 --- a/src/xml.c +++ b/src/xml.c @@ -13,12 +13,7 @@ static void escape_xml(cmark_strbuf *dest, const unsigned char *source, bufsize_t length) { - if (source != NULL) { - if (length < 0) - length = strlen((char *)source); - - houdini_escape_html0(dest, source, length, 0); - } + houdini_escape_html0(dest, source, length, 0); } struct render_state { -- cgit v1.2.3 From 9087645812433abdc51eb7bb82ade8c3c3bccfa5 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 17:34:34 +0200 Subject: Avoid strlen in html.c --- src/html.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/src/html.c b/src/html.c index 317eb45..a30bbca 100644 --- a/src/html.c +++ b/src/html.c @@ -13,20 +13,9 @@ static void escape_html(cmark_strbuf *dest, const unsigned char *source, bufsize_t length) { - if (length < 0) - length = strlen((char *)source); - houdini_escape_html0(dest, source, length, 0); } -static void escape_href(cmark_strbuf *dest, const unsigned char *source, bufsize_t length) -{ - if (length < 0) - length = strlen((char *)source); - - houdini_escape_href(dest, source, length); -} - static inline void cr(cmark_strbuf *html) { if (html->size && html->ptr[html->size - 1] != '\n') @@ -261,8 +250,8 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_LINK: if (entering) { cmark_strbuf_puts(html, "as.link.url.data, - node->as.link.url.len); + houdini_escape_href(html, node->as.link.url.data, + node->as.link.url.len); if (node->as.link.title.len) { cmark_strbuf_puts(html, "\" title=\""); @@ -279,8 +268,8 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_IMAGE: if (entering) { cmark_strbuf_puts(html, "as.link.url.data, - node->as.link.url.len); + houdini_escape_href(html, node->as.link.url.data, + node->as.link.url.len); cmark_strbuf_puts(html, "\" alt=\""); state->plain = node; -- cgit v1.2.3 From 54abb08e615ab4d094b37ce3102df81779ee2c7a Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer 
Date: Sun, 7 Jun 2015 17:42:07 +0200 Subject: Use size_t for strlen result in API test --- api_test/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api_test/main.c b/api_test/main.c index 01df51d..132d48c 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -643,16 +643,16 @@ test_incomplete_char(test_batch_runner *runner, const char *utf8, static void test_continuation_byte(test_batch_runner *runner, const char *utf8) { - int len = strlen(utf8); + size_t len = strlen(utf8); - for (int pos = 1; pos < len; ++pos) { + for (size_t pos = 1; pos < len; ++pos) { char buf[20]; sprintf(buf, "((((%s))))", utf8); buf[4+pos] = '\x20'; char expected[50]; strcpy(expected, "

((((" UTF8_REPL "\x20"); - for (int i = pos + 1; i < len; ++i) { + for (size_t i = pos + 1; i < len; ++i) { strcat(expected, UTF8_REPL); } strcat(expected, "))))

\n"); -- cgit v1.2.3 From 0641c5e4e9727ddde0d2be1aebe3fd7009d010f5 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 17:44:11 +0200 Subject: Helper to safely call strlen --- src/buffer.c | 5 +++-- src/buffer.h | 6 ++++++ src/chunk.h | 5 +++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/buffer.c b/src/buffer.c index efee41d..7d16af8 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -136,7 +136,7 @@ void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t le void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) { cmark_strbuf_set(buf, (const unsigned char *)string, - string ? strlen(string) : 0); + string ? cmark_strbuf_safe_strlen(string) : 0); } void cmark_strbuf_putc(cmark_strbuf *buf, int c) @@ -159,7 +159,8 @@ void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t le void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) { - cmark_strbuf_put(buf, (const unsigned char *)string, strlen(string)); + cmark_strbuf_put(buf, (const unsigned char *)string, + cmark_strbuf_safe_strlen(string)); } void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap) diff --git a/src/buffer.h b/src/buffer.h index f9696e0..babd051 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -3,6 +3,7 @@ #include #include +#include #include #include "config.h" @@ -82,6 +83,11 @@ cmark_strbuf_check_bufsize(size_t size) { return (bufsize_t)size; } +static inline bufsize_t +cmark_strbuf_safe_strlen(const char *str) { + return cmark_strbuf_check_bufsize(strlen(str)); +} + #ifdef __cplusplus } #endif diff --git a/src/chunk.h b/src/chunk.h index 364918d..f23a02d 100644 --- a/src/chunk.h +++ b/src/chunk.h @@ -87,7 +87,7 @@ static inline void cmark_chunk_set_cstr(cmark_chunk *c, const char *str) c->data = NULL; c->alloc = 0; } else { - c->len = strlen(str); + c->len = cmark_strbuf_safe_strlen(str); c->data = (unsigned char *)malloc(c->len + 1); c->alloc = 1; memcpy(c->data, str, c->len + 1); 
@@ -96,7 +96,8 @@ static inline void cmark_chunk_set_cstr(cmark_chunk *c, const char *str) static inline cmark_chunk cmark_chunk_literal(const char *data) { - cmark_chunk c = {(unsigned char *)data, data ? strlen(data) : 0, 0}; + bufsize_t len = data ? cmark_strbuf_safe_strlen(data) : 0; + cmark_chunk c = {(unsigned char *)data, len, 0}; return c; } -- cgit v1.2.3 From fdfa1e4bedf95691389efb9991ac8a6a4599c158 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 22:04:38 +0200 Subject: Remove unimplemented functions from houdini.h --- src/houdini.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/houdini.h b/src/houdini.h index 2e7a354..b926cf3 100644 --- a/src/houdini.h +++ b/src/houdini.h @@ -36,14 +36,7 @@ extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t s extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, int secure); extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); -extern int houdini_escape_xml(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_uri(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_url(cmark_strbuf *ob, const uint8_t *src, size_t size); extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); -extern int houdini_unescape_uri(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_unescape_url(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_js(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_unescape_js(cmark_strbuf *ob, const uint8_t *src, size_t size); #ifdef __cplusplus } -- cgit v1.2.3