From 7382fd5eba48107a8190bd2d6232cc3b6e20d8fc Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 7 Jun 2015 16:52:44 +0200 Subject: Convert code base to strbuf_t There are probably a couple of places I missed. But this will only be a problem if we use a 64-bit bufsize_t at some point. Then, we'll get warnings from -Wshorten-64-to-32. --- src/blocks.c | 22 +++++++------- src/chunk.h | 10 +++---- src/commonmark.c | 4 +-- src/houdini.h | 12 ++++---- src/houdini_href_e.c | 4 +-- src/houdini_html_e.c | 6 ++-- src/houdini_html_u.c | 14 ++++----- src/html.c | 10 +++---- src/inlines.c | 62 +++++++++++++++++++------------------- src/inlines.h | 2 +- src/parser.h | 6 ++-- src/scanners.c | 84 ++++++++++++++++++++++++++-------------------------- src/scanners.h | 30 +++++++++---------- src/scanners.re | 76 +++++++++++++++++++++++------------------------ src/utf8.c | 20 ++++++------- src/utf8.h | 6 ++-- src/xml.c | 4 +-- 17 files changed, 186 insertions(+), 186 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index b72c256..72b1ca5 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -30,7 +30,7 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, static void S_process_line(cmark_parser *parser, const unsigned char *buffer, - size_t bytes); + bufsize_t bytes); static cmark_node* make_block(cmark_node_type tag, int start_line, int start_column) { @@ -95,7 +95,7 @@ static cmark_node* finalize(cmark_parser *parser, cmark_node* b); // Returns true if line has only space characters, else false. -static bool is_blank(cmark_strbuf *s, int offset) +static bool is_blank(cmark_strbuf *s, bufsize_t offset) { while (offset < s->size) { switch (s->ptr[offset]) { @@ -128,7 +128,7 @@ static inline bool accepts_lines(cmark_node_type block_type) block_type == NODE_CODE_BLOCK); } -static void add_line(cmark_node* node, cmark_chunk *ch, int offset) +static void add_line(cmark_node* node, cmark_chunk *ch, bufsize_t offset) { assert(node->open); cmark_strbuf_put(&node->string_content, ch->data + offset, ch->len - offset); @@ -136,7 +136,7 @@ static void add_line(cmark_node* node, cmark_chunk *ch, int offset) static void remove_trailing_blank_lines(cmark_strbuf *ln) { - int i; + bufsize_t i; unsigned char c; for (i = ln->size - 1; i >= 0; --i) { @@ -204,7 +204,7 @@ static int break_out_of_lists(cmark_parser *parser, cmark_node ** bptr) static cmark_node* finalize(cmark_parser *parser, cmark_node* b) { - int pos; + bufsize_t pos; cmark_node* item; cmark_node* subitem; cmark_node* parent; @@ -367,10 +367,10 @@ static void process_inlines(cmark_node* root, cmark_reference_map *refmap, int o // Attempts to parse a list item marker (bullet or enumerated). // On success, returns length of the marker, and populates // data with the details. On failure, returns 0. -static int parse_list_marker(cmark_chunk *input, int pos, cmark_list **dataptr) +static bufsize_t parse_list_marker(cmark_chunk *input, bufsize_t pos, cmark_list **dataptr) { unsigned char c; - int startpos; + bufsize_t startpos; cmark_list *data; startpos = pos; @@ -533,7 +533,7 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, static void chop_trailing_hashtags(cmark_chunk *ch) { - int n, orig_n; + bufsize_t n, orig_n; cmark_chunk_rtrim(ch); orig_n = n = ch->len - 1; @@ -562,10 +562,10 @@ S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) } static void -S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) +S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t bytes) { cmark_node* last_matched_container; - int matched = 0; + bufsize_t matched = 0; int lev = 0; int i; cmark_list *data = NULL; @@ -712,7 +712,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) parser->offset = parser->first_nonspace + matched; container = add_child(parser, container, NODE_HEADER, parser->offset + 1); - int hashpos = cmark_chunk_strchr(&input, '#', parser->first_nonspace); + bufsize_t hashpos = cmark_chunk_strchr(&input, '#', parser->first_nonspace); int level = 0; while (peek_at(&input, hashpos) == '#') { diff --git a/src/chunk.h b/src/chunk.h index a246a9d..364918d 100644 --- a/src/chunk.h +++ b/src/chunk.h @@ -11,8 +11,8 @@ typedef struct { unsigned char *data; - int len; - int alloc; // also implies a NULL-terminated string + bufsize_t len; + bufsize_t alloc; // also implies a NULL-terminated string } cmark_chunk; static inline void cmark_chunk_free(cmark_chunk *c) @@ -51,10 +51,10 @@ static inline void cmark_chunk_trim(cmark_chunk *c) cmark_chunk_rtrim(c); } -static inline int cmark_chunk_strchr(cmark_chunk *ch, int c, int offset) +static inline bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c, bufsize_t offset) { const unsigned char *p = (unsigned char *)memchr(ch->data + offset, c, ch->len - offset); - return p ? (int)(p - ch->data) : ch->len; + return p ? (bufsize_t)(p - ch->data) : ch->len; } static inline const char *cmark_chunk_to_cstr(cmark_chunk *c) @@ -100,7 +100,7 @@ static inline cmark_chunk cmark_chunk_literal(const char *data) return c; } -static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, int pos, int len) +static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, bufsize_t pos, bufsize_t len) { cmark_chunk c = {ch->data + pos, len, 0}; return c; diff --git a/src/commonmark.c b/src/commonmark.c index dba1fcf..2022fd5 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -20,7 +20,7 @@ struct render_state { int column; int width; int need_cr; - int last_breakable; + bufsize_t last_breakable; bool begin_line; bool no_wrap; bool in_tight_list_item; @@ -293,7 +293,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, const char *title; cmark_strbuf listmarker = GH_BUF_INIT; char *emph_delim; - int marker_width; + bufsize_t marker_width; // Don't adjust tight list status til we've started the list. // Otherwise we loose the blank line between a paragraph and diff --git a/src/houdini.h b/src/houdini.h index 9f00f6d..2e7a354 100644 --- a/src/houdini.h +++ b/src/houdini.h @@ -31,15 +31,15 @@ extern "C" { #define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10) #define HOUDINI_UNESCAPED_SIZE(x) (x) -extern size_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secure); -extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, size_t size); +extern bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); +extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); +extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, int secure); +extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); +extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); extern int houdini_escape_xml(cmark_strbuf *ob, const uint8_t *src, size_t size); extern int houdini_escape_uri(cmark_strbuf *ob, const uint8_t *src, size_t size); extern int houdini_escape_url(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, size_t size); +extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); extern int houdini_unescape_uri(cmark_strbuf *ob, const uint8_t *src, size_t size); extern int houdini_unescape_url(cmark_strbuf *ob, const uint8_t *src, size_t size); extern int houdini_escape_js(cmark_strbuf *ob, const uint8_t *src, size_t size); diff --git a/src/houdini_href_e.c b/src/houdini_href_e.c index 7527780..7fb958a 100644 --- a/src/houdini_href_e.c +++ b/src/houdini_href_e.c @@ -49,10 +49,10 @@ static const char HREF_SAFE[] = { }; int -houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, size_t size) +houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { static const uint8_t hex_chars[] = "0123456789ABCDEF"; - size_t i = 0, org; + bufsize_t i = 0, org; uint8_t hex_str[3]; hex_str[0] = '%'; diff --git a/src/houdini_html_e.c b/src/houdini_html_e.c index 1a4c3e1..7f4b91f 100644 --- a/src/houdini_html_e.c +++ b/src/houdini_html_e.c @@ -45,9 +45,9 @@ static const char *HTML_ESCAPES[] = { }; int -houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secure) +houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, int secure) { - size_t i = 0, org, esc = 0; + bufsize_t i = 0, org, esc = 0; while (i < size) { org = i; @@ -75,7 +75,7 @@ houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secu } int -houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, size_t size) +houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { return houdini_escape_html0(ob, src, size, 1); } diff --git a/src/houdini_html_u.c b/src/houdini_html_u.c index eaf295e..e57894d 100644 --- a/src/houdini_html_u.c +++ b/src/houdini_html_u.c @@ -7,10 +7,10 @@ #include "utf8.h" #include "html_unescape.h" -size_t -houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size) +bufsize_t +houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { - size_t i = 0; + bufsize_t i = 0; if (size >= 3 && src[0] == '#') { int codepoint = 0; @@ -68,7 +68,7 @@ houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size) const struct html_ent *entity = find_entity((char *)src, i); if (entity != NULL) { - int len = 0; + bufsize_t len = 0; while (len < 4 && entity->utf8[len] != '\0') { ++len; } @@ -85,9 +85,9 @@ houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size) } int -houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size) +houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { - size_t i = 0, org, ent; + bufsize_t i = 0, org, ent; while (i < size) { org = i; @@ -122,7 +122,7 @@ houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size) return 1; } -void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, size_t size) +void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { if (!houdini_unescape_html(ob, src, size)) cmark_strbuf_put(ob, src, size); diff --git a/src/html.c b/src/html.c index d3f9fc7..317eb45 100644 --- a/src/html.c +++ b/src/html.c @@ -11,20 +11,20 @@ // Functions to convert cmark_nodes to HTML strings. -static void escape_html(cmark_strbuf *dest, const unsigned char *source, int length) +static void escape_html(cmark_strbuf *dest, const unsigned char *source, bufsize_t length) { if (length < 0) length = strlen((char *)source); - houdini_escape_html0(dest, source, (size_t)length, 0); + houdini_escape_html0(dest, source, length, 0); } -static void escape_href(cmark_strbuf *dest, const unsigned char *source, int length) +static void escape_href(cmark_strbuf *dest, const unsigned char *source, bufsize_t length) { if (length < 0) length = strlen((char *)source); - houdini_escape_href(dest, source, (size_t)length); + houdini_escape_href(dest, source, length); } static inline void cr(cmark_strbuf *html) @@ -165,7 +165,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, S_render_sourcepos(node, html, options); cmark_strbuf_puts(html, ">"); } else { - int first_tag = 0; + bufsize_t first_tag = 0; while (first_tag < node->as.code.info.len && node->as.code.info.data[first_tag] != ' ') { first_tag += 1; diff --git a/src/inlines.c b/src/inlines.c index 8a1ee44..7e8f806 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -36,7 +36,7 @@ typedef struct delimiter { struct delimiter *previous; struct delimiter *next; cmark_node *inl_text; - int position; + bufsize_t position; unsigned char delim_char; bool can_open; bool can_close; @@ -45,7 +45,7 @@ typedef struct delimiter { typedef struct { cmark_chunk input; - int pos; + bufsize_t pos; cmark_reference_map *refmap; delimiter *last_delim; } subject; @@ -57,7 +57,7 @@ static int parse_inline(subject* subj, cmark_node * parent, int options); static void subject_from_buf(subject *e, cmark_strbuf *buffer, cmark_reference_map *refmap); -static int subject_find_special_char(subject *subj, int options); +static bufsize_t subject_find_special_char(subject *subj, int options); static cmark_chunk cmark_clean_autolink(cmark_chunk *url, int is_email) { @@ -143,7 +143,7 @@ static inline cmark_node* make_simple(cmark_node_type t) static cmark_chunk chunk_clone(cmark_chunk *src) { cmark_chunk c; - int len = src->len; + bufsize_t len = src->len; c.len = len; c.data = (unsigned char *)malloc(len + 1); @@ -177,7 +177,7 @@ static inline unsigned char peek_char(subject *subj) return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0; } -static inline unsigned char peek_at(subject *subj, int pos) +static inline unsigned char peek_at(subject *subj, bufsize_t pos) { return subj->input.data[pos]; } @@ -195,8 +195,8 @@ static inline int is_eof(subject* subj) static inline cmark_chunk take_while(subject* subj, int (*f)(int)) { unsigned char c; - int startpos = subj->pos; - int len = 0; + bufsize_t startpos = subj->pos; + bufsize_t len = 0; while ((c = peek_char(subj)) && (*f)(c)) { advance(subj); @@ -211,7 +211,7 @@ static inline cmark_chunk take_while(subject* subj, int (*f)(int)) // parsed). Return 0 if you don't find matching closing // backticks, otherwise return the position in the subject // after the closing backticks. -static int scan_to_closing_backticks(subject* subj, int openticklength) +static bufsize_t scan_to_closing_backticks(subject* subj, bufsize_t openticklength) { // read non backticks unsigned char c; @@ -221,7 +221,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength) if (is_eof(subj)) { return 0; // did not find closing ticks, return 0 } - int numticks = 0; + bufsize_t numticks = 0; while (peek_char(subj) == '`') { advance(subj); numticks++; @@ -237,8 +237,8 @@ static int scan_to_closing_backticks(subject* subj, int openticklength) static cmark_node* handle_backticks(subject *subj) { cmark_chunk openticks = take_while(subj, isbacktick); - int startpos = subj->pos; - int endpos = scan_to_closing_backticks(subj, openticks.len); + bufsize_t startpos = subj->pos; + bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len); if (endpos == 0) { // not found subj->pos = startpos; // rewind @@ -260,7 +260,7 @@ static int scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close) { int numdelims = 0; - int before_char_pos; + bufsize_t before_char_pos; int32_t after_char = 0; int32_t before_char = 0; int len; @@ -376,7 +376,7 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open, // Assumes the subject has a c at the current position. static cmark_node* handle_delim(subject* subj, unsigned char c, bool smart) { - int numdelims; + bufsize_t numdelims; cmark_node * inl_text; bool can_open, can_close; cmark_chunk contents; @@ -500,11 +500,11 @@ static delimiter* S_insert_emph(subject *subj, delimiter *opener, delimiter *closer) { delimiter *delim, *tmp_delim; - int use_delims; + bufsize_t use_delims; cmark_node *opener_inl = opener->inl_text; cmark_node *closer_inl = closer->inl_text; - int opener_num_chars = opener_inl->as.literal.len; - int closer_num_chars = closer_inl->as.literal.len; + bufsize_t opener_num_chars = opener_inl->as.literal.len; + bufsize_t closer_num_chars = closer_inl->as.literal.len; cmark_node *tmp, *emph, *first_child, *last_child; // calculate the actual number of characters used from this closer @@ -596,7 +596,7 @@ static cmark_node* handle_backslash(subject *subj) static cmark_node* handle_entity(subject* subj) { cmark_strbuf ent = GH_BUF_INIT; - size_t len; + bufsize_t len; advance(subj); @@ -618,7 +618,7 @@ static cmark_node *make_str_with_entities(cmark_chunk *content) { cmark_strbuf unescaped = GH_BUF_INIT; - if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) { + if (houdini_unescape_html(&unescaped, content->data, content->len)) { return make_str(cmark_chunk_buf_detach(&unescaped)); } else { return make_str(*content); @@ -678,7 +678,7 @@ cmark_chunk cmark_clean_title(cmark_chunk *title) // Assumes the subject has a '<' character at the current position. static cmark_node* handle_pointy_brace(subject* subj) { - int matchlen = 0; + bufsize_t matchlen = 0; cmark_chunk contents; advance(subj); // advance past first < @@ -725,7 +725,7 @@ static cmark_node* handle_pointy_brace(subject* subj) // encountered. Backticks in labels do not start code spans. static int link_label(subject* subj, cmark_chunk *raw_label) { - int startpos = subj->pos; + bufsize_t startpos = subj->pos; int length = 0; unsigned char c; @@ -769,10 +769,10 @@ noMatch: // Return a link, an image, or a literal close bracket. static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent) { - int initial_pos; - int starturl, endurl, starttitle, endtitle, endall; - int n; - int sps; + bufsize_t initial_pos; + bufsize_t starturl, endurl, starttitle, endtitle, endall; + bufsize_t n; + bufsize_t sps; cmark_reference *ref; bool is_image = false; cmark_chunk url_chunk, title_chunk; @@ -922,7 +922,7 @@ match: // Assumes the subject has a newline at the current position. static cmark_node* handle_newline(subject *subj) { - int nlpos = subj->pos; + bufsize_t nlpos = subj->pos; // skip over newline advance(subj); // skip spaces at beginning of line @@ -938,7 +938,7 @@ static cmark_node* handle_newline(subject *subj) } } -static int subject_find_special_char(subject *subj, int options) +static bufsize_t subject_find_special_char(subject *subj, int options) { // "\r\n\\`&_*[]pos + 1; + bufsize_t n = subj->pos + 1; while (n < subj->input.len) { if (SPECIAL_CHARS[subj->input.data[n]]) @@ -1001,7 +1001,7 @@ static int parse_inline(subject* subj, cmark_node * parent, int options) cmark_node* new_inl = NULL; cmark_chunk contents; unsigned char c; - int endpos; + bufsize_t endpos; c = peek_char(subj); if (c == 0) { return 0; @@ -1098,7 +1098,7 @@ static void spnl(subject* subj) // Modify refmap if a reference is encountered. // Return 0 if no reference found, otherwise position of subject // after reference is parsed. -int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap) +bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap) { subject subj; @@ -1106,8 +1106,8 @@ int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refma cmark_chunk url; cmark_chunk title; - int matchlen = 0; - int beforetitle; + bufsize_t matchlen = 0; + bufsize_t beforetitle; subject_from_buf(&subj, input, NULL); diff --git a/src/inlines.h b/src/inlines.h index 534588e..f8847fc 100644 --- a/src/inlines.h +++ b/src/inlines.h @@ -10,7 +10,7 @@ cmark_chunk cmark_clean_title(cmark_chunk *title); void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap, int options); -int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap); +bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap); #ifdef __cplusplus } diff --git a/src/parser.h b/src/parser.h index ccdf84b..6e18c67 100644 --- a/src/parser.h +++ b/src/parser.h @@ -16,12 +16,12 @@ struct cmark_parser { struct cmark_node* root; struct cmark_node* current; int line_number; - int offset; - int first_nonspace; + bufsize_t offset; + bufsize_t first_nonspace; int indent; bool blank; cmark_strbuf *curline; - int last_line_length; + bufsize_t last_line_length; cmark_strbuf *linebuf; int options; }; diff --git a/src/scanners.c b/src/scanners.c index 7f9ed2e..3f4ddac 100644 --- a/src/scanners.c +++ b/src/scanners.c @@ -1,11 +1,11 @@ -/* Generated by re2c 0.13.6 */ +/* Generated by re2c 0.13.5 */ #include #include "chunk.h" #include "scanners.h" -int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset) +bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset) { - int res; + bufsize_t res; unsigned char *ptr = (unsigned char *)c->data; unsigned char lim = ptr[c->len]; @@ -19,7 +19,7 @@ int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset) // Try to match a scheme including colon. -int _scan_scheme(const unsigned char *p) +bufsize_t _scan_scheme(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -578,7 +578,7 @@ yy34: if (yych != ':') goto yy31; yy35: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy37: yych = *++p; if (yych == 'E') goto yy38; @@ -2919,7 +2919,7 @@ yy484: } // Try to match URI autolink after first <, returning number of chars matched. -int _scan_autolink_uri(const unsigned char *p) +bufsize_t _scan_autolink_uri(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -3517,7 +3517,7 @@ yy520: } if (yych <= '=') goto yy516; ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy524: yych = *++p; if (yych == 'E') goto yy525; @@ -5858,7 +5858,7 @@ yy971: } // Try to match email autolink after first <, returning num of chars matched. -int _scan_autolink_email(const unsigned char *p) +bufsize_t _scan_autolink_email(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -6060,7 +6060,7 @@ yy984: } yy985: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy987: ++p; yych = *p; @@ -10803,7 +10803,7 @@ yy1230: } // Try to match an HTML tag after first <, returning num of chars matched. -int _scan_html_tag(const unsigned char *p) +bufsize_t _scan_html_tag(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -10964,7 +10964,7 @@ yy1242: if (yych != '>') goto yy1239; yy1243: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1245: yych = *++p; if (yych == 'C') goto yy1260; @@ -11455,7 +11455,7 @@ yy1297: // Try to match an HTML block tag including first <, // returning num of chars matched. -int _scan_html_block_tag(const unsigned char *p) +bufsize_t _scan_html_block_tag(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -11513,7 +11513,7 @@ yy1303: goto yy1301; yy1304: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1306: yych = *++p; if (yych <= '/') { @@ -12022,7 +12022,7 @@ yy1343: } yy1344: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1346: yych = *++p; if (yych <= 'R') { @@ -12639,7 +12639,7 @@ yy1466: } yy1467: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1469: yych = *++p; if (yych <= 'R') { @@ -13243,7 +13243,7 @@ yy1585: // This may optionally be contained in <..>; otherwise // whitespace and unbalanced right parentheses aren't allowed. // Newlines aren't ever allowed. -int _scan_link_url(const unsigned char *p) +bufsize_t _scan_link_url(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -13308,7 +13308,7 @@ int _scan_link_url(const unsigned char *p) } } yy1588: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1589: yyaccept = 0; marker = ++p; @@ -13402,7 +13402,7 @@ yy1599: yy1600: p = marker; if (yyaccept <= 1) { - if (yyaccept == 0) { + if (yyaccept <= 0) { goto yy1588; } else { goto yy1595; @@ -13490,7 +13490,7 @@ yy1607: if (yych <= ' ') goto yy1608; if (yych != ')') goto yy1603; yy1608: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1609: ++p; yych = *p; @@ -13732,7 +13732,7 @@ yy1623: // Try to match a link title (in single quotes, in double quotes, or // in parentheses), returning number of chars matched. Allow one // level of internal nesting (quotes within quotes). -int _scan_link_title(const unsigned char *p) +bufsize_t _scan_link_title(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -13818,13 +13818,13 @@ yy1632: yy1633: p = marker; if (yyaccept <= 1) { - if (yyaccept == 0) { + if (yyaccept <= 0) { goto yy1626; } else { goto yy1637; } } else { - if (yyaccept == 2) { + if (yyaccept <= 2) { goto yy1644; } else { goto yy1651; @@ -13842,7 +13842,7 @@ yy1634: yy1636: ++p; yy1637: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1638: yyaccept = 1; marker = ++p; @@ -13874,7 +13874,7 @@ yy1641: yy1643: ++p; yy1644: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1645: yyaccept = 2; marker = ++p; @@ -13906,7 +13906,7 @@ yy1648: yy1650: ++p; yy1651: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1652: yyaccept = 3; marker = ++p; @@ -13922,7 +13922,7 @@ yy1652: } // Match space characters, including newlines. -int _scan_spacechars(const unsigned char *p) +bufsize_t _scan_spacechars(const unsigned char *p) { const unsigned char *start = p; \ @@ -13973,7 +13973,7 @@ int _scan_spacechars(const unsigned char *p) goto yy1659; } yy1655: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1656: yych = *++p; goto yy1658; @@ -13993,7 +13993,7 @@ yy1659: } // Match ATX header start. -int _scan_atx_header_start(const unsigned char *p) +bufsize_t _scan_atx_header_start(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14059,7 +14059,7 @@ yy1665: yy1666: ++p; yy1667: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1668: ++p; yych = *p; @@ -14128,7 +14128,7 @@ yy1672: // Match setext header line. Return 1 for level-1 header, // 2 for level-2, 0 for no match. -int _scan_setext_header_line(const unsigned char *p) +bufsize_t _scan_setext_header_line(const unsigned char *p) { const unsigned char *marker = NULL; @@ -14269,7 +14269,7 @@ yy1693: // Scan a horizontal rule line: "...three or more hyphens, asterisks, // or underscores on a line by themselves. If you wish, you may use // spaces between the hyphens or asterisks." -int _scan_hrule(const unsigned char *p) +bufsize_t _scan_hrule(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14384,7 +14384,7 @@ yy1709: if (yych != '\r') goto yy1704; yy1711: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1713: ++p; yych = *p; @@ -14422,7 +14422,7 @@ yy1719: } yy1721: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1723: ++p; yych = *p; @@ -14460,13 +14460,13 @@ yy1729: } yy1731: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } } } // Scan an opening code fence. -int _scan_open_code_fence(const unsigned char *p) +bufsize_t _scan_open_code_fence(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14557,7 +14557,7 @@ yy1743: yy1745: ++p; p = marker; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1747: yych = *++p; if (yybm[0+yych] & 64) { @@ -14585,13 +14585,13 @@ yy1750: yy1752: ++p; p = marker; - { return (p - start); } + { return (bufsize_t)(p - start); } } } // Scan a closing code fence with length at least len. -int _scan_close_code_fence(const unsigned char *p) +bufsize_t _scan_close_code_fence(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14687,7 +14687,7 @@ yy1764: yy1766: ++p; p = marker; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1768: yych = *++p; if (yybm[0+yych] & 128) { @@ -14725,14 +14725,14 @@ yy1771: yy1773: ++p; p = marker; - { return (p - start); } + { return (bufsize_t)(p - start); } } } // Scans an entity. // Returns number of chars matched. -int _scan_entity(const unsigned char *p) +bufsize_t _scan_entity(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14799,7 +14799,7 @@ yy1783: } yy1784: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1786: yych = *++p; if (yych <= ';') { diff --git a/src/scanners.h b/src/scanners.h index 1353f3b..bc5134e 100644 --- a/src/scanners.h +++ b/src/scanners.h @@ -5,21 +5,21 @@ extern "C" { #endif -int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset); -int _scan_scheme(const unsigned char *p); -int _scan_autolink_uri(const unsigned char *p); -int _scan_autolink_email(const unsigned char *p); -int _scan_html_tag(const unsigned char *p); -int _scan_html_block_tag(const unsigned char *p); -int _scan_link_url(const unsigned char *p); -int _scan_link_title(const unsigned char *p); -int _scan_spacechars(const unsigned char *p); -int _scan_atx_header_start(const unsigned char *p); -int _scan_setext_header_line(const unsigned char *p); -int _scan_hrule(const unsigned char *p); -int _scan_open_code_fence(const unsigned char *p); -int _scan_close_code_fence(const unsigned char *p); -int _scan_entity(const unsigned char *p); +bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset); +bufsize_t _scan_scheme(const unsigned char *p); +bufsize_t _scan_autolink_uri(const unsigned char *p); +bufsize_t _scan_autolink_email(const unsigned char *p); +bufsize_t _scan_html_tag(const unsigned char *p); +bufsize_t _scan_html_block_tag(const unsigned char *p); +bufsize_t _scan_link_url(const unsigned char *p); +bufsize_t _scan_link_title(const unsigned char *p); +bufsize_t _scan_spacechars(const unsigned char *p); +bufsize_t _scan_atx_header_start(const unsigned char *p); +bufsize_t _scan_setext_header_line(const unsigned char *p); +bufsize_t _scan_hrule(const unsigned char *p); +bufsize_t _scan_open_code_fence(const unsigned char *p); +bufsize_t _scan_close_code_fence(const unsigned char *p); +bufsize_t _scan_entity(const unsigned char *p); #define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n) #define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n) diff --git a/src/scanners.re b/src/scanners.re index 9411018..3722a99 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -2,9 +2,9 @@ #include "chunk.h" #include "scanners.h" -int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset) +bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset) { - int res; + bufsize_t res; unsigned char *ptr = (unsigned char *)c->data; unsigned char lim = ptr[c->len]; @@ -70,29 +70,29 @@ int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset) */ // Try to match a scheme including colon. -int _scan_scheme(const unsigned char *p) +bufsize_t _scan_scheme(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - scheme [:] { return (p - start); } + scheme [:] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Try to match URI autolink after first <, returning number of chars matched. -int _scan_autolink_uri(const unsigned char *p) +bufsize_t _scan_autolink_uri(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - scheme [:][^\x00-\x20<>]*[>] { return (p - start); } + scheme [:][^\x00-\x20<>]*[>] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Try to match email autolink after first <, returning num of chars matched. -int _scan_autolink_email(const unsigned char *p) +bufsize_t _scan_autolink_email(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -101,32 +101,32 @@ int _scan_autolink_email(const unsigned char *p) [@] [a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? ([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)* - [>] { return (p - start); } + [>] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Try to match an HTML tag after first <, returning num of chars matched. -int _scan_html_tag(const unsigned char *p) +bufsize_t _scan_html_tag(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - htmltag { return (p - start); } + htmltag { return (bufsize_t)(p - start); } .? { return 0; } */ } // Try to match an HTML block tag including first <, // returning num of chars matched. -int _scan_html_block_tag(const unsigned char *p) +bufsize_t _scan_html_block_tag(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [<] [/] blocktagname (spacechar | [>]) { return (p - start); } - [<] blocktagname (spacechar | [/>]) { return (p - start); } - [<] [!?] { return (p - start); } + [<] [/] blocktagname (spacechar | [>]) { return (bufsize_t)(p - start); } + [<] blocktagname (spacechar | [/>]) { return (bufsize_t)(p - start); } + [<] [!?] { return (bufsize_t)(p - start); } .? { return 0; } */ } @@ -135,13 +135,13 @@ int _scan_html_block_tag(const unsigned char *p) // This may optionally be contained in <..>; otherwise // whitespace and unbalanced right parentheses aren't allowed. // Newlines aren't ever allowed. -int _scan_link_url(const unsigned char *p) +bufsize_t _scan_link_url(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); } - [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); } + [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (bufsize_t)(p - start); } + [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (bufsize_t)(p - start); } .? { return 0; } */ } @@ -149,42 +149,42 @@ int _scan_link_url(const unsigned char *p) // Try to match a link title (in single quotes, in double quotes, or // in parentheses), returning number of chars matched. Allow one // level of internal nesting (quotes within quotes). -int _scan_link_title(const unsigned char *p) +bufsize_t _scan_link_title(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - ["] (escaped_char|[^"\x00])* ["] { return (p - start); } - ['] (escaped_char|[^'\x00])* ['] { return (p - start); } - [(] (escaped_char|[^)\x00])* [)] { return (p - start); } + ["] (escaped_char|[^"\x00])* ["] { return (bufsize_t)(p - start); } + ['] (escaped_char|[^'\x00])* ['] { return (bufsize_t)(p - start); } + [(] (escaped_char|[^)\x00])* [)] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Match space characters, including newlines. -int _scan_spacechars(const unsigned char *p) +bufsize_t _scan_spacechars(const unsigned char *p) { const unsigned char *start = p; \ /*!re2c - [ \t\v\f\r\n]* { return (p - start); } + [ \t\v\f\r\n]* { return (bufsize_t)(p - start); } . { return 0; } */ } // Match ATX header start. -int _scan_atx_header_start(const unsigned char *p) +bufsize_t _scan_atx_header_start(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [#]{1,6} ([ ]+|[\r\n]) { return (p - start); } + [#]{1,6} ([ ]+|[\r\n]) { return (bufsize_t)(p - start); } .? { return 0; } */ } // Match setext header line. Return 1 for level-1 header, // 2 for level-2, 0 for no match. -int _scan_setext_header_line(const unsigned char *p) +bufsize_t _scan_setext_header_line(const unsigned char *p) { const unsigned char *marker = NULL; /*!re2c @@ -197,51 +197,51 @@ int _scan_setext_header_line(const unsigned char *p) // Scan a horizontal rule line: "...three or more hyphens, asterisks, // or underscores on a line by themselves. If you wish, you may use // spaces between the hyphens or asterisks." -int _scan_hrule(const unsigned char *p) +bufsize_t _scan_hrule(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - ([*][ ]*){3,} [ \t]* [\r\n] { return (p - start); } - ([_][ ]*){3,} [ \t]* [\r\n] { return (p - start); } - ([-][ ]*){3,} [ \t]* [\r\n] { return (p - start); } + ([*][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); } + ([_][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); } + ([-][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Scan an opening code fence. -int _scan_open_code_fence(const unsigned char *p) +bufsize_t _scan_open_code_fence(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [`]{3,} / [^`\r\n\x00]*[\r\n] { return (p - start); } - [~]{3,} / [^~\r\n\x00]*[\r\n] { return (p - start); } + [`]{3,} / [^`\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); } + [~]{3,} / [^~\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Scan a closing code fence with length at least len. -int _scan_close_code_fence(const unsigned char *p) +bufsize_t _scan_close_code_fence(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [`]{3,} / [ \t]*[\r\n] { return (p - start); } - [~]{3,} / [ \t]*[\r\n] { return (p - start); } + [`]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); } + [~]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Scans an entity. // Returns number of chars matched. -int _scan_entity(const unsigned char *p) +bufsize_t _scan_entity(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c [&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;] - { return (p - start); } + { return (bufsize_t)(p - start); } .? { return 0; } */ } diff --git a/src/utf8.c b/src/utf8.c index b83c2a5..ba1d873 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -30,7 +30,7 @@ static void encode_unknown(cmark_strbuf *buf) cmark_strbuf_put(buf, repl, 3); } -static int utf8proc_charlen(const uint8_t *str, int str_len) +static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) { int length, i; @@ -42,7 +42,7 @@ static int utf8proc_charlen(const uint8_t *str, int str_len) if (!length) return -1; - if (str_len >= 0 && length > str_len) + if (str_len >= 0 && (bufsize_t)length > str_len) return -str_len; for (i = 1; i < length; i++) { @@ -54,7 +54,7 @@ static int utf8proc_charlen(const uint8_t *str, int str_len) } // Validate a single UTF-8 character according to RFC 3629. -static int utf8proc_valid(const uint8_t *str, int str_len) +static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) { int length = utf8proc_charlen(str, str_len); @@ -109,14 +109,14 @@ static int utf8proc_valid(const uint8_t *str, int str_len) return length; } -void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, size_t size) +void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, bufsize_t size) { static const uint8_t whitespace[] = " "; - size_t i = 0, tab = 0; + bufsize_t i = 0, tab = 0; while (i < size) { - size_t org = i; + bufsize_t org = i; while (i < size && line[i] != '\t' && line[i] != '\0' && line[i] < 0x80) { @@ -151,7 +151,7 @@ void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, size_t size) } } -int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst) +int utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst) { int length; int32_t uc = -1; @@ -191,7 +191,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst) void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) { uint8_t dst[4]; - int len = 0; + bufsize_t len = 0; assert(uc >= 0); @@ -227,7 +227,7 @@ void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) cmark_strbuf_put(buf, dst, len); } -void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len) +void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len) { int32_t c; @@ -235,7 +235,7 @@ void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len) utf8proc_encode_char(x, dest) while (len > 0) { - int char_len = utf8proc_iterate(str, len, &c); + bufsize_t char_len = utf8proc_iterate(str, len, &c); if (char_len >= 0) { #include "case_fold_switch.inc" diff --git a/src/utf8.h b/src/utf8.h index 7df1573..ed1d7ee 100644 --- a/src/utf8.h +++ b/src/utf8.h @@ -8,10 +8,10 @@ extern "C" { #endif -void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len); +void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len); void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf); -int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst); -void utf8proc_detab(cmark_strbuf *dest, const uint8_t *line, size_t size); +int utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst); +void utf8proc_detab(cmark_strbuf *dest, const uint8_t *line, bufsize_t size); int utf8proc_is_space(int32_t uc); int utf8proc_is_punctuation(int32_t uc); diff --git a/src/xml.c b/src/xml.c index 14f6d67..688dd36 100644 --- a/src/xml.c +++ b/src/xml.c @@ -11,13 +11,13 @@ // Functions to convert cmark_nodes to XML strings. -static void escape_xml(cmark_strbuf *dest, const unsigned char *source, int length) +static void escape_xml(cmark_strbuf *dest, const unsigned char *source, bufsize_t length) { if (source != NULL) { if (length < 0) length = strlen((char *)source); - houdini_escape_html0(dest, source, (size_t)length, 0); + houdini_escape_html0(dest, source, length, 0); } } -- cgit v1.2.3