From 3acbdf0965859c55fa36c65a4c0e17e92012687c Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sat, 18 Jan 2020 22:27:13 +0100 Subject: Use C string instead of chunk for code info and literal Use zero-terminated C strings instead of cmark_chunks without storing the length. The length of code literals will be readded in a later commit. strlen overhead for code info should be negligible. Reduces size of struct cmark_node by 8 bytes. --- src/blocks.c | 20 ++++++++++++-------- src/html.c | 11 ++++++----- src/node.c | 12 ++++++------ src/node.h | 4 ++-- src/xml.c | 7 ++++--- 5 files changed, 30 insertions(+), 24 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index b6077eb..5214f47 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -302,11 +302,15 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { } assert(pos < node_content->size); - cmark_strbuf tmp = CMARK_BUF_INIT(parser->mem); - houdini_unescape_html_f(&tmp, node_content->ptr, pos); - cmark_strbuf_trim(&tmp); - cmark_strbuf_unescape(&tmp); - b->as.code.info = cmark_chunk_buf_detach(&tmp); + if (pos == 0) { + b->as.code.info = NULL; + } else { + cmark_strbuf tmp = CMARK_BUF_INIT(parser->mem); + houdini_unescape_html_f(&tmp, node_content->ptr, pos); + cmark_strbuf_trim(&tmp); + cmark_strbuf_unescape(&tmp); + b->as.code.info = cmark_strbuf_detach(&tmp); + } if (node_content->ptr[pos] == '\r') pos += 1; @@ -314,7 +318,7 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { pos += 1; cmark_strbuf_drop(node_content, pos); } - b->as.code.literal = cmark_chunk_buf_detach(node_content); + b->as.code.literal = cmark_strbuf_detach(node_content); break; case CMARK_NODE_HTML_BLOCK: @@ -972,7 +976,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, (*container)->as.code.fence_length = (matched > 255) ? 255 : matched; (*container)->as.code.fence_offset = (int8_t)(parser->first_nonspace - parser->offset); - (*container)->as.code.info = cmark_chunk_literal(""); + (*container)->as.code.info = NULL; S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false); @@ -1074,7 +1078,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, (*container)->as.code.fence_char = 0; (*container)->as.code.fence_length = 0; (*container)->as.code.fence_offset = 0; - (*container)->as.code.info = cmark_chunk_literal(""); + (*container)->as.code.info = NULL; } else { break; diff --git a/src/html.c b/src/html.c index a13d016..161e9f9 100644 --- a/src/html.c +++ b/src/html.c @@ -146,25 +146,26 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_CODE_BLOCK: cr(html); - if (node->as.code.info.len == 0) { + if (node->as.code.info == NULL || node->as.code.info[0] == 0) { cmark_strbuf_puts(html, ""); } else { bufsize_t first_tag = 0; - while (first_tag < node->as.code.info.len && - !cmark_isspace(node->as.code.info.data[first_tag])) { + while (node->as.code.info[first_tag] && + !cmark_isspace(node->as.code.info[first_tag])) { first_tag += 1; } cmark_strbuf_puts(html, "as.code.info.data, first_tag); + escape_html(html, node->as.code.info, first_tag); cmark_strbuf_puts(html, "\">"); } - escape_html(html, node->as.code.literal.data, node->as.code.literal.len); + escape_html(html, node->as.code.literal, + strlen((char *)node->as.code.literal)); cmark_strbuf_puts(html, "\n"); break; diff --git a/src/node.c b/src/node.c index 147debf..fa538bc 100644 --- a/src/node.c +++ b/src/node.c @@ -109,8 +109,8 @@ static void S_free_nodes(cmark_node *e) { cmark_strbuf_free(&e->content); switch (e->type) { case CMARK_NODE_CODE_BLOCK: - cmark_chunk_free(NODE_MEM(e), &e->as.code.info); - cmark_chunk_free(NODE_MEM(e), &e->as.code.literal); + NODE_MEM(e)->free(e->as.code.info); + NODE_MEM(e)->free(e->as.code.literal); break; case CMARK_NODE_TEXT: case CMARK_NODE_HTML_INLINE: @@ -298,7 +298,7 @@ const char *cmark_node_get_literal(cmark_node *node) { return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.literal); case CMARK_NODE_CODE_BLOCK: - return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.code.literal); + return (char *)node->as.code.literal; default: break; @@ -321,7 +321,7 @@ int cmark_node_set_literal(cmark_node *node, const char *content) { return 1; case CMARK_NODE_CODE_BLOCK: - cmark_chunk_set_cstr(NODE_MEM(node), &node->as.code.literal, content); + cmark_set_cstr(NODE_MEM(node), &node->as.code.literal, content); return 1; default: @@ -478,7 +478,7 @@ const char *cmark_node_get_fence_info(cmark_node *node) { } if (node->type == CMARK_NODE_CODE_BLOCK) { - return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.code.info); + return node->as.code.info ? (char *)node->as.code.info : ""; } else { return NULL; } @@ -490,7 +490,7 @@ int cmark_node_set_fence_info(cmark_node *node, const char *info) { } if (node->type == CMARK_NODE_CODE_BLOCK) { - cmark_chunk_set_cstr(NODE_MEM(node), &node->as.code.info, info); + cmark_set_cstr(NODE_MEM(node), &node->as.code.info, info); return 1; } else { return 0; diff --git a/src/node.h b/src/node.h index 2163230..327c568 100644 --- a/src/node.h +++ b/src/node.h @@ -23,8 +23,8 @@ typedef struct { } cmark_list; typedef struct { - cmark_chunk info; - cmark_chunk literal; + unsigned char *info; + unsigned char *literal; uint8_t fence_length; uint8_t fence_offset; unsigned char fence_char; diff --git a/src/xml.c b/src/xml.c index 48674cc..bd82f5f 100644 --- a/src/xml.c +++ b/src/xml.c @@ -95,13 +95,14 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, cmark_strbuf_puts(xml, buffer); break; case CMARK_NODE_CODE_BLOCK: - if (node->as.code.info.len > 0) { + if (node->as.code.info) { cmark_strbuf_puts(xml, " info=\""); - escape_xml(xml, node->as.code.info.data, node->as.code.info.len); + escape_xml(xml, node->as.code.info, strlen((char *)node->as.code.info)); cmark_strbuf_putc(xml, '"'); } cmark_strbuf_puts(xml, " xml:space=\"preserve\">"); - escape_xml(xml, node->as.code.literal.data, node->as.code.literal.len); + escape_xml(xml, node->as.code.literal, + strlen((char *)node->as.code.literal)); cmark_strbuf_puts(xml, "