From b237924585e61532ada774bf9e70eadff00666dc Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sat, 18 Jan 2020 23:12:37 +0100 Subject: Use C string instead of chunk for link URL and title Use zero-terminated C strings instead of cmark_chunks without storing the length. This introduces a few additional strlen computations, but overhead should be low. This allows reducing the size of struct cmark_node later. --- api_test/main.c | 2 +- src/commonmark.c | 26 ++++++++++---------------- src/html.c | 22 ++++++++++++---------- src/inlines.c | 57 +++++++++++++++++++++----------------------------------- src/inlines.h | 7 +++++-- src/node.c | 12 ++++++------ src/node.h | 4 ++-- src/references.c | 4 ++-- src/references.h | 4 ++-- src/xml.c | 11 +++++++---- 10 files changed, 68 insertions(+), 81 deletions(-) diff --git a/api_test/main.c b/api_test/main.c index 9b7ba41..e7fccbd 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -915,7 +915,7 @@ static void source_pos(test_batch_runner *runner) { " \n" " \n" " Hello “ \n" - " \n" + " \n" " http://www.google.com\n" " \n" " \n" diff --git a/src/commonmark.c b/src/commonmark.c index b89462b..89aef5b 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -119,24 +119,22 @@ static int shortest_unused_backtick_sequence(const char *code) { } static bool is_autolink(cmark_node *node) { - cmark_chunk *title; - cmark_chunk *url; + const unsigned char *title; + const unsigned char *url; cmark_node *link_text; - char *realurl; - int realurllen; if (node->type != CMARK_NODE_LINK) { return false; } - url = &node->as.link.url; - if (url->len == 0 || scan_scheme(url, 0) == 0) { + url = node->as.link.url; + if (url == NULL || _scan_scheme(url) == 0) { return false; } - title = &node->as.link.title; + title = node->as.link.title; // if it has a title, we can't treat it as an autolink: - if (title->len > 0) { + if (title && title[0]) { return false; } @@ -145,15 +143,11 @@ static bool is_autolink(cmark_node *node) { return false; } 
cmark_consolidate_text_nodes(link_text); - realurl = (char *)url->data; - realurllen = url->len; - if (strncmp(realurl, "mailto:", 7) == 0) { - realurl += 7; - realurllen -= 7; + if (strcmp((const char *)url, "mailto:") == 0) { + url += 7; } - return (realurllen == link_text->as.literal.len && - strncmp(realurl, (char *)link_text->as.literal.data, - link_text->as.literal.len) == 0); + return strncmp((const char *)url, (char *)link_text->as.literal.data, + link_text->as.literal.len) == 0; } // if node is a block node, returns node. diff --git a/src/html.c b/src/html.c index 161e9f9..85bd704 100644 --- a/src/html.c +++ b/src/html.c @@ -280,13 +280,14 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, if (entering) { cmark_strbuf_puts(html, "as.link.url, 0))) { - houdini_escape_href(html, node->as.link.url.data, - node->as.link.url.len); + !(_scan_dangerous_url(node->as.link.url))) { + houdini_escape_href(html, node->as.link.url, + strlen((char *)node->as.link.url)); } - if (node->as.link.title.len) { + if (node->as.link.title) { cmark_strbuf_puts(html, "\" title=\""); - escape_html(html, node->as.link.title.data, node->as.link.title.len); + escape_html(html, node->as.link.title, + strlen((char *)node->as.link.title)); } cmark_strbuf_puts(html, "\">"); } else { @@ -298,16 +299,17 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, if (entering) { cmark_strbuf_puts(html, "as.link.url, 0))) { - houdini_escape_href(html, node->as.link.url.data, - node->as.link.url.len); + !(_scan_dangerous_url(node->as.link.url))) { + houdini_escape_href(html, node->as.link.url, + strlen((char *)node->as.link.url)); } cmark_strbuf_puts(html, "\" alt=\""); state->plain = node; } else { - if (node->as.link.title.len) { + if (node->as.link.title) { cmark_strbuf_puts(html, "\" title=\""); - escape_html(html, node->as.link.title.data, node->as.link.title.len); + escape_html(html, node->as.link.title, + strlen((char *)node->as.link.title)); } 
cmark_strbuf_puts(html, "\" />"); diff --git a/src/inlines.c b/src/inlines.c index 6b77311..7d584ca 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -117,36 +117,27 @@ static cmark_node *make_str_with_entities(subject *subj, // Duplicate a chunk by creating a copy of the buffer not by reusing the // buffer like cmark_chunk_dup does. -static cmark_chunk chunk_clone(cmark_mem *mem, cmark_chunk *src) { - cmark_chunk c; - bufsize_t len = src->len; - - c.len = len; - c.data = (unsigned char *)mem->calloc(len + 1, 1); - c.alloc = 1; - if (len) - memcpy(c.data, src->data, len); - c.data[len] = '\0'; - - return c; +static unsigned char *cmark_strdup(cmark_mem *mem, unsigned char *src) { + if (src == NULL) { + return NULL; + } + size_t len = strlen((char *)src); + unsigned char *data = (unsigned char *)mem->realloc(NULL, len + 1); + memcpy(data, src, len + 1); + return data; } -static cmark_chunk cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url, - int is_email) { +static unsigned char *cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url, + int is_email) { cmark_strbuf buf = CMARK_BUF_INIT(mem); cmark_chunk_trim(url); - if (url->len == 0) { - cmark_chunk result = CMARK_CHUNK_EMPTY; - return result; - } - if (is_email) cmark_strbuf_puts(&buf, "mailto:"); houdini_unescape_html_f(&buf, url->data, url->len); - return cmark_chunk_buf_detach(&buf); + return cmark_strbuf_detach(&buf); } static CMARK_INLINE cmark_node *make_autolink(subject *subj, @@ -154,7 +145,7 @@ static CMARK_INLINE cmark_node *make_autolink(subject *subj, cmark_chunk url, int is_email) { cmark_node *link = make_simple(subj->mem, CMARK_NODE_LINK); link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email); - link->as.link.title = cmark_chunk_literal(""); + link->as.link.title = NULL; link->start_line = link->end_line = subj->line; link->start_column = start_column + 1; link->end_column = end_column + 1; @@ -799,29 +790,23 @@ static cmark_node *handle_entity(subject *subj) { // Clean a URL: 
remove surrounding whitespace, and remove \ that escape // punctuation. -cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) { +unsigned char *cmark_clean_url(cmark_mem *mem, cmark_chunk *url) { cmark_strbuf buf = CMARK_BUF_INIT(mem); cmark_chunk_trim(url); - if (url->len == 0) { - cmark_chunk result = CMARK_CHUNK_EMPTY; - return result; - } - - houdini_unescape_html_f(&buf, url->data, url->len); + houdini_unescape_html_f(&buf, url->data, url->len); cmark_strbuf_unescape(&buf); - return cmark_chunk_buf_detach(&buf); + return cmark_strbuf_detach(&buf); } -cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) { +unsigned char *cmark_clean_title(cmark_mem *mem, cmark_chunk *title) { cmark_strbuf buf = CMARK_BUF_INIT(mem); unsigned char first, last; if (title->len == 0) { - cmark_chunk result = CMARK_CHUNK_EMPTY; - return result; + return NULL; } first = title->data[0]; @@ -836,7 +821,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) { } cmark_strbuf_unescape(&buf); - return cmark_chunk_buf_detach(&buf); + return cmark_strbuf_detach(&buf); } // Parse an autolink or HTML tag. 
@@ -1003,7 +988,7 @@ static cmark_node *handle_close_bracket(subject *subj) { bufsize_t sps, n; cmark_reference *ref = NULL; cmark_chunk url_chunk, title_chunk; - cmark_chunk url, title; + unsigned char *url, *title; bracket *opener; cmark_node *inl; cmark_chunk raw_label; @@ -1090,8 +1075,8 @@ static cmark_node *handle_close_bracket(subject *subj) { } if (ref != NULL) { // found - url = chunk_clone(subj->mem, &ref->url); - title = chunk_clone(subj->mem, &ref->title); + url = cmark_strdup(subj->mem, ref->url); + title = cmark_strdup(subj->mem, ref->title); goto match; } else { goto noMatch; diff --git a/src/inlines.h b/src/inlines.h index 39d3363..800ed0c 100644 --- a/src/inlines.h +++ b/src/inlines.h @@ -1,12 +1,15 @@ #ifndef CMARK_INLINES_H #define CMARK_INLINES_H +#include "chunk.h" +#include "references.h" + #ifdef __cplusplus extern "C" { #endif -cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url); -cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title); +unsigned char *cmark_clean_url(cmark_mem *mem, cmark_chunk *url); +unsigned char *cmark_clean_title(cmark_mem *mem, cmark_chunk *title); void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, cmark_reference_map *refmap, int options); diff --git a/src/node.c b/src/node.c index fa538bc..f67d07e 100644 --- a/src/node.c +++ b/src/node.c @@ -120,8 +120,8 @@ static void S_free_nodes(cmark_node *e) { break; case CMARK_NODE_LINK: case CMARK_NODE_IMAGE: - cmark_chunk_free(NODE_MEM(e), &e->as.link.url); - cmark_chunk_free(NODE_MEM(e), &e->as.link.title); + NODE_MEM(e)->free(e->as.link.url); + NODE_MEM(e)->free(e->as.link.title); break; case CMARK_NODE_CUSTOM_BLOCK: case CMARK_NODE_CUSTOM_INLINE: @@ -505,7 +505,7 @@ const char *cmark_node_get_url(cmark_node *node) { switch (node->type) { case CMARK_NODE_LINK: case CMARK_NODE_IMAGE: - return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.link.url); + return node->as.link.url ? 
(char *)node->as.link.url : ""; default: break; } @@ -521,7 +521,7 @@ int cmark_node_set_url(cmark_node *node, const char *url) { switch (node->type) { case CMARK_NODE_LINK: case CMARK_NODE_IMAGE: - cmark_chunk_set_cstr(NODE_MEM(node), &node->as.link.url, url); + cmark_set_cstr(NODE_MEM(node), &node->as.link.url, url); return 1; default: break; @@ -538,7 +538,7 @@ const char *cmark_node_get_title(cmark_node *node) { switch (node->type) { case CMARK_NODE_LINK: case CMARK_NODE_IMAGE: - return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.link.title); + return node->as.link.title ? (char *)node->as.link.title : ""; default: break; } @@ -554,7 +554,7 @@ int cmark_node_set_title(cmark_node *node, const char *title) { switch (node->type) { case CMARK_NODE_LINK: case CMARK_NODE_IMAGE: - cmark_chunk_set_cstr(NODE_MEM(node), &node->as.link.title, title); + cmark_set_cstr(NODE_MEM(node), &node->as.link.title, title); return 1; default: break; diff --git a/src/node.h b/src/node.h index 327c568..9658d1f 100644 --- a/src/node.h +++ b/src/node.h @@ -37,8 +37,8 @@ typedef struct { } cmark_heading; typedef struct { - cmark_chunk url; - cmark_chunk title; + unsigned char *url; + unsigned char *title; } cmark_link; typedef struct { diff --git a/src/references.c b/src/references.c index 89f2dc8..f0d871f 100644 --- a/src/references.c +++ b/src/references.c @@ -18,8 +18,8 @@ static void reference_free(cmark_reference_map *map, cmark_reference *ref) { cmark_mem *mem = map->mem; if (ref != NULL) { mem->free(ref->label); - cmark_chunk_free(mem, &ref->url); - cmark_chunk_free(mem, &ref->title); + mem->free(ref->url); + mem->free(ref->title); mem->free(ref); } } diff --git a/src/references.h b/src/references.h index 8d3631f..5038c49 100644 --- a/src/references.h +++ b/src/references.h @@ -12,8 +12,8 @@ extern "C" { struct cmark_reference { struct cmark_reference *next; unsigned char *label; - cmark_chunk url; - cmark_chunk title; + unsigned char *url; + unsigned char *title; unsigned int 
hash; }; diff --git a/src/xml.c b/src/xml.c index bd82f5f..4bede85 100644 --- a/src/xml.c +++ b/src/xml.c @@ -121,11 +121,14 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_LINK: case CMARK_NODE_IMAGE: cmark_strbuf_puts(xml, " destination=\""); - escape_xml(xml, node->as.link.url.data, node->as.link.url.len); - cmark_strbuf_putc(xml, '"'); - cmark_strbuf_puts(xml, " title=\""); - escape_xml(xml, node->as.link.title.data, node->as.link.title.len); + escape_xml(xml, node->as.link.url, strlen((char *)node->as.link.url)); cmark_strbuf_putc(xml, '"'); + if (node->as.link.title) { + cmark_strbuf_puts(xml, " title=\""); + escape_xml(xml, node->as.link.title, + strlen((char *)node->as.link.title)); + cmark_strbuf_putc(xml, '"'); + } break; default: break; -- cgit v1.2.3