summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nick Wellnhofer <wellnhofer@aevum.de> 2020-01-18 23:12:37 +0100
committer John MacFarlane <jgm@berkeley.edu> 2020-01-23 08:25:54 -0800
commit b237924585e61532ada774bf9e70eadff00666dc (patch)
tree 4355e86e19736d1eeffd905cdbc2d2b6ea7a6430
parent 3acbdf0965859c55fa36c65a4c0e17e92012687c (diff)
Use C string instead of chunk for link URL and title
Use zero-terminated C strings, which do not store their length, instead of cmark_chunks. This introduces a few additional strlen computations, but the overhead should be low. It also allows the size of struct cmark_node to be reduced later.
-rw-r--r-- api_test/main.c | 2
-rw-r--r-- src/commonmark.c | 26
-rw-r--r-- src/html.c | 22
-rw-r--r-- src/inlines.c | 57
-rw-r--r-- src/inlines.h | 7
-rw-r--r-- src/node.c | 12
-rw-r--r-- src/node.h | 4
-rw-r--r-- src/references.c | 4
-rw-r--r-- src/references.h | 4
-rw-r--r-- src/xml.c | 11
10 files changed, 68 insertions, 81 deletions
diff --git a/api_test/main.c b/api_test/main.c
index 9b7ba41..e7fccbd 100644
--- a/api_test/main.c
+++ b/api_test/main.c
@@ -915,7 +915,7 @@ static void source_pos(test_batch_runner *runner) {
" </heading>\n"
" <paragraph sourcepos=\"3:1-4:42\">\n"
" <text sourcepos=\"3:1-3:14\" xml:space=\"preserve\">Hello “ </text>\n"
- " <link sourcepos=\"3:15-3:37\" destination=\"http://www.google.com\" title=\"\">\n"
+ " <link sourcepos=\"3:15-3:37\" destination=\"http://www.google.com\">\n"
" <text sourcepos=\"3:16-3:36\" xml:space=\"preserve\">http://www.google.com</text>\n"
" </link>\n"
" <softbreak />\n"
diff --git a/src/commonmark.c b/src/commonmark.c
index b89462b..89aef5b 100644
--- a/src/commonmark.c
+++ b/src/commonmark.c
@@ -119,24 +119,22 @@ static int shortest_unused_backtick_sequence(const char *code) {
}
static bool is_autolink(cmark_node *node) {
- cmark_chunk *title;
- cmark_chunk *url;
+ const unsigned char *title;
+ const unsigned char *url;
cmark_node *link_text;
- char *realurl;
- int realurllen;
if (node->type != CMARK_NODE_LINK) {
return false;
}
- url = &node->as.link.url;
- if (url->len == 0 || scan_scheme(url, 0) == 0) {
+ url = node->as.link.url;
+ if (url == NULL || _scan_scheme(url) == 0) {
return false;
}
- title = &node->as.link.title;
+ title = node->as.link.title;
// if it has a title, we can't treat it as an autolink:
- if (title->len > 0) {
+ if (title && title[0]) {
return false;
}
@@ -145,15 +143,11 @@ static bool is_autolink(cmark_node *node) {
return false;
}
cmark_consolidate_text_nodes(link_text);
- realurl = (char *)url->data;
- realurllen = url->len;
- if (strncmp(realurl, "mailto:", 7) == 0) {
- realurl += 7;
- realurllen -= 7;
+ if (strcmp((const char *)url, "mailto:") == 0) {
+ url += 7;
}
- return (realurllen == link_text->as.literal.len &&
- strncmp(realurl, (char *)link_text->as.literal.data,
- link_text->as.literal.len) == 0);
+ return strncmp((const char *)url, (char *)link_text->as.literal.data,
+ link_text->as.literal.len) == 0;
}
// if node is a block node, returns node.
diff --git a/src/html.c b/src/html.c
index 161e9f9..85bd704 100644
--- a/src/html.c
+++ b/src/html.c
@@ -280,13 +280,14 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
if (entering) {
cmark_strbuf_puts(html, "<a href=\"");
if ((options & CMARK_OPT_UNSAFE) ||
- !(scan_dangerous_url(&node->as.link.url, 0))) {
- houdini_escape_href(html, node->as.link.url.data,
- node->as.link.url.len);
+ !(_scan_dangerous_url(node->as.link.url))) {
+ houdini_escape_href(html, node->as.link.url,
+ strlen((char *)node->as.link.url));
}
- if (node->as.link.title.len) {
+ if (node->as.link.title) {
cmark_strbuf_puts(html, "\" title=\"");
- escape_html(html, node->as.link.title.data, node->as.link.title.len);
+ escape_html(html, node->as.link.title,
+ strlen((char *)node->as.link.title));
}
cmark_strbuf_puts(html, "\">");
} else {
@@ -298,16 +299,17 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
if (entering) {
cmark_strbuf_puts(html, "<img src=\"");
if ((options & CMARK_OPT_UNSAFE) ||
- !(scan_dangerous_url(&node->as.link.url, 0))) {
- houdini_escape_href(html, node->as.link.url.data,
- node->as.link.url.len);
+ !(_scan_dangerous_url(node->as.link.url))) {
+ houdini_escape_href(html, node->as.link.url,
+ strlen((char *)node->as.link.url));
}
cmark_strbuf_puts(html, "\" alt=\"");
state->plain = node;
} else {
- if (node->as.link.title.len) {
+ if (node->as.link.title) {
cmark_strbuf_puts(html, "\" title=\"");
- escape_html(html, node->as.link.title.data, node->as.link.title.len);
+ escape_html(html, node->as.link.title,
+ strlen((char *)node->as.link.title));
}
cmark_strbuf_puts(html, "\" />");
diff --git a/src/inlines.c b/src/inlines.c
index 6b77311..7d584ca 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -117,36 +117,27 @@ static cmark_node *make_str_with_entities(subject *subj,
// Duplicate a chunk by creating a copy of the buffer not by reusing the
// buffer like cmark_chunk_dup does.
-static cmark_chunk chunk_clone(cmark_mem *mem, cmark_chunk *src) {
- cmark_chunk c;
- bufsize_t len = src->len;
-
- c.len = len;
- c.data = (unsigned char *)mem->calloc(len + 1, 1);
- c.alloc = 1;
- if (len)
- memcpy(c.data, src->data, len);
- c.data[len] = '\0';
-
- return c;
+static unsigned char *cmark_strdup(cmark_mem *mem, unsigned char *src) {
+ if (src == NULL) {
+ return NULL;
+ }
+ size_t len = strlen((char *)src);
+ unsigned char *data = (unsigned char *)mem->realloc(NULL, len + 1);
+ memcpy(data, src, len + 1);
+ return data;
}
-static cmark_chunk cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url,
- int is_email) {
+static unsigned char *cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url,
+ int is_email) {
cmark_strbuf buf = CMARK_BUF_INIT(mem);
cmark_chunk_trim(url);
- if (url->len == 0) {
- cmark_chunk result = CMARK_CHUNK_EMPTY;
- return result;
- }
-
if (is_email)
cmark_strbuf_puts(&buf, "mailto:");
houdini_unescape_html_f(&buf, url->data, url->len);
- return cmark_chunk_buf_detach(&buf);
+ return cmark_strbuf_detach(&buf);
}
static CMARK_INLINE cmark_node *make_autolink(subject *subj,
@@ -154,7 +145,7 @@ static CMARK_INLINE cmark_node *make_autolink(subject *subj,
cmark_chunk url, int is_email) {
cmark_node *link = make_simple(subj->mem, CMARK_NODE_LINK);
link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email);
- link->as.link.title = cmark_chunk_literal("");
+ link->as.link.title = NULL;
link->start_line = link->end_line = subj->line;
link->start_column = start_column + 1;
link->end_column = end_column + 1;
@@ -799,29 +790,23 @@ static cmark_node *handle_entity(subject *subj) {
// Clean a URL: remove surrounding whitespace, and remove \ that escape
// punctuation.
-cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
+unsigned char *cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
cmark_strbuf buf = CMARK_BUF_INIT(mem);
cmark_chunk_trim(url);
- if (url->len == 0) {
- cmark_chunk result = CMARK_CHUNK_EMPTY;
- return result;
- }
-
- houdini_unescape_html_f(&buf, url->data, url->len);
+ houdini_unescape_html_f(&buf, url->data, url->len);
cmark_strbuf_unescape(&buf);
- return cmark_chunk_buf_detach(&buf);
+ return cmark_strbuf_detach(&buf);
}
-cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) {
+unsigned char *cmark_clean_title(cmark_mem *mem, cmark_chunk *title) {
cmark_strbuf buf = CMARK_BUF_INIT(mem);
unsigned char first, last;
if (title->len == 0) {
- cmark_chunk result = CMARK_CHUNK_EMPTY;
- return result;
+ return NULL;
}
first = title->data[0];
@@ -836,7 +821,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) {
}
cmark_strbuf_unescape(&buf);
- return cmark_chunk_buf_detach(&buf);
+ return cmark_strbuf_detach(&buf);
}
// Parse an autolink or HTML tag.
@@ -1003,7 +988,7 @@ static cmark_node *handle_close_bracket(subject *subj) {
bufsize_t sps, n;
cmark_reference *ref = NULL;
cmark_chunk url_chunk, title_chunk;
- cmark_chunk url, title;
+ unsigned char *url, *title;
bracket *opener;
cmark_node *inl;
cmark_chunk raw_label;
@@ -1090,8 +1075,8 @@ static cmark_node *handle_close_bracket(subject *subj) {
}
if (ref != NULL) { // found
- url = chunk_clone(subj->mem, &ref->url);
- title = chunk_clone(subj->mem, &ref->title);
+ url = cmark_strdup(subj->mem, ref->url);
+ title = cmark_strdup(subj->mem, ref->title);
goto match;
} else {
goto noMatch;
diff --git a/src/inlines.h b/src/inlines.h
index 39d3363..800ed0c 100644
--- a/src/inlines.h
+++ b/src/inlines.h
@@ -1,12 +1,15 @@
#ifndef CMARK_INLINES_H
#define CMARK_INLINES_H
+#include "chunk.h"
+#include "references.h"
+
#ifdef __cplusplus
extern "C" {
#endif
-cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url);
-cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
+unsigned char *cmark_clean_url(cmark_mem *mem, cmark_chunk *url);
+unsigned char *cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
cmark_reference_map *refmap, int options);
diff --git a/src/node.c b/src/node.c
index fa538bc..f67d07e 100644
--- a/src/node.c
+++ b/src/node.c
@@ -120,8 +120,8 @@ static void S_free_nodes(cmark_node *e) {
break;
case CMARK_NODE_LINK:
case CMARK_NODE_IMAGE:
- cmark_chunk_free(NODE_MEM(e), &e->as.link.url);
- cmark_chunk_free(NODE_MEM(e), &e->as.link.title);
+ NODE_MEM(e)->free(e->as.link.url);
+ NODE_MEM(e)->free(e->as.link.title);
break;
case CMARK_NODE_CUSTOM_BLOCK:
case CMARK_NODE_CUSTOM_INLINE:
@@ -505,7 +505,7 @@ const char *cmark_node_get_url(cmark_node *node) {
switch (node->type) {
case CMARK_NODE_LINK:
case CMARK_NODE_IMAGE:
- return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.link.url);
+ return node->as.link.url ? (char *)node->as.link.url : "";
default:
break;
}
@@ -521,7 +521,7 @@ int cmark_node_set_url(cmark_node *node, const char *url) {
switch (node->type) {
case CMARK_NODE_LINK:
case CMARK_NODE_IMAGE:
- cmark_chunk_set_cstr(NODE_MEM(node), &node->as.link.url, url);
+ cmark_set_cstr(NODE_MEM(node), &node->as.link.url, url);
return 1;
default:
break;
@@ -538,7 +538,7 @@ const char *cmark_node_get_title(cmark_node *node) {
switch (node->type) {
case CMARK_NODE_LINK:
case CMARK_NODE_IMAGE:
- return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.link.title);
+ return node->as.link.title ? (char *)node->as.link.title : "";
default:
break;
}
@@ -554,7 +554,7 @@ int cmark_node_set_title(cmark_node *node, const char *title) {
switch (node->type) {
case CMARK_NODE_LINK:
case CMARK_NODE_IMAGE:
- cmark_chunk_set_cstr(NODE_MEM(node), &node->as.link.title, title);
+ cmark_set_cstr(NODE_MEM(node), &node->as.link.title, title);
return 1;
default:
break;
diff --git a/src/node.h b/src/node.h
index 327c568..9658d1f 100644
--- a/src/node.h
+++ b/src/node.h
@@ -37,8 +37,8 @@ typedef struct {
} cmark_heading;
typedef struct {
- cmark_chunk url;
- cmark_chunk title;
+ unsigned char *url;
+ unsigned char *title;
} cmark_link;
typedef struct {
diff --git a/src/references.c b/src/references.c
index 89f2dc8..f0d871f 100644
--- a/src/references.c
+++ b/src/references.c
@@ -18,8 +18,8 @@ static void reference_free(cmark_reference_map *map, cmark_reference *ref) {
cmark_mem *mem = map->mem;
if (ref != NULL) {
mem->free(ref->label);
- cmark_chunk_free(mem, &ref->url);
- cmark_chunk_free(mem, &ref->title);
+ mem->free(ref->url);
+ mem->free(ref->title);
mem->free(ref);
}
}
diff --git a/src/references.h b/src/references.h
index 8d3631f..5038c49 100644
--- a/src/references.h
+++ b/src/references.h
@@ -12,8 +12,8 @@ extern "C" {
struct cmark_reference {
struct cmark_reference *next;
unsigned char *label;
- cmark_chunk url;
- cmark_chunk title;
+ unsigned char *url;
+ unsigned char *title;
unsigned int hash;
};
diff --git a/src/xml.c b/src/xml.c
index bd82f5f..4bede85 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -121,11 +121,14 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
case CMARK_NODE_LINK:
case CMARK_NODE_IMAGE:
cmark_strbuf_puts(xml, " destination=\"");
- escape_xml(xml, node->as.link.url.data, node->as.link.url.len);
- cmark_strbuf_putc(xml, '"');
- cmark_strbuf_puts(xml, " title=\"");
- escape_xml(xml, node->as.link.title.data, node->as.link.title.len);
+ escape_xml(xml, node->as.link.url, strlen((char *)node->as.link.url));
cmark_strbuf_putc(xml, '"');
+ if (node->as.link.title) {
+ cmark_strbuf_puts(xml, " title=\"");
+ escape_xml(xml, node->as.link.title,
+ strlen((char *)node->as.link.title));
+ cmark_strbuf_putc(xml, '"');
+ }
break;
default:
break;