summary | refs | log | tree | commit | diff
path: root/src/inlines.c
diff options
context:
space:
mode:
author: Yuki Izumi <ashe@kivikakk.ee>, 2017-07-17 15:20:12 +1000
committer: Yuki Izumi <ashe@kivikakk.ee>, 2017-07-17 15:24:24 +1000
commit: 7423feff96dc1331b3578440e417224e96dac448 (patch)
tree: 4e828370274443b7178c4543495884adc621f3f4 /src/inlines.c
parent: 1ea9cd8ceaec181dd945d82af812a7712f232573 (diff)
Fix URL scanner.
Diffstat (limited to 'src/inlines.c')
-rw-r--r-- src/inlines.c 90
1 files changed, 52 insertions, 38 deletions
diff --git a/src/inlines.c b/src/inlines.c
index b864e12..f223baf 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -695,8 +695,8 @@ static cmark_node *handle_entity(subject *subj) {
return make_str(subj->mem, cmark_chunk_buf_detach(&ent));
}
-// Clean a URL: remove surrounding whitespace and surrounding <>,
-// and remove \ that escape punctuation.
+// Clean a URL: remove surrounding whitespace, and remove \ that escape
+// punctuation.
cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
cmark_strbuf buf = CMARK_BUF_INIT(mem);
@@ -707,11 +707,7 @@ cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
return result;
}
- if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
- houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
- } else {
- houdini_unescape_html_f(&buf, url->data, url->len);
- }
+ houdini_unescape_html_f(&buf, url->data, url->len);
cmark_strbuf_unescape(&buf);
return cmark_chunk_buf_detach(&buf);
@@ -824,10 +820,43 @@ noMatch:
subj->pos = startpos; // rewind
return 0;
}
-static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset) {
+
+static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset, cmark_chunk *output) {
bufsize_t i = offset;
size_t nb_p = 0;
+ while (i < input->len) {
+ if (input->data[i] == '\\' &&
+ i + 1 < input-> len &&
+ cmark_ispunct(input->data[i+1]))
+ i += 2;
+ else if (input->data[i] == '(') {
+ ++nb_p;
+ ++i;
+ } else if (input->data[i] == ')') {
+ if (nb_p == 0)
+ break;
+ --nb_p;
+ ++i;
+ } else if (cmark_isspace(input->data[i]))
+ break;
+ else
+ ++i;
+ }
+
+ if (i >= input->len)
+ return -1;
+
+ {
+ cmark_chunk result = {input->data + offset, i - offset, 0};
+ *output = result;
+ }
+ return i - offset;
+}
+
+static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset, cmark_chunk *output) {
+ bufsize_t i = offset;
+
if (i < input->len && input->data[i] == '<') {
++i;
while (i < input->len) {
@@ -836,42 +865,30 @@ static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset) {
break;
} else if (input->data[i] == '\\')
i += 2;
- else if (cmark_isspace(input->data[i]))
- return -1;
+ else if (cmark_isspace(input->data[i]) || input->data[i] == '<')
+ return manual_scan_link_url_2(input, offset, output);
else
++i;
}
} else {
- while (i < input->len) {
- if (input->data[i] == '\\' &&
- i + 1 < input-> len &&
- cmark_ispunct(input->data[i+1]))
- i += 2;
- else if (input->data[i] == '(') {
- ++nb_p;
- ++i;
- } else if (input->data[i] == ')') {
- if (nb_p == 0)
- break;
- --nb_p;
- ++i;
- } else if (cmark_isspace(input->data[i]))
- break;
- else
- ++i;
- }
+ return manual_scan_link_url_2(input, offset, output);
}
if (i >= input->len)
return -1;
+
+ {
+ cmark_chunk result = {input->data + offset + 1, i - 2 - offset, 0};
+ *output = result;
+ }
return i - offset;
}
+
// Return a link, an image, or a literal close bracket.
static cmark_node *handle_close_bracket(subject *subj) {
bufsize_t initial_pos, after_link_text_pos;
- bufsize_t starturl, endurl, starttitle, endtitle, endall;
- bufsize_t n;
- bufsize_t sps;
+ bufsize_t endurl, starttitle, endtitle, endall;
+ bufsize_t sps, n;
cmark_reference *ref = NULL;
cmark_chunk url_chunk, title_chunk;
cmark_chunk url, title;
@@ -907,11 +924,10 @@ static cmark_node *handle_close_bracket(subject *subj) {
// First, look for an inline link.
if (peek_char(subj) == '(' &&
((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
- ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
+ ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps, &url_chunk)) > -1)) {
// try to parse an explicit link:
- starturl = subj->pos + 1 + sps; // after (
- endurl = starturl + n;
+ endurl = subj->pos + 1 + sps + n;
starttitle = endurl + scan_spacechars(&subj->input, endurl);
// ensure there are spaces btw url and title
@@ -924,7 +940,6 @@ static cmark_node *handle_close_bracket(subject *subj) {
if (peek_at(subj, endall) == ')') {
subj->pos = endall + 1;
- url_chunk = cmark_chunk_dup(&subj->input, starturl, endurl - starturl);
title_chunk =
cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
url = cmark_clean_url(subj->mem, &url_chunk);
@@ -1213,9 +1228,8 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
// parse link url:
spnl(&subj);
- matchlen = manual_scan_link_url(&subj.input, subj.pos);
- if (matchlen > 0) {
- url = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
+ if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1 &&
+ url.len > 0) {
subj.pos += matchlen;
} else {
return 0;