diff options
-rw-r--r-- | runtests.pl | 3 | ||||
-rw-r--r-- | spec.txt | 6 | ||||
-rw-r--r-- | src/html/html.c | 22 | ||||
-rw-r--r-- | src/inlines.c | 105 |
4 files changed, 69 insertions, 67 deletions
diff --git a/runtests.pl b/runtests.pl index 2e2b795..e53938d 100644 --- a/runtests.pl +++ b/runtests.pl @@ -49,6 +49,7 @@ sub tidy s/ */ /; # collapse space before /> in tag s/ *\/>/\/>/; + s/>\n$/>/; # skip blank line if (/^$/) { next; @@ -89,8 +90,10 @@ sub dotest print $markdown; print "=== expected ===============\n"; print $html; + print "\n"; print "=== got ====================\n"; print $actual; + print "\n"; print color "black"; return 0; } @@ -1682,7 +1682,7 @@ them. [Foo bar] . -<p><a href="my url" title="title">Foo bar</a></p> +<p><a href="my%20url" title="title">Foo bar</a></p> . The title may be omitted: @@ -1745,7 +1745,7 @@ case-insensitive (see [matches](#matches)). [αγω] . -<p><a href="/φου">αγω</a></p> +<p><a href="/%CF%86%CE%BF%CF%85">αγω</a></p> . Here is a link reference definition with no corresponding link. @@ -3688,7 +3688,7 @@ raw HTML: . <http://google.com?find=\*> . -<p><a href="http://google.com?find=\*">http://google.com?find=\*</a></p> +<p><a href="http://google.com?find=%5C*">http://google.com?find=\*</a></p> . . diff --git a/src/html/html.c b/src/html/html.c index 2a65a63..cdccf2a 100644 --- a/src/html/html.c +++ b/src/html/html.c @@ -50,17 +50,15 @@ void blocks_to_html(gh_buf *html, block *b, bool tight) cr(html); gh_buf_puts(html, "<p>"); inlines_to_html(html, b->inline_content); - gh_buf_puts(html, "</p>"); - cr(html); + gh_buf_puts(html, "</p>\n"); } break; case block_quote: cr(html); - gh_buf_puts(html, "<blockquote>"); + gh_buf_puts(html, "<blockquote>\n"); blocks_to_html(html, b->children, false); - gh_buf_puts(html, "</blockquote>"); - cr(html); + gh_buf_puts(html, "</blockquote>\n"); break; case list_item: @@ -68,8 +66,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight) gh_buf_puts(html, "<li>"); blocks_to_html(html, b->children, tight); gh_buf_trim(html); /* TODO: rtrim */ - gh_buf_puts(html, "</li>"); - cr(html); + gh_buf_puts(html, "</li>\n"); break; case list: @@ -87,7 +84,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight) blocks_to_html(html, b->children, data->tight); gh_buf_puts(html, data->list_type == bullet ? "</ul>" : "</ol>"); - cr(html); + gh_buf_putc(html, '\n'); break; case atx_header: @@ -95,8 +92,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight) cr(html); gh_buf_printf(html, "<h%d>", b->attributes.header_level); inlines_to_html(html, b->inline_content); - gh_buf_printf(html, "</h%d>", b->attributes.header_level); - cr(html); + gh_buf_printf(html, "</h%d>\n", b->attributes.header_level); break; case indented_code: @@ -122,8 +118,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight) gh_buf_puts(html, "><code>"); escape_html(html, b->string_content.ptr, b->string_content.size); - gh_buf_puts(html, "</code></pre>"); - cr(html); + gh_buf_puts(html, "</code></pre>\n"); break; case html_block: @@ -131,8 +126,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight) break; case hrule: - gh_buf_puts(html, "<hr />"); - cr(html); + gh_buf_puts(html, "<hr />\n"); break; case reference_def: diff --git a/src/inlines.c b/src/inlines.c index ced4673..a0dcac9 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1,8 +1,8 @@ #include <stdlib.h> +#include <string.h> #include <stdio.h> #include <stdbool.h> #include <ctype.h> -#include <string.h> #include "stmd.h" #include "uthash.h" @@ -18,7 +18,7 @@ typedef struct Subject { reference* lookup_reference(reference** refmap, chunk *label); reference* make_reference(chunk *label, chunk *url, chunk *title); -static unsigned char *clean_url(chunk *url); +static unsigned char *clean_url(chunk *url, int is_email); static unsigned char *clean_title(chunk *title); inline static unsigned char *chunk_to_cstr(chunk *c); @@ -97,7 +97,7 @@ extern reference* make_reference(chunk *label, chunk *url, chunk *title) reference *ref; ref = malloc(sizeof(reference)); ref->label = normalize_reference(label); - ref->url = clean_url(url); + ref->url = clean_url(url, 0); ref->title = clean_title(title); return ref; } @@ -116,14 +116,25 @@ extern void add_reference(reference** refmap, reference* ref) } } +inline static inl* make_link_from_reference(inl* label, reference *ref) +{ + inl* e = (inl*) malloc(sizeof(inl)); + e->tag = INL_LINK; + e->content.linkable.label = label; + e->content.linkable.url = strdup(ref->url); + e->content.linkable.title = ref->title ? strdup(ref->title) : NULL; + e->next = NULL; + return e; +} + // Create an inline with a linkable string value. -inline static inl* make_linkable(int t, inl* label, chunk url, chunk title) +inline static inl* make_link(inl* label, chunk url, chunk title, int is_email) { inl* e = (inl*) malloc(sizeof(inl)); - e->tag = t; + e->tag = INL_LINK; e->content.linkable.label = label; - e->content.linkable.url = chunk_to_cstr(&url); - e->content.linkable.title = title.len ? chunk_to_cstr(&title) : NULL; + e->content.linkable.url = clean_url(&url, is_email); + e->content.linkable.title = clean_title(&title); e->next = NULL; return e; } @@ -163,7 +174,6 @@ inline static inl* make_simple(int t) #define make_entity(s) make_literal(INL_ENTITY, s) #define make_linebreak() make_simple(INL_LINEBREAK) #define make_softbreak() make_simple(INL_SOFTBREAK) -#define make_link(label, url, title) make_linkable(INL_LINK, label, url, title) #define make_emph(contents) make_inlines(INL_EMPH, contents) #define make_strong(contents) make_inlines(INL_STRONG, contents) @@ -309,37 +319,27 @@ static int scan_to_closing_backticks(subject* subj, int openticklength) // space and newline characters into a single space. static void normalize_whitespace(gh_buf *s) { - /* TODO */ -#if 0 bool last_char_was_space = false; - int pos = 0; - char c; - while ((c = gh_buf_at(s, pos))) { - switch (c) { - case ' ': - if (last_char_was_space) { - bdelete(s, pos, 1); - } else { - pos++; - } - last_char_was_space = true; - break; - case '\n': - if (last_char_was_space) { - bdelete(s, pos, 1); - } else { - bdelete(s, pos, 1); - binsertch(s, pos, 1, ' '); - pos++; - } - last_char_was_space = true; + int r, w; + + for (r = 0, w = 0; r < s->size; ++r) { + switch (s->ptr[r]) { + case ' ': + case '\n': + if (last_char_was_space) break; - default: - pos++; - last_char_was_space = false; + + s->ptr[w++] = ' '; + last_char_was_space = true; + break; + + default: + s->ptr[w++] = s->ptr[r]; + last_char_was_space = false; } } -#endif + + gh_buf_truncate(s, w); } // Parse backtick code section or raw backticks, return an inline. @@ -593,16 +593,19 @@ extern void unescape_buffer(gh_buf *buf) // Clean a URL: remove surrounding whitespace and surrounding <>, // and remove \ that escape punctuation. -static unsigned char *clean_url(chunk *url) +static unsigned char *clean_url(chunk *url, int is_email) { gh_buf buf = GH_BUF_INIT; chunk_trim(url); + if (is_email) + gh_buf_puts(&buf, "mailto:"); + if (url->data[0] == '<' && url->data[url->len - 1] == '>') { - gh_buf_set(&buf, url->data + 1, url->len - 2); + gh_buf_put(&buf, url->data + 1, url->len - 2); } else { - gh_buf_set(&buf, url->data, url->len); + gh_buf_put(&buf, url->data, url->len); } unescape_buffer(&buf); @@ -613,8 +616,13 @@ static unsigned char *clean_url(chunk *url) static unsigned char *clean_title(chunk *title) { gh_buf buf = GH_BUF_INIT; - unsigned char first = title->data[0]; - unsigned char last = title->data[title->len - 1]; + unsigned char first, last; + + if (title->len == 0) + return NULL; + + first = title->data[0]; + last = title->data[title->len - 1]; // remove surrounding quotes if any: if ((first == '\'' && last == '\'') || @@ -647,25 +655,22 @@ static inl* handle_pointy_brace(subject* subj) return make_link( make_str_with_entities(&contents), contents, - chunk_literal("") + chunk_literal(""), + 0 ); } // next try to match an email autolink matchlen = scan_autolink_email(&subj->input, subj->pos); if (matchlen > 0) { - gh_buf mail_url = GH_BUF_INIT; - contents = chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; - gh_buf_puts(&mail_url, "mailto:"); - gh_buf_put(&mail_url, contents.data, contents.len); - return make_link( make_str_with_entities(&contents), - chunk_buf_detach(&mail_url), - chunk_literal("") + contents, + chunk_literal(""), + 1 ); } @@ -790,7 +795,7 @@ static inl* handle_left_bracket(subject* subj) title = chunk_dup(&subj->input, starttitle, endtitle - starttitle); lab = parse_chunk_inlines(&rawlabel, NULL); - return make_link(lab, url, title); + return make_link(lab, url, title, 0); } else { // if we get here, we matched a label but didn't get further: subj->pos = endlabel; @@ -821,7 +826,7 @@ static inl* handle_left_bracket(subject* subj) ref = lookup_reference(subj->reference_map, &reflabel); if (ref != NULL) { // found lab = parse_chunk_inlines(&rawlabel, NULL); - result = make_link(lab, chunk_literal(ref->url), chunk_literal(ref->title)); + result = make_link_from_reference(lab, ref); } else { subj->pos = endlabel; lab = parse_chunk_inlines(&rawlabel, subj->reference_map); |