From c28af79329264a7cf331a1b1c414919e4ed9e9f9 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Tue, 2 Sep 2014 13:37:34 +0200 Subject: It buiiiilds --- src/html/houdini_href_e.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 src/html/houdini_href_e.c (limited to 'src/html/houdini_href_e.c') diff --git a/src/html/houdini_href_e.c b/src/html/houdini_href_e.c new file mode 100644 index 0000000..59fe850 --- /dev/null +++ b/src/html/houdini_href_e.c @@ -0,0 +1,115 @@ +#include +#include +#include + +#include "html/houdini.h" + +/* + * The following characters will not be escaped: + * + * -_.+!*'(),%#@?=;:/,+&$ alphanum + * + * Note that this character set is the addition of: + * + * - The characters which are safe to be in an URL + * - The characters which are *not* safe to be in + * an URL because they are RESERVED characters. + * + * We asume (lazily) that any RESERVED char that + * appears inside an URL is actually meant to + * have its native function (i.e. as an URL + * component/separator) and hence needs no escaping. + * + * There are two exceptions: the chacters & (amp) + * and ' (single quote) do not appear in the table. + * They are meant to appear in the URL as components, + * yet they require special HTML-entity escaping + * to generate valid HTML markup. + * + * All other characters will be escaped to %XX. + * + */ +static const char HREF_SAFE[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +int +houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size) +{ + static const uint8_t hex_chars[] = "0123456789ABCDEF"; + size_t i = 0, org; + uint8_t hex_str[3]; + + hex_str[0] = '%'; + + while (i < size) { + org = i; + while (i < size && HREF_SAFE[src[i]] != 0) + i++; + + if (likely(i > org)) { + if (unlikely(org == 0)) { + if (i >= size) + return 0; + + gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size)); + } + + gh_buf_put(ob, src + org, i - org); + } + + /* escaping */ + if (i >= size) + break; + + switch (src[i]) { + /* amp appears all the time in URLs, but needs + * HTML-entity escaping to be inside an href */ + case '&': + gh_buf_puts(ob, "&"); + break; + + /* the single quote is a valid URL character + * according to the standard; it needs HTML + * entity escaping too */ + case '\'': + gh_buf_puts(ob, "'"); + break; + + /* the space can be escaped to %20 or a plus + * sign. we're going with the generic escape + * for now. the plus thing is more commonly seen + * when building GET strings */ +#if 0 + case ' ': + gh_buf_putc(ob, '+'); + break; +#endif + + /* every other character goes with a %XX escaping */ + default: + hex_str[1] = hex_chars[(src[i] >> 4) & 0xF]; + hex_str[2] = hex_chars[src[i] & 0xF]; + gh_buf_put(ob, hex_str, 3); + } + + i++; + } + + return 1; +} -- cgit v1.2.3 From a7314deae649646f1f7ce5ede972641b5b62538c Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Wed, 3 Sep 2014 03:40:23 +0200 Subject: 338/103 --- src/html/houdini_href_e.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'src/html/houdini_href_e.c') diff --git a/src/html/houdini_href_e.c b/src/html/houdini_href_e.c index 59fe850..b2a7d79 100644 --- a/src/html/houdini_href_e.c +++ b/src/html/houdini_href_e.c @@ -62,16 +62,8 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size) while (i < size && HREF_SAFE[src[i]] != 0) i++; - if (likely(i > org)) { - if (unlikely(org == 0)) { - if (i >= size) - return 0; - - gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size)); - } - + if (likely(i > org)) gh_buf_put(ob, src + org, i - org); - } /* escaping */ if (i >= size) -- cgit v1.2.3 From 543c2c94d71adee42c7bd2f8027d75c87ed8120d Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 4 Sep 2014 18:38:14 +0200 Subject: Rename to strbuf --- src/html/houdini_href_e.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/html/houdini_href_e.c') diff --git a/src/html/houdini_href_e.c b/src/html/houdini_href_e.c index b2a7d79..12456ce 100644 --- a/src/html/houdini_href_e.c +++ b/src/html/houdini_href_e.c @@ -49,7 +49,7 @@ static const char HREF_SAFE[] = { }; int -houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size) +houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size) { static const uint8_t hex_chars[] = "0123456789ABCDEF"; size_t i = 0, org; @@ -63,7 +63,7 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size) i++; if (likely(i > org)) - gh_buf_put(ob, src + org, i - org); + strbuf_put(ob, src + org, i - org); /* escaping */ if (i >= size) @@ -73,14 +73,14 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size) /* amp appears all the time in URLs, but needs * HTML-entity escaping to be inside an href */ case '&': - gh_buf_puts(ob, "&"); + strbuf_puts(ob, "&"); break; /* the single quote is a valid URL character * according to the standard; it needs HTML * entity escaping too */ case '\'': - gh_buf_puts(ob, "'"); + strbuf_puts(ob, "'"); break; /* the space can be escaped to %20 or a plus @@ -89,7 +89,7 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size) * when building GET strings */ #if 0 case ' ': - gh_buf_putc(ob, '+'); + strbuf_putc(ob, '+'); break; #endif @@ -97,7 +97,7 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size) default: hex_str[1] = hex_chars[(src[i] >> 4) & 0xF]; hex_str[2] = hex_chars[src[i] & 0xF]; - gh_buf_put(ob, hex_str, 3); + strbuf_put(ob, hex_str, 3); } i++; -- cgit v1.2.3