From 27bd6c0b18318a9c43801409bbababf2ceb6302e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 4 Dec 2014 23:13:54 -0800 Subject: Moved source files from src/html into src. The separate directory presents problems for some simple extension building systems, like luarocks. --- src/html/houdini_href_e.c | 107 ---------------------------------------------- 1 file changed, 107 deletions(-) delete mode 100644 src/html/houdini_href_e.c (limited to 'src/html/houdini_href_e.c') diff --git a/src/html/houdini_href_e.c b/src/html/houdini_href_e.c deleted file mode 100644 index 12456ce..0000000 --- a/src/html/houdini_href_e.c +++ /dev/null @@ -1,107 +0,0 @@ -#include <assert.h> -#include <stdio.h> -#include <string.h> - -#include "html/houdini.h" - -/* - * The following characters will not be escaped: - * - * -_.+!*'(),%#@?=;:/,+&$ alphanum - * - * Note that this character set is the union of: - * - * - The characters which are safe to be in a URL - * - The characters which are *not* safe to be in - * a URL because they are RESERVED characters. - * - * We assume (lazily) that any RESERVED char that - * appears inside a URL is actually meant to - * have its native function (i.e. as a URL - * component/separator) and hence needs no escaping. - * - * There are two exceptions: the characters & (amp) - * and ' (single quote) do not appear in the table. - * They are meant to appear in the URL as components, - * yet they require special HTML-entity escaping - * to generate valid HTML markup. - * - * All other characters will be escaped to %XX. 
- * - */ -static const char HREF_SAFE[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -int -houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size) -{ - static const uint8_t hex_chars[] = "0123456789ABCDEF"; - size_t i = 0, org; - uint8_t hex_str[3]; - - hex_str[0] = '%'; - - while (i < size) { - org = i; - while (i < size && HREF_SAFE[src[i]] != 0) - i++; - - if (likely(i > org)) - strbuf_put(ob, src + org, i - org); - - /* escaping */ - if (i >= size) - break; - - switch (src[i]) { - /* amp appears all the time in URLs, but needs - * HTML-entity escaping to be inside an href */ - case '&': - strbuf_puts(ob, "&"); - break; - - /* the single quote is a valid URL character - * according to the standard; it needs HTML - * entity escaping too */ - case '\'': - strbuf_puts(ob, "'"); - break; - - /* the space can be escaped to %20 or a plus - * sign. we're going with the generic escape - * for now. 
the plus thing is more commonly seen - * when building GET strings */ -#if 0 - case ' ': - strbuf_putc(ob, '+'); - break; -#endif - - /* every other character goes with a %XX escaping */ - default: - hex_str[1] = hex_chars[(src[i] >> 4) & 0xF]; - hex_str[2] = hex_chars[src[i] & 0xF]; - strbuf_put(ob, hex_str, 3); - } - - i++; - } - - return 1; -} -- cgit v1.2.3