summaryrefslogtreecommitdiff
path: root/src/houdini_html_u.c
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2015-06-16 17:28:53 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2015-06-16 17:33:48 -0700
commit7e7819e05ec91bc7ca7859d119f1274cf3a83913 (patch)
treee10c78ca2bbf0033b381cdc27076ada30f38b39c /src/houdini_html_u.c
parent54c087d1272b4ce756e56de68e8e6dfac6d159fc (diff)
Simpler approach for entity lookup.
We dispense with the hashes and just do string comparsions. Since the array is in order, we can search intelligently and should never need to do more than 8 or so comparisons. This reduces binary size even further, at a small cost in performance. (This shouldn't matter too much, as it's only detectable in really entity-heavy sources.)
Diffstat (limited to 'src/houdini_html_u.c')
-rw-r--r--src/houdini_html_u.c39
1 files changed, 15 insertions, 24 deletions
diff --git a/src/houdini_html_u.c b/src/houdini_html_u.c
index 51e3565..e4cf8fe 100644
--- a/src/houdini_html_u.c
+++ b/src/houdini_html_u.c
@@ -9,39 +9,30 @@
/* Binary tree lookup code for entities added by JGM */
-static unsigned long
-S_hash(const unsigned char *str, int len)
-{
- unsigned long hash = 5381;
- int i;
-
- for (i = 0; i < len; i++) {
- hash = (((hash << 5) + hash) + str[i]) & 0xFFFFFFFF; /* hash * 33 + c */
- }
-
- return hash;
-}
-
static unsigned char *
-S_lookup(int i, unsigned long key)
+S_lookup(int i, int low, int hi, const unsigned char *s, int len)
{
- if (cmark_entities[i].value == key) {
- return cmark_entities[i].bytes;
+ int j;
+ int cmp = strncmp((char *)s, (char *)cmark_entities[i].entity, len);
+ if (cmp == 0 && cmark_entities[i].entity[len] == 0) {
+ return (unsigned char *)cmark_entities[i].bytes;
+ } else if (cmp < 0 && i > low) {
+ j = i - ((i - low) / 2);
+ if (j == i) j -= 1;
+ return S_lookup(j, low, i - 1, s, len);
+ } else if (cmp > 0 && i < hi) {
+ j = i + ((hi - i) / 2);
+ if (j == i) j += 1;
+ return S_lookup(j, i + 1, hi, s, len);
} else {
- int next = key < cmark_entities[i].value ?
- cmark_entities[i].less : cmark_entities[i].greater;
- if (next == -1) { // leaf node
- return NULL;
- } else {
- return S_lookup(next, key);
- }
+ return NULL;
}
}
static unsigned char *
S_lookup_entity(const unsigned char *s, int len)
{
- return S_lookup(cmark_entities_root, S_hash(s, len));
+ return S_lookup(CMARK_NUM_ENTITIES / 2, 0, CMARK_NUM_ENTITIES - 1, s, len);
}
bufsize_t