From 870e63be7360b5a0097a27656048e853bc720464 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 21 Jul 2014 22:29:16 -0700 Subject: Initial commit --- src/blocks.c | 747 ++++++++++++ src/bstrlib.c | 2979 ++++++++++++++++++++++++++++++++++++++++++++++++ src/bstrlib.h | 304 +++++ src/case_fold_switch.c | 2637 ++++++++++++++++++++++++++++++++++++++++++ src/casefold.c | 2699 +++++++++++++++++++++++++++++++++++++++++++ src/debug.h | 36 + src/detab.c | 48 + src/getopt.c | 199 ++++ src/html.c | 276 +++++ src/inlines.c | 998 ++++++++++++++++ src/main.c | 102 ++ src/print.c | 168 +++ src/scanners.h | 15 + src/scanners.re | 238 ++++ src/stmd.h | 121 ++ src/utf8.c | 106 ++ src/utf8.h | 6 + src/uthash.h | 948 +++++++++++++++ 18 files changed, 12627 insertions(+) create mode 100644 src/blocks.c create mode 100644 src/bstrlib.c create mode 100644 src/bstrlib.h create mode 100644 src/case_fold_switch.c create mode 100644 src/casefold.c create mode 100644 src/debug.h create mode 100644 src/detab.c create mode 100644 src/getopt.c create mode 100644 src/html.c create mode 100644 src/inlines.c create mode 100644 src/main.c create mode 100644 src/print.c create mode 100644 src/scanners.h create mode 100644 src/scanners.re create mode 100644 src/stmd.h create mode 100644 src/utf8.c create mode 100644 src/utf8.h create mode 100644 src/uthash.h (limited to 'src') diff --git a/src/blocks.c b/src/blocks.c new file mode 100644 index 0000000..2776231 --- /dev/null +++ b/src/blocks.c @@ -0,0 +1,747 @@ +#include +#include +#include +#include +#include "bstrlib.h" +#include "stmd.h" +#include "uthash.h" +#include "debug.h" +#include "scanners.h" + +static block* make_block(int tag, int start_line, int start_column) +{ + block* e; + e = (block*) malloc(sizeof(block)); + e->tag = tag; + e->open = true; + e->last_line_blank = false; + e->start_line = start_line; + e->start_column = start_column; + e->end_line = start_line; + e->children = NULL; + e->last_child = NULL; + e->parent = NULL; + 
e->top = NULL; + e->attributes.refmap = NULL; + e->string_content = bfromcstr(""); + e->inline_content = NULL; + e->next = NULL; + e->prev = NULL; + return e; +} + +// Create a root document block. +extern block* make_document() +{ + block * e = make_block(document, 1, 1); + reference * map = NULL; + reference ** refmap; + refmap = (reference**) malloc(sizeof(reference*)); + *refmap = map; + e->attributes.refmap = refmap; + e->top = e; + return e; +} + +// Returns true if line has only space characters, else false. +bool is_blank(bstring s, int offset) +{ + char c; + while ((c = bchar(s, offset))) { + if (c == '\n') { + return true; + } else if (c == ' ') { + offset++; + } else { + return false; + } + } + return true; +} + +static inline bool can_contain(int parent_type, int child_type) +{ + return ( parent_type == document || + parent_type == block_quote || + parent_type == list_item || + (parent_type == list && child_type == list_item) ); +} + +static inline bool accepts_lines(int block_type) +{ + return (block_type == paragraph || + block_type == atx_header || + block_type == indented_code || + block_type == fenced_code); +} + +static int add_line(block* block, bstring ln, int offset) +{ + bstring s = bmidstr(ln, offset, blength(ln) - offset); + check(block->open, "attempted to add line (%s) to closed container (%d)", + ln->data, block->tag); + check(bformata(block->string_content, "%s", s->data) == 0, + "could not append line to string_content"); + bdestroy(s); + return 0; + error: + return -1; +} + +static int remove_trailing_blank_lines(bstring ln) +{ + bstring tofind = bfromcstr(" \t\r\n"); + int pos; + // find last nonspace: + pos = bninchrr(ln, blength(ln) - 1, tofind); + if (pos == BSTR_ERR) { // all spaces + bassigncstr(ln, ""); + } else { + // find next newline after it + pos = bstrchrp(ln, '\n', pos); + if (pos != BSTR_ERR) { + check(bdelete(ln, pos, blength(ln) - pos) != BSTR_ERR, + "failed to delete trailing blank lines"); + } + } + bdestroy(tofind); 
+ return 0; + error: + return -1; +} + +// Check to see if a block ends with a blank line, descending +// if needed into lists and sublists. +static bool ends_with_blank_line(block* block) +{ + if (block->last_line_blank) { + return true; + } + if ((block->tag == list || block->tag == list_item) && block->last_child) { + return ends_with_blank_line(block->last_child); + } else { + return false; + } +} + +// Break out of all containing lists +static int break_out_of_lists(block ** bptr, int line_number) +{ + block * container = *bptr; + block * b = container->top; + // find first containing list: + while (b && b->tag != list) { + b = b->last_child; + } + if (b) { + while (container && container != b) { + finalize(container, line_number); + container = container->parent; + } + finalize(b, line_number); + *bptr = b->parent; + } + return 0; +} + + +extern int finalize(block* b, int line_number) +{ + int firstlinelen; + int pos; + block* item; + block* subitem; + + check(b != NULL, "finalize called on null block"); + if (!b->open) { + return 0; // don't do anything if the block is already closed + } + b->open = false; + if (line_number > b->start_line) { + b->end_line = line_number - 1; + } else { + b->end_line = line_number; + } + + switch (b->tag) { + + case paragraph: + pos = 0; + while (bchar(b->string_content, 0) == '[' && + (pos = parse_reference(b->string_content, + b->top->attributes.refmap))) { + bdelete(b->string_content, 0, pos); + } + if (is_blank(b->string_content, 0)) { + b->tag = reference_def; + } + break; + + case indented_code: + remove_trailing_blank_lines(b->string_content); + bformata(b->string_content, "\n"); + break; + + case fenced_code: + // first line of contents becomes info + firstlinelen = bstrchr(b->string_content, '\n'); + b->attributes.fenced_code_data.info = + bmidstr(b->string_content, 0, firstlinelen); + bdelete(b->string_content, 0, firstlinelen + 1); // +1 for \n + btrimws(b->attributes.fenced_code_data.info); + 
unescape(b->attributes.fenced_code_data.info); + break; + + case list: // determine tight/loose status + b->attributes.list_data.tight = true; // tight by default + item = b->children; + + while (item) { + // check for non-final non-empty list item ending with blank line: + if (item->last_line_blank && item->next) { + b->attributes.list_data.tight = false; + break; + } + // recurse into children of list item, to see if there are + // spaces between them: + subitem = item->children; + while (subitem) { + if (ends_with_blank_line(subitem) && + (item->next || subitem->next)) { + b->attributes.list_data.tight = false; + break; + } + subitem = subitem->next; + } + if (!(b->attributes.list_data.tight)) { + break; + } + item = item->next; + } + + break; + + default: + break; + } + + return 0; + error: + return -1; +} + +// Add a block as child of another. Return pointer to child. +extern block* add_child(block* parent, + int block_type, int start_line, int start_column) +{ + // if 'parent' isn't the kind of block that can accept this child, + // then back up til we hit a block that can. + while (!can_contain(parent->tag, block_type)) { + finalize(parent, start_line); + parent = parent->parent; + } + + check(parent != NULL, "parent container cannot accept children"); + + block* child = make_block(block_type, start_line, start_column); + child->parent = parent; + child->top = parent->top; + + if (parent->last_child) { + parent->last_child->next = child; + child->prev = parent->last_child; + } else { + parent->children = child; + child->prev = NULL; + } + parent->last_child = child; + return child; + error: + return NULL; +} + +// Free a block list and any children. 
+extern void free_blocks(block* e) +{ + block * next; + while (e != NULL) { + next = e->next; + free_inlines(e->inline_content); + bdestroy(e->string_content); + if (e->tag == fenced_code) { + bdestroy(e->attributes.fenced_code_data.info); + } else if (e->tag == document) { + free_reference_map(e->attributes.refmap); + } + free_blocks(e->children); + free(e); + e = next; + } +} + +// Walk through block and all children, recursively, parsing +// string content into inline content where appropriate. +int process_inlines(block* cur, reference** refmap) +{ + switch (cur->tag) { + + case paragraph: + case atx_header: + case setext_header: + check(cur->string_content != NULL, "string_content is NULL"); + cur->inline_content = parse_inlines(cur->string_content, refmap); + bdestroy(cur->string_content); + cur->string_content = NULL; + break; + + default: + break; + } + + block * child = cur->children; + while (child != NULL) { + process_inlines(child, refmap); + child = child->next; + } + + return 0; + error: + return -1; +} + +// Attempts to parse a list item marker (bullet or enumerated). +// On success, returns length of the marker, and populates +// data with the details. On failure, returns 0. +static int parse_list_marker(bstring ln, int pos, + struct ListData ** dataptr) +{ + char c; + int startpos; + int start = 1; + struct ListData * data; + + startpos = pos; + c = bchar(ln, pos); + + if ((c == '*' || c == '-' || c == '+') && !scan_hrule(ln, pos)) { + pos++; + if (!isspace(bchar(ln, pos))) { + return 0; + } + data = malloc(sizeof(struct ListData)); + data->marker_offset = 0; // will be adjusted later + data->list_type = bullet; + data->bullet_char = c; + data->start = 1; + data->delimiter = period; + data->tight = false; + + } else if (isdigit(c)) { + + pos++; + while (isdigit(bchar(ln, pos))) { + pos++; + } + + if (!sscanf((char *) ln->data + startpos, "%d", &start)) { + log_err("sscanf failed"); + return 0; + } + + c = bchar(ln, pos); + if (c == '.' 
|| c == ')') { + pos++; + if (!isspace(bchar(ln, pos))) { + return 0; + } + data = malloc(sizeof(struct ListData)); + data->marker_offset = 0; // will be adjusted later + data->list_type = ordered; + data->bullet_char = 0; + data->start = start; + data->delimiter = (c == '.' ? period : parens); + data->tight = false; + } else { + return 0; + } + + } else { + return 0; + } + + *dataptr = data; + return (pos - startpos); +} + +// Return 1 if list item belongs in list, else 0. +static int lists_match(struct ListData list_data, + struct ListData item_data) +{ + return (list_data.list_type == item_data.list_type && + list_data.delimiter == item_data.delimiter && + // list_data.marker_offset == item_data.marker_offset && + list_data.bullet_char == item_data.bullet_char); +} + +// Process one line at a time, modifying a block. +// Returns 0 if successful. curptr is changed to point to +// the currently open block. +extern int incorporate_line(bstring ln, int line_number, block** curptr) +{ + block* last_matched_container; + int offset = 0; + int matched = 0; + int lev = 0; + int i; + struct ListData * data = NULL; + bool all_matched = true; + block* container; + block* cur = *curptr; + bool blank = false; + int first_nonspace; + int indent; + + // detab input line + check(bdetab(ln, 1) != BSTR_ERR, + "invalid UTF-8 sequence in line %d\n", line_number); + + // container starts at the document root. + container = cur->top; + + // for each containing block, try to parse the associated line start. + // bail out on failure: container will point to the last matching block. 
+ + while (container->last_child && container->last_child->open) { + container = container->last_child; + + first_nonspace = offset; + while (bchar(ln, first_nonspace) == ' ') { + first_nonspace++; + } + + indent = first_nonspace - offset; + blank = bchar(ln, first_nonspace) == '\n'; + + if (container->tag == block_quote) { + + matched = indent <= 3 && bchar(ln, first_nonspace) == '>'; + if (matched) { + offset = first_nonspace + 1; + if (bchar(ln, offset) == ' ') { + offset++; + } + } else { + all_matched = false; + } + + } else if (container->tag == list_item) { + + if (indent >= container->attributes.list_data.marker_offset + + container->attributes.list_data.padding) { + offset += container->attributes.list_data.marker_offset + + container->attributes.list_data.padding; + } else if (blank) { + offset = first_nonspace; + } else { + all_matched = false; + } + + } else if (container->tag == indented_code) { + + if (indent >= CODE_INDENT) { + offset += CODE_INDENT; + } else if (blank) { + offset = first_nonspace; + } else { + all_matched = false; + } + + } else if (container->tag == atx_header || + container->tag == setext_header) { + + // a header can never contain more than one line + all_matched = false; + + } else if (container->tag == fenced_code) { + + // skip optional spaces of fence offset + i = container->attributes.fenced_code_data.fence_offset; + while (i > 0 && bchar(ln, offset) == ' ') { + offset++; + i--; + } + + } else if (container->tag == html_block) { + + if (blank) { + all_matched = false; + } + + } else if (container->tag == paragraph) { + + if (blank) { + container->last_line_blank =true; + all_matched = false; + } + + } + + if (!all_matched) { + container = container->parent; // back up to last matching block + break; + } + } + + last_matched_container = container; + + // check to see if we've hit 2nd blank line, break out of list: + if (blank && container->last_line_blank) { + break_out_of_lists(&container, line_number); + } + + // unless 
last matched container is code block, try new container starts: + while (container->tag != fenced_code && container->tag != indented_code && + container->tag != html_block) { + + first_nonspace = offset; + while (bchar(ln, first_nonspace) == ' ') { + first_nonspace++; + } + + indent = first_nonspace - offset; + blank = bchar(ln, first_nonspace) == '\n'; + + if (indent >= CODE_INDENT) { + + if (cur->tag != paragraph && !blank) { + offset += CODE_INDENT; + container = add_child(container, indented_code, line_number, offset + 1); + } else { // indent > 4 in lazy line + break; + } + + } else if (bchar(ln, first_nonspace) == '>') { + + offset = first_nonspace + 1; + // optional following character + if (bchar(ln, offset) == ' ') { + offset++; + } + container = add_child(container, block_quote, line_number, offset + 1); + + } else if ((matched = scan_atx_header_start(ln, first_nonspace))) { + + offset = first_nonspace + matched; + container = add_child(container, atx_header, line_number, offset + 1); + int hashpos = bstrchrp(ln, '#', first_nonspace); + check(hashpos != BSTR_ERR, "no # found in atx header start"); + int level = 0; + while (bchar(ln, hashpos) == '#') { + level++; + hashpos++; + } + container->attributes.header_level = level; + + } else if ((matched = scan_open_code_fence(ln, first_nonspace))) { + + container = add_child(container, fenced_code, line_number, + first_nonspace + 1); + container->attributes.fenced_code_data.fence_char = bchar(ln, + first_nonspace); + container->attributes.fenced_code_data.fence_length = matched; + container->attributes.fenced_code_data.fence_offset = + first_nonspace - offset; + offset = first_nonspace + matched; + + } else if ((matched = scan_html_block_tag(ln, first_nonspace))) { + + container = add_child(container, html_block, line_number, + first_nonspace + 1); + // note, we don't adjust offset because the tag is part of the text + + } else if (container->tag == paragraph && + (lev = scan_setext_header_line(ln, 
first_nonspace)) && + // check that there is only one line in the paragraph: + bstrrchrp(container->string_content, '\n', + blength(container->string_content) - 2) == BSTR_ERR) { + + container->tag = setext_header; + container->attributes.header_level = lev; + offset = blength(ln) - 1; + + } else if (!(container->tag == paragraph && !all_matched) && + (matched = scan_hrule(ln, first_nonspace))) { + + // it's only now that we know the line is not part of a setext header: + container = add_child(container, hrule, line_number, first_nonspace + 1); + finalize(container, line_number); + container = container->parent; + offset = blength(ln) - 1; + + } else if ((matched = parse_list_marker(ln, first_nonspace, &data))) { + + // compute padding: + offset = first_nonspace + matched; + i = 0; + while (i <= 5 && bchar(ln, offset + i) == ' ') { + i++; + } + // i = number of spaces after marker, up to 5 + if (i >= 5 || i < 1 || bchar(ln, offset) == '\n') { + data->padding = matched + 1; + if (i > 0) { + offset += 1; + } + } else { + data->padding = matched + i; + offset += i; + } + + // check container; if it's a list, see if this list item + // can continue the list; otherwise, create a list container. + + data->marker_offset = indent; + + if (container->tag != list || + !lists_match(container->attributes.list_data, *data)) { + container = add_child(container, list, line_number, + first_nonspace + 1); + container->attributes.list_data = *data; + } + + // add the list item + container = add_child(container, list_item, line_number, + first_nonspace + 1); + container->attributes.list_data = *data; + free(data); + + } else { + break; + } + + if (accepts_lines(container->tag)) { + // if it's a line container, it can't contain other containers + break; + } + } + + // what remains at offset is a text line. add the text to the + // appropriate container. 
+ + first_nonspace = offset; + while (bchar(ln, first_nonspace) == ' ') { + first_nonspace++; + } + + indent = first_nonspace - offset; + blank = bchar(ln, first_nonspace) == '\n'; + + // block quote lines are never blank as they start with > + // and we don't count blanks in fenced code for purposes of tight/loose + // lists or breaking out of lists. we also don't set last_line_blank + // on an empty list item. + container->last_line_blank = (blank && + container->tag != block_quote && + container->tag != fenced_code && + !(container->tag == list_item && + container->children == NULL && + container->start_line == line_number)); + + block *cont = container; + while (cont->parent) { + cont->parent->last_line_blank = false; + cont = cont->parent; + } + + if (cur != last_matched_container && + container == last_matched_container && + !blank && + cur->tag == paragraph && + blength(cur->string_content) > 0) { + + check(add_line(cur, ln, offset) == 0, "could not add line"); + + } else { // not a lazy continuation + + // finalize any blocks that were not matched and set cur to container: + while (cur != last_matched_container) { + + finalize(cur, line_number); + cur = cur->parent; + check(cur != NULL, "cur is NULL, last_matched_container->tag = %d", + last_matched_container->tag); + + } + + if (container->tag == indented_code) { + + check(add_line(container, ln, offset) == 0, "could not add line"); + + } else if (container->tag == fenced_code) { + + matched = (indent <= 3 + && bchar(ln, first_nonspace) == container->attributes.fenced_code_data.fence_char) + && scan_close_code_fence(ln, first_nonspace, + container->attributes.fenced_code_data.fence_length); + if (matched) { + // if closing fence, don't add line to container; instead, close it: + finalize(container, line_number); + container = container->parent; // back up to parent + } else { + check(add_line(container, ln, offset) == 0, "could not add line"); + } + + } else if (container->tag == html_block) { + + 
check(add_line(container, ln, offset) == 0, "could not add line"); + + } else if (blank) { + + // ??? do nothing + + } else if (container->tag == atx_header) { + + // chop off trailing ###s...use a scanner? + brtrimws(ln); + int p = blength(ln) - 1; + int numhashes = 0; + // if string ends in #s, remove these: + while (bchar(ln, p) == '#') { + p--; + numhashes++; + } + if (bchar(ln, p) == '\\') { + // the last # was escaped, so we include it. + p++; + numhashes--; + } + check(bdelete(ln, p + 1, numhashes) != BSTR_ERR, + "could not delete final hashes"); + check(add_line(container, ln, first_nonspace) == 0, "could not add line"); + finalize(container, line_number); + container = container->parent; + + } else if (accepts_lines(container->tag)) { + + check(add_line(container, ln, first_nonspace) == 0, "could not add line"); + + } else if (container->tag != hrule && container->tag != setext_header) { + + // create paragraph container for line + container = add_child(container, paragraph, line_number, first_nonspace + 1); + check(add_line(container, ln, first_nonspace) == 0, "could not add line"); + + } else { + + log_warn("Line %d with container type %d did not match any condition:\n\"%s\"", + line_number, container->tag, ln->data); + + } + *curptr = container; + } + + return 0; + error: + return -1; +} + diff --git a/src/bstrlib.c b/src/bstrlib.c new file mode 100644 index 0000000..1b19dbe --- /dev/null +++ b/src/bstrlib.c @@ -0,0 +1,2979 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * for details on usage and license. + */ + +/* + * bstrlib.c + * + * This file is the core module for implementing the bstring functions. + */ + +#if defined (_MSC_VER) +/* These warnings from MSVC++ are totally pointless. 
*/ +# define _CRT_SECURE_NO_WARNINGS +#endif + +#include +#include +#include +#include +#include +#include +#include "bstrlib.h" + +/* Optionally include a mechanism for debugging memory */ + +#if defined(MEMORY_DEBUG) || defined(BSTRLIB_MEMORY_DEBUG) +#include "memdbg.h" +#endif + +#ifndef bstr__alloc +#define bstr__alloc(x) malloc (x) +#endif + +#ifndef bstr__free +#define bstr__free(p) free (p) +#endif + +#ifndef bstr__realloc +#define bstr__realloc(p,x) realloc ((p), (x)) +#endif + +#ifndef bstr__memcpy +#define bstr__memcpy(d,s,l) memcpy ((d), (s), (l)) +#endif + +#ifndef bstr__memmove +#define bstr__memmove(d,s,l) memmove ((d), (s), (l)) +#endif + +#ifndef bstr__memset +#define bstr__memset(d,c,l) memset ((d), (c), (l)) +#endif + +#ifndef bstr__memcmp +#define bstr__memcmp(d,c,l) memcmp ((d), (c), (l)) +#endif + +#ifndef bstr__memchr +#define bstr__memchr(s,c,l) memchr ((s), (c), (l)) +#endif + +/* Just a length safe wrapper for memmove. */ + +#define bBlockCopy(D,S,L) { if ((L) > 0) bstr__memmove ((D),(S),(L)); } + +/* Compute the snapped size for a given requested size. By snapping to powers + of 2 like this, repeated reallocations are avoided. */ +static int snapUpSize (int i) { + if (i < 8) { + i = 8; + } else { + unsigned int j; + j = (unsigned int) i; + + j |= (j >> 1); + j |= (j >> 2); + j |= (j >> 4); + j |= (j >> 8); /* Ok, since int >= 16 bits */ +#if (UINT_MAX != 0xffff) + j |= (j >> 16); /* For 32 bit int systems */ +#if (UINT_MAX > 0xffffffffUL) + j |= (j >> 32); /* For 64 bit int systems */ +#endif +#endif + /* Least power of two greater than i */ + j++; + if ((int) j >= i) i = (int) j; + } + return i; +} + +/* int balloc (bstring b, int len) + * + * Increase the size of the memory backing the bstring b to at least len. 
+ */ +int balloc (bstring b, int olen) { + int len; + if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen <= 0 || + b->mlen < b->slen || olen <= 0) { + return BSTR_ERR; + } + + if (olen >= b->mlen) { + unsigned char * x; + + if ((len = snapUpSize (olen)) <= b->mlen) return BSTR_OK; + + /* Assume probability of a non-moving realloc is 0.125 */ + if (7 * b->mlen < 8 * b->slen) { + + /* If slen is close to mlen in size then use realloc to reduce + the memory defragmentation */ + + reallocStrategy:; + + x = (unsigned char *) bstr__realloc (b->data, (size_t) len); + if (x == NULL) { + + /* Since we failed, try allocating the tighest possible + allocation */ + + if (NULL == (x = (unsigned char *) bstr__realloc (b->data, (size_t) (len = olen)))) { + return BSTR_ERR; + } + } + } else { + + /* If slen is not close to mlen then avoid the penalty of copying + the extra bytes that are allocated, but not considered part of + the string */ + + if (NULL == (x = (unsigned char *) bstr__alloc ((size_t) len))) { + + /* Perhaps there is no available memory for the two + allocations to be in memory at once */ + + goto reallocStrategy; + + } else { + if (b->slen) bstr__memcpy ((char *) x, (char *) b->data, (size_t) b->slen); + bstr__free (b->data); + } + } + b->data = x; + b->mlen = len; + b->data[b->slen] = (unsigned char) '\0'; + } + + return BSTR_OK; +} + +/* int ballocmin (bstring b, int len) + * + * Set the size of the memory backing the bstring b to len or b->slen+1, + * whichever is larger. Note that repeated use of this function can degrade + * performance. 
+ */ +int ballocmin (bstring b, int len) { + unsigned char * s; + + if (b == NULL || b->data == NULL || (b->slen+1) < 0 || b->mlen <= 0 || + b->mlen < b->slen || len <= 0) { + return BSTR_ERR; + } + + if (len < b->slen + 1) len = b->slen + 1; + + if (len != b->mlen) { + s = (unsigned char *) bstr__realloc (b->data, (size_t) len); + if (NULL == s) return BSTR_ERR; + s[b->slen] = (unsigned char) '\0'; + b->data = s; + b->mlen = len; + } + + return BSTR_OK; +} + +/* bstring bfromcstr (const char * str) + * + * Create a bstring which contains the contents of the '\0' terminated char * + * buffer str. + */ +bstring bfromcstr (const char * str) { +bstring b; +int i; +size_t j; + + if (str == NULL) return NULL; + j = (strlen) (str); + i = snapUpSize ((int) (j + (2 - (j != 0)))); + if (i <= (int) j) return NULL; + + b = (bstring) bstr__alloc (sizeof (struct tagbstring)); + if (NULL == b) return NULL; + b->slen = (int) j; + if (NULL == (b->data = (unsigned char *) bstr__alloc (b->mlen = i))) { + bstr__free (b); + return NULL; + } + + bstr__memcpy (b->data, str, j+1); + return b; +} + +/* bstring bfromcstralloc (int mlen, const char * str) + * + * Create a bstring which contains the contents of the '\0' terminated char * + * buffer str. The memory buffer backing the string is at least len + * characters in length. + */ +bstring bfromcstralloc (int mlen, const char * str) { +bstring b; +int i; +size_t j; + + if (str == NULL) return NULL; + j = (strlen) (str); + i = snapUpSize ((int) (j + (2 - (j != 0)))); + if (i <= (int) j) return NULL; + + b = (bstring) bstr__alloc (sizeof (struct tagbstring)); + if (b == NULL) return NULL; + b->slen = (int) j; + if (i < mlen) i = mlen; + + if (NULL == (b->data = (unsigned char *) bstr__alloc (b->mlen = i))) { + bstr__free (b); + return NULL; + } + + bstr__memcpy (b->data, str, j+1); + return b; +} + +/* bstring blk2bstr (const void * blk, int len) + * + * Create a bstring which contains the content of the block blk of length + * len. 
+ */ +bstring blk2bstr (const void * blk, int len) { +bstring b; +int i; + + if (blk == NULL || len < 0) return NULL; + b = (bstring) bstr__alloc (sizeof (struct tagbstring)); + if (b == NULL) return NULL; + b->slen = len; + + i = len + (2 - (len != 0)); + i = snapUpSize (i); + + b->mlen = i; + + b->data = (unsigned char *) bstr__alloc ((size_t) b->mlen); + if (b->data == NULL) { + bstr__free (b); + return NULL; + } + + if (len > 0) bstr__memcpy (b->data, blk, (size_t) len); + b->data[len] = (unsigned char) '\0'; + + return b; +} + +/* char * bstr2cstr (const_bstring s, char z) + * + * Create a '\0' terminated char * buffer which is equal to the contents of + * the bstring s, except that any contained '\0' characters are converted + * to the character in z. This returned value should be freed with a + * bcstrfree () call, by the calling application. + */ +char * bstr2cstr (const_bstring b, char z) { +int i, l; +char * r; + + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + l = b->slen; + r = (char *) bstr__alloc ((size_t) (l + 1)); + if (r == NULL) return r; + + for (i=0; i < l; i ++) { + r[i] = (char) ((b->data[i] == '\0') ? z : (char) (b->data[i])); + } + + r[l] = (unsigned char) '\0'; + + return r; +} + +/* int bcstrfree (char * s) + * + * Frees a C-string generated by bstr2cstr (). This is normally unnecessary + * since it just wraps a call to bstr__free (), however, if bstr__alloc () + * and bstr__free () have been redefined as a macros within the bstrlib + * module (via defining them in memdbg.h after defining + * BSTRLIB_MEMORY_DEBUG) with some difference in behaviour from the std + * library functions, then this allows a correct way of freeing the memory + * that allows higher level code to be independent from these macro + * redefinitions. 
+ */ +int bcstrfree (char * s) { + if (s) { + bstr__free (s); + return BSTR_OK; + } + return BSTR_ERR; +} + +/* int bconcat (bstring b0, const_bstring b1) + * + * Concatenate the bstring b1 to the bstring b0. + */ +int bconcat (bstring b0, const_bstring b1) { +int len, d; +bstring aux = (bstring) b1; + + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL) return BSTR_ERR; + + d = b0->slen; + len = b1->slen; + if ((d | (b0->mlen - d) | len | (d + len)) < 0) return BSTR_ERR; + + if (b0->mlen <= d + len + 1) { + ptrdiff_t pd = b1->data - b0->data; + if (0 <= pd && pd < b0->mlen) { + if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR; + } + if (balloc (b0, d + len + 1) != BSTR_OK) { + if (aux != b1) bdestroy (aux); + return BSTR_ERR; + } + } + + bBlockCopy (&b0->data[d], &aux->data[0], (size_t) len); + b0->data[d + len] = (unsigned char) '\0'; + b0->slen = d + len; + if (aux != b1) bdestroy (aux); + return BSTR_OK; +} + +/* int bconchar (bstring b, char c) +/ * + * Concatenate the single character c to the bstring b. + */ +int bconchar (bstring b, char c) { +int d; + + if (b == NULL) return BSTR_ERR; + d = b->slen; + if ((d | (b->mlen - d)) < 0 || balloc (b, d + 2) != BSTR_OK) return BSTR_ERR; + b->data[d] = (unsigned char) c; + b->data[d + 1] = (unsigned char) '\0'; + b->slen++; + return BSTR_OK; +} + +/* int bcatcstr (bstring b, const char * s) + * + * Concatenate a char * string to a bstring. 
+ */ +int bcatcstr (bstring b, const char * s) { +char * d; +int i, l; + + if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen + || b->mlen <= 0 || s == NULL) return BSTR_ERR; + + /* Optimistically concatenate directly */ + l = b->mlen - b->slen; + d = (char *) &b->data[b->slen]; + for (i=0; i < l; i++) { + if ((*d++ = *s++) == '\0') { + b->slen += i; + return BSTR_OK; + } + } + b->slen += i; + + /* Need to explicitely resize and concatenate tail */ + return bcatblk (b, (const void *) s, (int) strlen (s)); +} + +/* int bcatblk (bstring b, const void * s, int len) + * + * Concatenate a fixed length buffer to a bstring. + */ +int bcatblk (bstring b, const void * s, int len) { +int nl; + + if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen + || b->mlen <= 0 || s == NULL || len < 0) return BSTR_ERR; + + if (0 > (nl = b->slen + len)) return BSTR_ERR; /* Overflow? */ + if (b->mlen <= nl && 0 > balloc (b, nl + 1)) return BSTR_ERR; + + bBlockCopy (&b->data[b->slen], s, (size_t) len); + b->slen = nl; + b->data[nl] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* bstring bstrcpy (const_bstring b) + * + * Create a copy of the bstring b. + */ +bstring bstrcpy (const_bstring b) { +bstring b0; +int i,j; + + /* Attempted to copy an invalid string? 
*/ + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + + b0 = (bstring) bstr__alloc (sizeof (struct tagbstring)); + if (b0 == NULL) { + /* Unable to allocate memory for string header */ + return NULL; + } + + i = b->slen; + j = snapUpSize (i + 1); + + b0->data = (unsigned char *) bstr__alloc (j); + if (b0->data == NULL) { + j = i + 1; + b0->data = (unsigned char *) bstr__alloc (j); + if (b0->data == NULL) { + /* Unable to allocate memory for string data */ + bstr__free (b0); + return NULL; + } + } + + b0->mlen = j; + b0->slen = i; + + if (i) bstr__memcpy ((char *) b0->data, (char *) b->data, i); + b0->data[b0->slen] = (unsigned char) '\0'; + + return b0; +} + +/* int bassign (bstring a, const_bstring b) + * + * Overwrite the string a with the contents of string b. + */ +int bassign (bstring a, const_bstring b) { + if (b == NULL || b->data == NULL || b->slen < 0) + return BSTR_ERR; + if (b->slen != 0) { + if (balloc (a, b->slen) != BSTR_OK) return BSTR_ERR; + bstr__memmove (a->data, b->data, b->slen); + } else { + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0) + return BSTR_ERR; + } + a->data[b->slen] = (unsigned char) '\0'; + a->slen = b->slen; + return BSTR_OK; +} + +/* int bassignmidstr (bstring a, const_bstring b, int left, int len) + * + * Overwrite the string a with the middle of contents of string b + * starting from position left and running for a length len. left and + * len are clamped to the ends of b as with the function bmidstr. 
+ */ +int bassignmidstr (bstring a, const_bstring b, int left, int len) { + if (b == NULL || b->data == NULL || b->slen < 0) + return BSTR_ERR; + + if (left < 0) { + len += left; + left = 0; + } + + if (len > b->slen - left) len = b->slen - left; + + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0) + return BSTR_ERR; + + if (len > 0) { + if (balloc (a, len) != BSTR_OK) return BSTR_ERR; + bstr__memmove (a->data, b->data + left, len); + a->slen = len; + } else { + a->slen = 0; + } + a->data[a->slen] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* int bassigncstr (bstring a, const char * str) + * + * Overwrite the string a with the contents of char * string str. Note that + * the bstring a must be a well defined and writable bstring. If an error + * occurs BSTR_ERR is returned however a may be partially overwritten. + */ +int bassigncstr (bstring a, const char * str) { +int i; +size_t len; + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0 || NULL == str) + return BSTR_ERR; + + for (i=0; i < a->mlen; i++) { + if ('\0' == (a->data[i] = str[i])) { + a->slen = i; + return BSTR_OK; + } + } + + a->slen = i; + len = strlen (str + i); + if (len > INT_MAX || i + len + 1 > INT_MAX || + 0 > balloc (a, (int) (i + len + 1))) return BSTR_ERR; + bBlockCopy (a->data + i, str + i, (size_t) len + 1); + a->slen += (int) len; + return BSTR_OK; +} + +/* int bassignblk (bstring a, const void * s, int len) + * + * Overwrite the string a with the contents of the block (s, len). Note that + * the bstring a must be a well defined and writable bstring. If an error + * occurs BSTR_ERR is returned and a is not overwritten. 
+ */ +int bassignblk (bstring a, const void * s, int len) { + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0 || NULL == s || len + 1 < 1) + return BSTR_ERR; + if (len + 1 > a->mlen && 0 > balloc (a, len + 1)) return BSTR_ERR; + bBlockCopy (a->data, s, (size_t) len); + a->data[len] = (unsigned char) '\0'; + a->slen = len; + return BSTR_OK; +} + +/* int btrunc (bstring b, int n) + * + * Truncate the bstring to at most n characters. + */ +int btrunc (bstring b, int n) { + if (n < 0 || b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + if (b->slen > n) { + b->slen = n; + b->data[n] = (unsigned char) '\0'; + } + return BSTR_OK; +} + +#define upcase(c) (toupper ((unsigned char) c)) +#define downcase(c) (tolower ((unsigned char) c)) +#define wspace(c) (isspace ((unsigned char) c)) + +/* int btoupper (bstring b) + * + * Convert contents of bstring to upper case. + */ +int btoupper (bstring b) { +int i, len; + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + for (i=0, len = b->slen; i < len; i++) { + b->data[i] = (unsigned char) upcase (b->data[i]); + } + return BSTR_OK; +} + +/* int btolower (bstring b) + * + * Convert contents of bstring to lower case. + */ +int btolower (bstring b) { +int i, len; + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + for (i=0, len = b->slen; i < len; i++) { + b->data[i] = (unsigned char) downcase (b->data[i]); + } + return BSTR_OK; +} + +/* int bstricmp (const_bstring b0, const_bstring b1) + * + * Compare two strings without differentiating between case. The return + * value is the difference of the values of the characters where the two + * strings first differ after lower case transformation, otherwise 0 is + * returned indicating that the strings are equal. 
If the lengths are
 * different, then a difference from 0 is given, but if the first extra
 * character is '\0', then it is taken to be the value UCHAR_MAX+1.
 */
int bstricmp (const_bstring b0, const_bstring b1) {
    int idx, diff, common;

    if (bdata (b0) == NULL || b0->slen < 0 ||
        bdata (b1) == NULL || b1->slen < 0) return SHRT_MIN;

    /* common is the length of the shorter string. */
    common = b0->slen;
    if (common > b1->slen) {
        common = b1->slen;
    } else if (b0->slen == b1->slen && b0->data == b1->data) {
        /* Same storage and same length: trivially equal. */
        return BSTR_OK;
    }

    for (idx = 0; idx < common; idx++) {
        diff = (char) downcase (b0->data[idx])
             - (char) downcase (b1->data[idx]);
        if (diff != 0) return diff;
    }

    /* Equal over the shared prefix: the first extra character of the
       longer string decides, with '\0' standing in as UCHAR_MAX+1. */
    if (b0->slen > common) {
        diff = (char) downcase (b0->data[common]);
        return diff ? diff : UCHAR_MAX + 1;
    }
    if (b1->slen > common) {
        diff = - (char) downcase (b1->data[common]);
        return diff ? diff : - (int) (UCHAR_MAX + 1);
    }
    return BSTR_OK;
}

/* int bstrnicmp (const_bstring b0, const_bstring b1, int n)
 *
 * Compare two strings without differentiating between case for at most n
 * characters. If the position where the two strings first differ is
 * before the nth position, the return value is the difference of the values
 * of the characters, otherwise 0 is returned. If the lengths are different
 * and less than n characters, then a difference from 0 is given, but if the
 * first extra character is '\0', then it is taken to be the value
 * UCHAR_MAX+1.
+ */ +int bstrnicmp (const_bstring b0, const_bstring b1, int n) { +int i, v, m; + + if (bdata (b0) == NULL || b0->slen < 0 || + bdata (b1) == NULL || b1->slen < 0 || n < 0) return SHRT_MIN; + m = n; + if (m > b0->slen) m = b0->slen; + if (m > b1->slen) m = b1->slen; + + if (b0->data != b1->data) { + for (i = 0; i < m; i ++) { + v = (char) downcase (b0->data[i]); + v -= (char) downcase (b1->data[i]); + if (v != 0) return b0->data[i] - b1->data[i]; + } + } + + if (n == m || b0->slen == b1->slen) return BSTR_OK; + + if (b0->slen > m) { + v = (char) downcase (b0->data[m]); + if (v) return v; + return UCHAR_MAX + 1; + } + + v = - (char) downcase (b1->data[m]); + if (v) return v; + return - (int) (UCHAR_MAX + 1); +} + +/* int biseqcaseless (const_bstring b0, const_bstring b1) + * + * Compare two strings for equality without differentiating between case. + * If the strings differ other than in case, 0 is returned, if the strings + * are the same, 1 is returned, if there is an error, -1 is returned. If + * the length of the strings are different, this function is O(1). '\0' + * termination characters are not treated in any special way. + */ +int biseqcaseless (const_bstring b0, const_bstring b1) { +int i, n; + + if (bdata (b0) == NULL || b0->slen < 0 || + bdata (b1) == NULL || b1->slen < 0) return BSTR_ERR; + if (b0->slen != b1->slen) return BSTR_OK; + if (b0->data == b1->data || b0->slen == 0) return 1; + for (i=0, n=b0->slen; i < n; i++) { + if (b0->data[i] != b1->data[i]) { + unsigned char c = (unsigned char) downcase (b0->data[i]); + if (c != (unsigned char) downcase (b1->data[i])) return 0; + } + } + return 1; +} + +/* int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len) + * + * Compare beginning of string b0 with a block of memory of length len + * without differentiating between case for equality. 
If the beginning of b0 + * differs from the memory block other than in case (or if b0 is too short), + * 0 is returned, if the strings are the same, 1 is returned, if there is an + * error, -1 is returned. '\0' characters are not treated in any special + * way. + */ +int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len) { +int i; + + if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0) + return BSTR_ERR; + if (b0->slen < len) return BSTR_OK; + if (b0->data == (const unsigned char *) blk || len == 0) return 1; + + for (i = 0; i < len; i ++) { + if (b0->data[i] != ((const unsigned char *) blk)[i]) { + if (downcase (b0->data[i]) != + downcase (((const unsigned char *) blk)[i])) return 0; + } + } + return 1; +} + +/* + * int bltrimws (bstring b) + * + * Delete whitespace contiguous from the left end of the string. + */ +int bltrimws (bstring b) { +int i, len; + + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + + for (len = b->slen, i = 0; i < len; i++) { + if (!wspace (b->data[i])) { + return bdelete (b, 0, i); + } + } + + b->data[0] = (unsigned char) '\0'; + b->slen = 0; + return BSTR_OK; +} + +/* + * int brtrimws (bstring b) + * + * Delete whitespace contiguous from the right end of the string. + */ +int brtrimws (bstring b) { +int i; + + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + + for (i = b->slen - 1; i >= 0; i--) { + if (!wspace (b->data[i])) { + if (b->mlen > i) b->data[i+1] = (unsigned char) '\0'; + b->slen = i + 1; + return BSTR_OK; + } + } + + b->data[0] = (unsigned char) '\0'; + b->slen = 0; + return BSTR_OK; +} + +/* + * int btrimws (bstring b) + * + * Delete whitespace contiguous from both ends of the string. 
+ */ +int btrimws (bstring b) { +int i, j; + + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + + for (i = b->slen - 1; i >= 0; i--) { + if (!wspace (b->data[i])) { + if (b->mlen > i) b->data[i+1] = (unsigned char) '\0'; + b->slen = i + 1; + for (j = 0; wspace (b->data[j]); j++) {} + return bdelete (b, 0, j); + } + } + + b->data[0] = (unsigned char) '\0'; + b->slen = 0; + return BSTR_OK; +} + +/* int biseq (const_bstring b0, const_bstring b1) + * + * Compare the string b0 and b1. If the strings differ, 0 is returned, if + * the strings are the same, 1 is returned, if there is an error, -1 is + * returned. If the length of the strings are different, this function is + * O(1). '\0' termination characters are not treated in any special way. + */ +int biseq (const_bstring b0, const_bstring b1) { + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || + b0->slen < 0 || b1->slen < 0) return BSTR_ERR; + if (b0->slen != b1->slen) return BSTR_OK; + if (b0->data == b1->data || b0->slen == 0) return 1; + return !bstr__memcmp (b0->data, b1->data, b0->slen); +} + +/* int bisstemeqblk (const_bstring b0, const void * blk, int len) + * + * Compare beginning of string b0 with a block of memory of length len for + * equality. If the beginning of b0 differs from the memory block (or if b0 + * is too short), 0 is returned, if the strings are the same, 1 is returned, + * if there is an error, -1 is returned. '\0' characters are not treated in + * any special way. 
+ */ +int bisstemeqblk (const_bstring b0, const void * blk, int len) { +int i; + + if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0) + return BSTR_ERR; + if (b0->slen < len) return BSTR_OK; + if (b0->data == (const unsigned char *) blk || len == 0) return 1; + + for (i = 0; i < len; i ++) { + if (b0->data[i] != ((const unsigned char *) blk)[i]) return BSTR_OK; + } + return 1; +} + +/* int biseqcstr (const_bstring b, const char *s) + * + * Compare the bstring b and char * string s. The C string s must be '\0' + * terminated at exactly the length of the bstring b, and the contents + * between the two must be identical with the bstring b with no '\0' + * characters for the two contents to be considered equal. This is + * equivalent to the condition that their current contents will be always be + * equal when comparing them in the same format after converting one or the + * other. If the strings are equal 1 is returned, if they are unequal 0 is + * returned and if there is a detectable error BSTR_ERR is returned. + */ +int biseqcstr (const_bstring b, const char * s) { +int i; + if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR; + for (i=0; i < b->slen; i++) { + if (s[i] == '\0' || b->data[i] != (unsigned char) s[i]) return BSTR_OK; + } + return s[i] == '\0'; +} + +/* int biseqcstrcaseless (const_bstring b, const char *s) + * + * Compare the bstring b and char * string s. The C string s must be '\0' + * terminated at exactly the length of the bstring b, and the contents + * between the two must be identical except for case with the bstring b with + * no '\0' characters for the two contents to be considered equal. This is + * equivalent to the condition that their current contents will be always be + * equal ignoring case when comparing them in the same format after + * converting one or the other. 
If the strings are equal, except for case,
 * 1 is returned, if they are unequal regardless of case 0 is returned and
 * if there is a detectable error BSTR_ERR is returned.
 */
int biseqcstrcaseless (const_bstring b, const char * s) {
    int k;

    if (b == NULL || s == NULL || b->data == NULL || b->slen < 0)
        return BSTR_ERR;

    k = 0;
    while (k < b->slen) {
        /* s ended before b did. */
        if (s[k] == '\0') return BSTR_OK;

        /* Only fold case when the raw characters differ. */
        if (b->data[k] != (unsigned char) s[k] &&
            downcase (b->data[k]) != (unsigned char) downcase (s[k]))
            return BSTR_OK;
        k++;
    }

    /* Equal only when s ends exactly where b does. */
    return s[k] == '\0';
}

/* int bstrcmp (const_bstring b0, const_bstring b1)
 *
 * Compare the string b0 and b1. If there is an error, SHRT_MIN is returned,
 * otherwise a value less than or greater than zero, indicating that the
 * string pointed to by b0 is lexicographically less than or greater than
 * the string pointed to by b1 is returned. If the string lengths are
 * unequal but the characters up until the length of the shorter are equal
 * then a value less than, or greater than zero, indicating that the string
 * pointed to by b0 is shorter or longer than the string pointed to by b1 is
 * returned. 0 is returned if and only if the two strings are the same. If
 * the length of the strings are different, this function is O(n). Like its
 * standard C library counter part strcmp, the comparison does not proceed
 * past any '\0' termination characters encountered.
+ */ +int bstrcmp (const_bstring b0, const_bstring b1) { +int i, v, n; + + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || + b0->slen < 0 || b1->slen < 0) return SHRT_MIN; + n = b0->slen; if (n > b1->slen) n = b1->slen; + if (b0->slen == b1->slen && (b0->data == b1->data || b0->slen == 0)) + return BSTR_OK; + + for (i = 0; i < n; i ++) { + v = ((char) b0->data[i]) - ((char) b1->data[i]); + if (v != 0) return v; + if (b0->data[i] == (unsigned char) '\0') return BSTR_OK; + } + + if (b0->slen > n) return 1; + if (b1->slen > n) return -1; + return BSTR_OK; +} + +/* int bstrncmp (const_bstring b0, const_bstring b1, int n) + * + * Compare the string b0 and b1 for at most n characters. If there is an + * error, SHRT_MIN is returned, otherwise a value is returned as if b0 and + * b1 were first truncated to at most n characters then bstrcmp was called + * with these new strings are paremeters. If the length of the strings are + * different, this function is O(n). Like its standard C library counter + * part strcmp, the comparison does not proceed past any '\0' termination + * characters encountered. + */ +int bstrncmp (const_bstring b0, const_bstring b1, int n) { +int i, v, m; + + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || + b0->slen < 0 || b1->slen < 0) return SHRT_MIN; + m = n; + if (m > b0->slen) m = b0->slen; + if (m > b1->slen) m = b1->slen; + + if (b0->data != b1->data) { + for (i = 0; i < m; i ++) { + v = ((char) b0->data[i]) - ((char) b1->data[i]); + if (v != 0) return v; + if (b0->data[i] == (unsigned char) '\0') return BSTR_OK; + } + } + + if (n == m || b0->slen == b1->slen) return BSTR_OK; + + if (b0->slen > m) return 1; + return -1; +} + +/* bstring bmidstr (const_bstring b, int left, int len) + * + * Create a bstring which is the substring of b starting from position left + * and running for a length len (clamped by the end of the bstring b.) If + * b is detectably invalid, then NULL is returned. 
The section described + * by (left, len) is clamped to the boundaries of b. + */ +bstring bmidstr (const_bstring b, int left, int len) { + + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + + if (left < 0) { + len += left; + left = 0; + } + + if (len > b->slen - left) len = b->slen - left; + + if (len <= 0) return bfromcstr (""); + return blk2bstr (b->data + left, len); +} + +/* int bdelete (bstring b, int pos, int len) + * + * Removes characters from pos to pos+len-1 inclusive and shifts the tail of + * the bstring starting from pos+len to pos. len must be positive for this + * call to have any effect. The section of the string described by (pos, + * len) is clamped to boundaries of the bstring b. + */ +int bdelete (bstring b, int pos, int len) { + /* Clamp to left side of bstring */ + if (pos < 0) { + len += pos; + pos = 0; + } + + if (len < 0 || b == NULL || b->data == NULL || b->slen < 0 || + b->mlen < b->slen || b->mlen <= 0) + return BSTR_ERR; + if (len > 0 && pos < b->slen) { + if (pos + len >= b->slen) { + b->slen = pos; + } else { + bBlockCopy ((char *) (b->data + pos), + (char *) (b->data + pos + len), + b->slen - (pos+len)); + b->slen -= len; + } + b->data[b->slen] = (unsigned char) '\0'; + } + return BSTR_OK; +} + +/* int bdestroy (bstring b) + * + * Free up the bstring. Note that if b is detectably invalid or not writable + * then no action is performed and BSTR_ERR is returned. Like a freed memory + * allocation, dereferences, writes or any other action on b after it has + * been bdestroyed is undefined. + */ +int bdestroy (bstring b) { + if (b == NULL || b->slen < 0 || b->mlen <= 0 || b->mlen < b->slen || + b->data == NULL) + return BSTR_ERR; + + bstr__free (b->data); + + /* In case there is any stale usage, there is one more chance to + notice this error. 
*/ + + b->slen = -1; + b->mlen = -__LINE__; + b->data = NULL; + + bstr__free (b); + return BSTR_OK; +} + +/* int binstr (const_bstring b1, int pos, const_bstring b2) + * + * Search for the bstring b2 in b1 starting from position pos, and searching + * forward. If it is found then return with the first position where it is + * found, otherwise return BSTR_ERR. Note that this is just a brute force + * string searcher that does not attempt clever things like the Boyer-Moore + * search algorithm. Because of this there are many degenerate cases where + * this can take much longer than it needs to. + */ +int binstr (const_bstring b1, int pos, const_bstring b2) { +int j, ii, ll, lf; +unsigned char * d0; +unsigned char c0; +register unsigned char * d1; +register unsigned char c1; +register int i; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos) return (b2->slen == 0)?pos:BSTR_ERR; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + /* No space to find such a string? 
*/ + if ((lf = b1->slen - b2->slen + 1) <= pos) return BSTR_ERR; + + /* An obvious alias case */ + if (b1->data == b2->data && pos == 0) return 0; + + i = pos; + + d0 = b2->data; + d1 = b1->data; + ll = b2->slen; + + /* Peel off the b2->slen == 1 case */ + c0 = d0[0]; + if (1 == ll) { + for (;i < lf; i++) if (c0 == d1[i]) return i; + return BSTR_ERR; + } + + c1 = c0; + j = 0; + lf = b1->slen - 1; + + ii = -1; + if (i < lf) do { + /* Unrolled current character test */ + if (c1 != d1[i]) { + if (c1 != d1[1+i]) { + i += 2; + continue; + } + i++; + } + + /* Take note if this is the start of a potential match */ + if (0 == j) ii = i; + + /* Shift the test character down by one */ + j++; + i++; + + /* If this isn't past the last character continue */ + if (j < ll) { + c1 = d0[j]; + continue; + } + + N0:; + + /* If no characters mismatched, then we matched */ + if (i == ii+j) return ii; + + /* Shift back to the beginning */ + i -= j; + j = 0; + c1 = c0; + } while (i < lf); + + /* Deal with last case if unrolling caused a misalignment */ + if (i == lf && ll == j+1 && c1 == d1[i]) goto N0; + + return BSTR_ERR; +} + +/* int binstrr (const_bstring b1, int pos, const_bstring b2) + * + * Search for the bstring b2 in b1 starting from position pos, and searching + * backward. If it is found then return with the first position where it is + * found, otherwise return BSTR_ERR. Note that this is just a brute force + * string searcher that does not attempt clever things like the Boyer-Moore + * search algorithm. Because of this there are many degenerate cases where + * this can take much longer than it needs to. 
+ */ +int binstrr (const_bstring b1, int pos, const_bstring b2) { +int j, i, l; +unsigned char * d0, * d1; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos && b2->slen == 0) return pos; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + /* Obvious alias case */ + if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return 0; + + i = pos; + if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR; + + /* If no space to find such a string then snap back */ + if (l + 1 <= i) i = l; + j = 0; + + d0 = b2->data; + d1 = b1->data; + l = b2->slen; + + for (;;) { + if (d0[j] == d1[i + j]) { + j ++; + if (j >= l) return i; + } else { + i --; + if (i < 0) break; + j=0; + } + } + + return BSTR_ERR; +} + +/* int binstrcaseless (const_bstring b1, int pos, const_bstring b2) + * + * Search for the bstring b2 in b1 starting from position pos, and searching + * forward but without regard to case. If it is found then return with the + * first position where it is found, otherwise return BSTR_ERR. Note that + * this is just a brute force string searcher that does not attempt clever + * things like the Boyer-Moore search algorithm. Because of this there are + * many degenerate cases where this can take much longer than it needs to. + */ +int binstrcaseless (const_bstring b1, int pos, const_bstring b2) { +int j, i, l, ll; +unsigned char * d0, * d1; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos) return (b2->slen == 0)?pos:BSTR_ERR; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + l = b1->slen - b2->slen + 1; + + /* No space to find such a string? 
*/ + if (l <= pos) return BSTR_ERR; + + /* An obvious alias case */ + if (b1->data == b2->data && pos == 0) return BSTR_OK; + + i = pos; + j = 0; + + d0 = b2->data; + d1 = b1->data; + ll = b2->slen; + + for (;;) { + if (d0[j] == d1[i + j] || downcase (d0[j]) == downcase (d1[i + j])) { + j ++; + if (j >= ll) return i; + } else { + i ++; + if (i >= l) break; + j=0; + } + } + + return BSTR_ERR; +} + +/* int binstrrcaseless (const_bstring b1, int pos, const_bstring b2) + * + * Search for the bstring b2 in b1 starting from position pos, and searching + * backward but without regard to case. If it is found then return with the + * first position where it is found, otherwise return BSTR_ERR. Note that + * this is just a brute force string searcher that does not attempt clever + * things like the Boyer-Moore search algorithm. Because of this there are + * many degenerate cases where this can take much longer than it needs to. + */ +int binstrrcaseless (const_bstring b1, int pos, const_bstring b2) { +int j, i, l; +unsigned char * d0, * d1; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos && b2->slen == 0) return pos; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + /* Obvious alias case */ + if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return BSTR_OK; + + i = pos; + if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR; + + /* If no space to find such a string then snap back */ + if (l + 1 <= i) i = l; + j = 0; + + d0 = b2->data; + d1 = b1->data; + l = b2->slen; + + for (;;) { + if (d0[j] == d1[i + j] || downcase (d0[j]) == downcase (d1[i + j])) { + j ++; + if (j >= l) return i; + } else { + i --; + if (i < 0) break; + j=0; + } + } + + return BSTR_ERR; +} + + +/* int bstrchrp (const_bstring b, int c, int pos) + * + * Search for the character c in b forwards from the position pos + * (inclusive). 
+ */ +int bstrchrp (const_bstring b, int c, int pos) { +unsigned char * p; + + if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR; + p = (unsigned char *) bstr__memchr ((b->data + pos), (unsigned char) c, (b->slen - pos)); + if (p) return (int) (p - b->data); + return BSTR_ERR; +} + +/* int bstrrchrp (const_bstring b, int c, int pos) + * + * Search for the character c in b backwards from the position pos in string + * (inclusive). + */ +int bstrrchrp (const_bstring b, int c, int pos) { +int i; + + if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR; + for (i=pos; i >= 0; i--) { + if (b->data[i] == (unsigned char) c) return i; + } + return BSTR_ERR; +} + +#if !defined (BSTRLIB_AGGRESSIVE_MEMORY_FOR_SPEED_TRADEOFF) +#define LONG_LOG_BITS_QTY (3) +#define LONG_BITS_QTY (1 << LONG_LOG_BITS_QTY) +#define LONG_TYPE unsigned char + +#define CFCLEN ((1 << CHAR_BIT) / LONG_BITS_QTY) +struct charField { LONG_TYPE content[CFCLEN]; }; +#define testInCharField(cf,c) ((cf)->content[(c) >> LONG_LOG_BITS_QTY] & (((long)1) << ((c) & (LONG_BITS_QTY-1)))) +#define setInCharField(cf,idx) { \ + unsigned int c = (unsigned int) (idx); \ + (cf)->content[c >> LONG_LOG_BITS_QTY] |= (LONG_TYPE) (1ul << (c & (LONG_BITS_QTY-1))); \ +} + +#else + +#define CFCLEN (1 << CHAR_BIT) +struct charField { unsigned char content[CFCLEN]; }; +#define testInCharField(cf,c) ((cf)->content[(unsigned char) (c)]) +#define setInCharField(cf,idx) (cf)->content[(unsigned int) (idx)] = ~0 + +#endif + +/* Convert a bstring to charField */ +static int buildCharField (struct charField * cf, const_bstring b) { +int i; + if (b == NULL || b->data == NULL || b->slen <= 0) return BSTR_ERR; + memset ((void *) cf->content, 0, sizeof (struct charField)); + for (i=0; i < b->slen; i++) { + setInCharField (cf, b->data[i]); + } + return BSTR_OK; +} + +static void invertCharField (struct charField * cf) { +int i; + for (i=0; i < CFCLEN; i++) cf->content[i] = ~cf->content[i]; 
+} + +/* Inner engine for binchr */ +static int binchrCF (const unsigned char * data, int len, int pos, const struct charField * cf) { +int i; + for (i=pos; i < len; i++) { + unsigned char c = (unsigned char) data[i]; + if (testInCharField (cf, c)) return i; + } + return BSTR_ERR; +} + +/* int binchr (const_bstring b0, int pos, const_bstring b1); + * + * Search for the first position in b0 starting from pos or after, in which + * one of the characters in b1 is found and return it. If such a position + * does not exist in b0, then BSTR_ERR is returned. + */ +int binchr (const_bstring b0, int pos, const_bstring b1) { +struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || + b0->slen <= pos) return BSTR_ERR; + if (1 == b1->slen) return bstrchrp (b0, b1->data[0], pos); + if (0 > buildCharField (&chrs, b1)) return BSTR_ERR; + return binchrCF (b0->data, b0->slen, pos, &chrs); +} + +/* Inner engine for binchrr */ +static int binchrrCF (const unsigned char * data, int pos, const struct charField * cf) { +int i; + for (i=pos; i >= 0; i--) { + unsigned int c = (unsigned int) data[i]; + if (testInCharField (cf, c)) return i; + } + return BSTR_ERR; +} + +/* int binchrr (const_bstring b0, int pos, const_bstring b1); + * + * Search for the last position in b0 no greater than pos, in which one of + * the characters in b1 is found and return it. If such a position does not + * exist in b0, then BSTR_ERR is returned. 
+ */ +int binchrr (const_bstring b0, int pos, const_bstring b1) { +struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || b1 == NULL || + b0->slen < pos) return BSTR_ERR; + if (pos == b0->slen) pos--; + if (1 == b1->slen) return bstrrchrp (b0, b1->data[0], pos); + if (0 > buildCharField (&chrs, b1)) return BSTR_ERR; + return binchrrCF (b0->data, pos, &chrs); +} + +/* int bninchr (const_bstring b0, int pos, const_bstring b1); + * + * Search for the first position in b0 starting from pos or after, in which + * none of the characters in b1 is found and return it. If such a position + * does not exist in b0, then BSTR_ERR is returned. + */ +int bninchr (const_bstring b0, int pos, const_bstring b1) { +struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || + b0->slen <= pos) return BSTR_ERR; + if (buildCharField (&chrs, b1) < 0) return BSTR_ERR; + invertCharField (&chrs); + return binchrCF (b0->data, b0->slen, pos, &chrs); +} + +/* int bninchrr (const_bstring b0, int pos, const_bstring b1); + * + * Search for the last position in b0 no greater than pos, in which none of + * the characters in b1 is found and return it. If such a position does not + * exist in b0, then BSTR_ERR is returned. + */ +int bninchrr (const_bstring b0, int pos, const_bstring b1) { +struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || + b0->slen < pos) return BSTR_ERR; + if (pos == b0->slen) pos--; + if (buildCharField (&chrs, b1) < 0) return BSTR_ERR; + invertCharField (&chrs); + return binchrrCF (b0->data, pos, &chrs); +} + +/* int bsetstr (bstring b0, int pos, bstring b1, unsigned char fill) + * + * Overwrite the string b0 starting at position pos with the string b1. If + * the position pos is past the end of b0, then the character "fill" is + * appended as necessary to make up the gap between the end of b0 and pos. + * If b1 is NULL, it behaves as if it were a 0-length string. 
+ */ +int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill) { +int d, newlen; +ptrdiff_t pd; +bstring aux = (bstring) b1; + + if (pos < 0 || b0 == NULL || b0->slen < 0 || NULL == b0->data || + b0->mlen < b0->slen || b0->mlen <= 0) return BSTR_ERR; + if (b1 != NULL && (b1->slen < 0 || b1->data == NULL)) return BSTR_ERR; + + d = pos; + + /* Aliasing case */ + if (NULL != aux) { + if ((pd = (ptrdiff_t) (b1->data - b0->data)) >= 0 && pd < (ptrdiff_t) b0->mlen) { + if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR; + } + d += aux->slen; + } + + /* Increase memory size if necessary */ + if (balloc (b0, d + 1) != BSTR_OK) { + if (aux != b1) bdestroy (aux); + return BSTR_ERR; + } + + newlen = b0->slen; + + /* Fill in "fill" character as necessary */ + if (pos > newlen) { + bstr__memset (b0->data + b0->slen, (int) fill, (size_t) (pos - b0->slen)); + newlen = pos; + } + + /* Copy b1 to position pos in b0. */ + if (aux != NULL) { + bBlockCopy ((char *) (b0->data + pos), (char *) aux->data, aux->slen); + if (aux != b1) bdestroy (aux); + } + + /* Indicate the potentially increased size of b0 */ + if (d > newlen) newlen = d; + + b0->slen = newlen; + b0->data[newlen] = (unsigned char) '\0'; + + return BSTR_OK; +} + +/* int binsert (bstring b1, int pos, bstring b2, unsigned char fill) + * + * Inserts the string b2 into b1 at position pos. If the position pos is + * past the end of b1, then the character "fill" is appended as necessary to + * make up the gap between the end of b1 and pos. Unlike bsetstr, binsert + * does not allow b2 to be NULL. 
+ */ +int binsert (bstring b1, int pos, const_bstring b2, unsigned char fill) { +int d, l; +ptrdiff_t pd; +bstring aux = (bstring) b2; + + if (pos < 0 || b1 == NULL || b2 == NULL || b1->slen < 0 || + b2->slen < 0 || b1->mlen < b1->slen || b1->mlen <= 0) return BSTR_ERR; + + /* Aliasing case */ + if ((pd = (ptrdiff_t) (b2->data - b1->data)) >= 0 && pd < (ptrdiff_t) b1->mlen) { + if (NULL == (aux = bstrcpy (b2))) return BSTR_ERR; + } + + /* Compute the two possible end pointers */ + d = b1->slen + aux->slen; + l = pos + aux->slen; + if ((d|l) < 0) return BSTR_ERR; + + if (l > d) { + /* Inserting past the end of the string */ + if (balloc (b1, l + 1) != BSTR_OK) { + if (aux != b2) bdestroy (aux); + return BSTR_ERR; + } + bstr__memset (b1->data + b1->slen, (int) fill, (size_t) (pos - b1->slen)); + b1->slen = l; + } else { + /* Inserting in the middle of the string */ + if (balloc (b1, d + 1) != BSTR_OK) { + if (aux != b2) bdestroy (aux); + return BSTR_ERR; + } + bBlockCopy (b1->data + l, b1->data + pos, d - l); + b1->slen = d; + } + bBlockCopy (b1->data + pos, aux->data, aux->slen); + b1->data[b1->slen] = (unsigned char) '\0'; + if (aux != b2) bdestroy (aux); + return BSTR_OK; +} + +/* int breplace (bstring b1, int pos, int len, bstring b2, + * unsigned char fill) + * + * Replace a section of a string from pos for a length len with the string b2. + * fill is used is pos > b1->slen. + */ +int breplace (bstring b1, int pos, int len, const_bstring b2, + unsigned char fill) { +int pl, ret; +ptrdiff_t pd; +bstring aux = (bstring) b2; + + if (pos < 0 || len < 0 || (pl = pos + len) < 0 || b1 == NULL || + b2 == NULL || b1->data == NULL || b2->data == NULL || + b1->slen < 0 || b2->slen < 0 || b1->mlen < b1->slen || + b1->mlen <= 0) return BSTR_ERR; + + /* Straddles the end? 
*/ + if (pl >= b1->slen) { + if ((ret = bsetstr (b1, pos, b2, fill)) < 0) return ret; + if (pos + b2->slen < b1->slen) { + b1->slen = pos + b2->slen; + b1->data[b1->slen] = (unsigned char) '\0'; + } + return ret; + } + + /* Aliasing case */ + if ((pd = (ptrdiff_t) (b2->data - b1->data)) >= 0 && pd < (ptrdiff_t) b1->slen) { + if (NULL == (aux = bstrcpy (b2))) return BSTR_ERR; + } + + if (aux->slen > len) { + if (balloc (b1, b1->slen + aux->slen - len) != BSTR_OK) { + if (aux != b2) bdestroy (aux); + return BSTR_ERR; + } + } + + if (aux->slen != len) bstr__memmove (b1->data + pos + aux->slen, b1->data + pos + len, b1->slen - (pos + len)); + bstr__memcpy (b1->data + pos, aux->data, aux->slen); + b1->slen += aux->slen - len; + b1->data[b1->slen] = (unsigned char) '\0'; + if (aux != b2) bdestroy (aux); + return BSTR_OK; +} + +/* + * findreplaceengine is used to implement bfindreplace and + * bfindreplacecaseless. It works by breaking the three cases of + * expansion, reduction and replacement, and solving each of these + * in the most efficient way possible. + */ + +typedef int (*instr_fnptr) (const_bstring s1, int pos, const_bstring s2); + +#define INITIAL_STATIC_FIND_INDEX_COUNT 32 + +static int findreplaceengine (bstring b, const_bstring find, const_bstring repl, int pos, instr_fnptr instr) { +int i, ret, slen, mlen, delta, acc; +int * d; +int static_d[INITIAL_STATIC_FIND_INDEX_COUNT+1]; /* This +1 is unnecessary, but it shuts up LINT. 
*/ +ptrdiff_t pd; +bstring auxf = (bstring) find; +bstring auxr = (bstring) repl; + + if (b == NULL || b->data == NULL || find == NULL || + find->data == NULL || repl == NULL || repl->data == NULL || + pos < 0 || find->slen <= 0 || b->mlen < 0 || b->slen > b->mlen || + b->mlen <= 0 || b->slen < 0 || repl->slen < 0) return BSTR_ERR; + if (pos > b->slen - find->slen) return BSTR_OK; + + /* Alias with find string */ + pd = (ptrdiff_t) (find->data - b->data); + if ((ptrdiff_t) (pos - find->slen) < pd && pd < (ptrdiff_t) b->slen) { + if (NULL == (auxf = bstrcpy (find))) return BSTR_ERR; + } + + /* Alias with repl string */ + pd = (ptrdiff_t) (repl->data - b->data); + if ((ptrdiff_t) (pos - repl->slen) < pd && pd < (ptrdiff_t) b->slen) { + if (NULL == (auxr = bstrcpy (repl))) { + if (auxf != find) bdestroy (auxf); + return BSTR_ERR; + } + } + + delta = auxf->slen - auxr->slen; + + /* in-place replacement since find and replace strings are of equal + length */ + if (delta == 0) { + while ((pos = instr (b, pos, auxf)) >= 0) { + bstr__memcpy (b->data + pos, auxr->data, auxr->slen); + pos += auxf->slen; + } + if (auxf != find) bdestroy (auxf); + if (auxr != repl) bdestroy (auxr); + return BSTR_OK; + } + + /* shrinking replacement since auxf->slen > auxr->slen */ + if (delta > 0) { + acc = 0; + + while ((i = instr (b, pos, auxf)) >= 0) { + if (acc && i > pos) + bstr__memmove (b->data + pos - acc, b->data + pos, i - pos); + if (auxr->slen) + bstr__memcpy (b->data + i - acc, auxr->data, auxr->slen); + acc += delta; + pos = i + auxf->slen; + } + + if (acc) { + i = b->slen; + if (i > pos) + bstr__memmove (b->data + pos - acc, b->data + pos, i - pos); + b->slen -= acc; + b->data[b->slen] = (unsigned char) '\0'; + } + + if (auxf != find) bdestroy (auxf); + if (auxr != repl) bdestroy (auxr); + return BSTR_OK; + } + + /* expanding replacement since find->slen < repl->slen. Its a lot + more complicated. 
This works by first finding all the matches and + storing them to a growable array, then doing at most one resize of + the destination bstring and then performing the direct memory transfers + of the string segment pieces to form the final result. The growable + array of matches uses a deferred doubling reallocing strategy. What + this means is that it starts as a reasonably fixed sized auto array in + the hopes that many if not most cases will never need to grow this + array. But it switches as soon as the bounds of the array will be + exceeded. An extra find result is always appended to this array that + corresponds to the end of the destination string, so slen is checked + against mlen - 1 rather than mlen before resizing. + */ + + mlen = INITIAL_STATIC_FIND_INDEX_COUNT; + d = (int *) static_d; /* Avoid malloc for trivial/initial cases */ + acc = slen = 0; + + while ((pos = instr (b, pos, auxf)) >= 0) { + if (slen >= mlen - 1) { + int sl, *t; + + mlen += mlen; + sl = sizeof (int *) * mlen; + if (static_d == d) d = NULL; /* static_d cannot be realloced */ + if (mlen <= 0 || sl < mlen || NULL == (t = (int *) bstr__realloc (d, sl))) { + ret = BSTR_ERR; + goto done; + } + if (NULL == d) bstr__memcpy (t, static_d, sizeof (static_d)); + d = t; + } + d[slen] = pos; + slen++; + acc -= delta; + pos += auxf->slen; + if (pos < 0 || acc < 0) { + ret = BSTR_ERR; + goto done; + } + } + + /* slen <= INITIAL_STATIC_INDEX_COUNT-1 or mlen-1 here. */ + d[slen] = b->slen; + + if (BSTR_OK == (ret = balloc (b, b->slen + acc + 1))) { + b->slen += acc; + for (i = slen-1; i >= 0; i--) { + int s, l; + s = d[i] + auxf->slen; + l = d[i+1] - s; /* d[slen] may be accessed here. 
*/ + if (l) { + bstr__memmove (b->data + s + acc, b->data + s, l); + } + if (auxr->slen) { + bstr__memmove (b->data + s + acc - auxr->slen, + auxr->data, auxr->slen); + } + acc += delta; + } + b->data[b->slen] = (unsigned char) '\0'; + } + + done:; + if (static_d == d) d = NULL; + bstr__free (d); + if (auxf != find) bdestroy (auxf); + if (auxr != repl) bdestroy (auxr); + return ret; +} + +/* int bfindreplace (bstring b, const_bstring find, const_bstring repl, + * int pos) + * + * Replace all occurrences of a find string with a replace string after a + * given point in a bstring. + */ +int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos) { + return findreplaceengine (b, find, repl, pos, binstr); +} + +/* int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, + * int pos) + * + * Replace all occurrences of a find string, ignoring case, with a replace + * string after a given point in a bstring. + */ +int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos) { + return findreplaceengine (b, find, repl, pos, binstrcaseless); +} + +/* int binsertch (bstring b, int pos, int len, unsigned char fill) + * + * Inserts the character fill repeatedly into b at position pos for a + * length len. If the position pos is past the end of b, then the + * character "fill" is appended as necessary to make up the gap between the + * end of b and the position pos + len. 
+ */ +int binsertch (bstring b, int pos, int len, unsigned char fill) { +int d, l, i; + + if (pos < 0 || b == NULL || b->slen < 0 || b->mlen < b->slen || + b->mlen <= 0 || len < 0) return BSTR_ERR; + + /* Compute the two possible end pointers */ + d = b->slen + len; + l = pos + len; + if ((d|l) < 0) return BSTR_ERR; + + if (l > d) { + /* Inserting past the end of the string */ + if (balloc (b, l + 1) != BSTR_OK) return BSTR_ERR; + pos = b->slen; + b->slen = l; + } else { + /* Inserting in the middle of the string */ + if (balloc (b, d + 1) != BSTR_OK) return BSTR_ERR; + for (i = d - 1; i >= l; i--) { + b->data[i] = b->data[i - len]; + } + b->slen = d; + } + + for (i=pos; i < l; i++) b->data[i] = fill; + b->data[b->slen] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* int bpattern (bstring b, int len) + * + * Replicate the bstring, b in place, end to end repeatedly until it + * surpasses len characters, then chop the result to exactly len characters. + * This function operates in-place. The function will return with BSTR_ERR + * if b is NULL or of length 0, otherwise BSTR_OK is returned. + */ +int bpattern (bstring b, int len) { +int i, d; + + d = blength (b); + if (d <= 0 || len < 0 || balloc (b, len + 1) != BSTR_OK) return BSTR_ERR; + if (len > 0) { + if (d == 1) return bsetstr (b, len, NULL, b->data[0]); + for (i = d; i < len; i++) b->data[i] = b->data[i - d]; + } + b->data[len] = (unsigned char) '\0'; + b->slen = len; + return BSTR_OK; +} + +#define BS_BUFF_SZ (1024) + +/* int breada (bstring b, bNread readPtr, void * parm) + * + * Use a finite buffer fread-like function readPtr to concatenate to the + * bstring b the entire contents of file-like source data in a roughly + * efficient way. + */ +int breada (bstring b, bNread readPtr, void * parm) { +int i, l, n; + + if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || + b->mlen <= 0 || readPtr == NULL) return BSTR_ERR; + + i = b->slen; + for (n=i+16; ; n += ((n < BS_BUFF_SZ) ? 
n : BS_BUFF_SZ)) { + if (BSTR_OK != balloc (b, n + 1)) return BSTR_ERR; + l = (int) readPtr ((void *) (b->data + i), 1, n - i, parm); + i += l; + b->slen = i; + if (i < n) break; + } + + b->data[i] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* bstring bread (bNread readPtr, void * parm) + * + * Use a finite buffer fread-like function readPtr to create a bstring + * filled with the entire contents of file-like source data in a roughly + * efficient way. + */ +bstring bread (bNread readPtr, void * parm) { +bstring buff; + + if (0 > breada (buff = bfromcstr (""), readPtr, parm)) { + bdestroy (buff); + return NULL; + } + return buff; +} + +/* int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator) + * + * Use an fgetc-like single character stream reading function (getcPtr) to + * obtain a sequence of characters which are concatenated to the end of the + * bstring b. The stream read is terminated by the passed in terminator + * parameter. + * + * If getcPtr returns with a negative number, or the terminator character + * (which is appended) is read, then the stream reading is halted and the + * function returns with a partial result in b. If there is an empty partial + * result, 1 is returned. If no characters are read, or there is some other + * detectable error, BSTR_ERR is returned. 
+ */ +int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator) { +int c, d, e; + + if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || + b->mlen <= 0 || getcPtr == NULL) return BSTR_ERR; + d = 0; + e = b->mlen - 2; + + while ((c = getcPtr (parm)) >= 0) { + if (d > e) { + b->slen = d; + if (balloc (b, d + 2) != BSTR_OK) return BSTR_ERR; + e = b->mlen - 2; + } + b->data[d] = (unsigned char) c; + d++; + if (c == terminator) break; + } + + b->data[d] = (unsigned char) '\0'; + b->slen = d; + + return d == 0 && c < 0; +} + +/* int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator) + * + * Use an fgetc-like single character stream reading function (getcPtr) to + * obtain a sequence of characters which are concatenated to the end of the + * bstring b. The stream read is terminated by the passed in terminator + * parameter. + * + * If getcPtr returns with a negative number, or the terminator character + * (which is appended) is read, then the stream reading is halted and the + * function returns with a partial result concatentated to b. If there is + * an empty partial result, 1 is returned. If no characters are read, or + * there is some other detectable error, BSTR_ERR is returned. 
+ */ +int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator) { +int c, d, e; + + if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || + b->mlen <= 0 || getcPtr == NULL) return BSTR_ERR; + d = b->slen; + e = b->mlen - 2; + + while ((c = getcPtr (parm)) >= 0) { + if (d > e) { + b->slen = d; + if (balloc (b, d + 2) != BSTR_OK) return BSTR_ERR; + e = b->mlen - 2; + } + b->data[d] = (unsigned char) c; + d++; + if (c == terminator) break; + } + + b->data[d] = (unsigned char) '\0'; + b->slen = d; + + return d == 0 && c < 0; +} + +/* bstring bgets (bNgetc getcPtr, void * parm, char terminator) + * + * Use an fgetc-like single character stream reading function (getcPtr) to + * obtain a sequence of characters which are concatenated into a bstring. + * The stream read is terminated by the passed in terminator function. + * + * If getcPtr returns with a negative number, or the terminator character + * (which is appended) is read, then the stream reading is halted and the + * result obtained thus far is returned. If no characters are read, or + * there is some other detectable error, NULL is returned. + */ +bstring bgets (bNgetc getcPtr, void * parm, char terminator) { +bstring buff; + + if (0 > bgetsa (buff = bfromcstr (""), getcPtr, parm, terminator) || 0 >= buff->slen) { + bdestroy (buff); + buff = NULL; + } + return buff; +} + +struct bStream { + bstring buff; /* Buffer for over-reads */ + void * parm; /* The stream handle for core stream */ + bNread readFnPtr; /* fread compatible fnptr for core stream */ + int isEOF; /* track file's EOF state */ + int maxBuffSz; +}; + +/* struct bStream * bsopen (bNread readPtr, void * parm) + * + * Wrap a given open stream (described by a fread compatible function + * pointer and stream handle) into an open bStream suitable for the bstring + * library streaming functions. 
+ */ +struct bStream * bsopen (bNread readPtr, void * parm) { +struct bStream * s; + + if (readPtr == NULL) return NULL; + s = (struct bStream *) bstr__alloc (sizeof (struct bStream)); + if (s == NULL) return NULL; + s->parm = parm; + s->buff = bfromcstr (""); + s->readFnPtr = readPtr; + s->maxBuffSz = BS_BUFF_SZ; + s->isEOF = 0; + return s; +} + +/* int bsbufflength (struct bStream * s, int sz) + * + * Set the length of the buffer used by the bStream. If sz is zero, the + * length is not set. This function returns with the previous length. + */ +int bsbufflength (struct bStream * s, int sz) { +int oldSz; + if (s == NULL || sz < 0) return BSTR_ERR; + oldSz = s->maxBuffSz; + if (sz > 0) s->maxBuffSz = sz; + return oldSz; +} + +int bseof (const struct bStream * s) { + if (s == NULL || s->readFnPtr == NULL) return BSTR_ERR; + return s->isEOF && (s->buff->slen == 0); +} + +/* void * bsclose (struct bStream * s) + * + * Close the bStream, and return the handle to the stream that was originally + * used to open the given stream. + */ +void * bsclose (struct bStream * s) { +void * parm; + if (s == NULL) return NULL; + s->readFnPtr = NULL; + if (s->buff) bdestroy (s->buff); + s->buff = NULL; + parm = s->parm; + s->parm = NULL; + s->isEOF = 1; + bstr__free (s); + return parm; +} + +/* int bsreadlna (bstring r, struct bStream * s, char terminator) + * + * Read a bstring terminated by the terminator character or the end of the + * stream from the bStream (s) and return it into the parameter r. This + * function may read additional characters from the core stream that are not + * returned, but will be retained for subsequent read operations. 
+ */ +int bsreadlna (bstring r, struct bStream * s, char terminator) { +int i, l, ret, rlo; +char * b; +struct tagbstring x; + + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 || + r->slen < 0 || r->mlen < r->slen) return BSTR_ERR; + l = s->buff->slen; + if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + b = (char *) s->buff->data; + x.data = (unsigned char *) b; + + /* First check if the current buffer holds the terminator */ + b[l] = terminator; /* Set sentinel */ + for (i=0; b[i] != terminator; i++) ; + if (i < l) { + x.slen = i + 1; + ret = bconcat (r, &x); + s->buff->slen = l; + if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1); + return BSTR_OK; + } + + rlo = r->slen; + + /* If not then just concatenate the entire buffer to the output */ + x.slen = l; + if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR; + + /* Perform direct in-place reads into the destination to allow for + the minimum of data-copies */ + for (;;) { + if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR; + b = (char *) (r->data + r->slen); + l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm); + if (l <= 0) { + r->data[r->slen] = (unsigned char) '\0'; + s->buff->slen = 0; + s->isEOF = 1; + /* If nothing was read return with an error message */ + return BSTR_ERR & -(r->slen == rlo); + } + b[l] = terminator; /* Set sentinel */ + for (i=0; b[i] != terminator; i++) ; + if (i < l) break; + r->slen += l; + } + + /* Terminator found, push over-read back to buffer */ + i++; + r->slen += i; + s->buff->slen = l - i; + bstr__memcpy (s->buff->data, b + i, l - i); + r->data[r->slen] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* int bsreadlnsa (bstring r, struct bStream * s, bstring term) + * + * Read a bstring terminated by any character in the term string or the end + * of the stream from the bStream (s) and return it into the parameter r. 
+ * This function may read additional characters from the core stream that + * are not returned, but will be retained for subsequent read operations. + */ +int bsreadlnsa (bstring r, struct bStream * s, const_bstring term) { +int i, l, ret, rlo; +unsigned char * b; +struct tagbstring x; +struct charField cf; + + if (s == NULL || s->buff == NULL || r == NULL || term == NULL || + term->data == NULL || r->mlen <= 0 || r->slen < 0 || + r->mlen < r->slen) return BSTR_ERR; + if (term->slen == 1) return bsreadlna (r, s, term->data[0]); + if (term->slen < 1 || buildCharField (&cf, term)) return BSTR_ERR; + + l = s->buff->slen; + if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + b = (unsigned char *) s->buff->data; + x.data = b; + + /* First check if the current buffer holds the terminator */ + b[l] = term->data[0]; /* Set sentinel */ + for (i=0; !testInCharField (&cf, b[i]); i++) ; + if (i < l) { + x.slen = i + 1; + ret = bconcat (r, &x); + s->buff->slen = l; + if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1); + return BSTR_OK; + } + + rlo = r->slen; + + /* If not then just concatenate the entire buffer to the output */ + x.slen = l; + if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR; + + /* Perform direct in-place reads into the destination to allow for + the minimum of data-copies */ + for (;;) { + if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR; + b = (unsigned char *) (r->data + r->slen); + l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm); + if (l <= 0) { + r->data[r->slen] = (unsigned char) '\0'; + s->buff->slen = 0; + s->isEOF = 1; + /* If nothing was read return with an error message */ + return BSTR_ERR & -(r->slen == rlo); + } + + b[l] = term->data[0]; /* Set sentinel */ + for (i=0; !testInCharField (&cf, b[i]); i++) ; + if (i < l) break; + r->slen += l; + } + + /* Terminator found, push over-read back to buffer */ + i++; + r->slen += i; + s->buff->slen = l - i; + bstr__memcpy (s->buff->data, b + i, l - i); + 
r->data[r->slen] = (unsigned char) '\0'; + return BSTR_OK; +} + +/* int bsreada (bstring r, struct bStream * s, int n) + * + * Read a bstring of length n (or, if it is fewer, as many bytes as is + * remaining) from the bStream. This function may read additional + * characters from the core stream that are not returned, but will be + * retained for subsequent read operations. This function will not read + * additional characters from the core stream beyond virtual stream pointer. + */ +int bsreada (bstring r, struct bStream * s, int n) { +int l, ret, orslen; +char * b; +struct tagbstring x; + + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 + || r->slen < 0 || r->mlen < r->slen || n <= 0) return BSTR_ERR; + + n += r->slen; + if (n <= 0) return BSTR_ERR; + + l = s->buff->slen; + + orslen = r->slen; + + if (0 == l) { + if (s->isEOF) return BSTR_ERR; + if (r->mlen > n) { + l = (int) s->readFnPtr (r->data + r->slen, 1, n - r->slen, s->parm); + if (0 >= l || l > n - r->slen) { + s->isEOF = 1; + return BSTR_ERR; + } + r->slen += l; + r->data[r->slen] = (unsigned char) '\0'; + return 0; + } + } + + if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + b = (char *) s->buff->data; + x.data = (unsigned char *) b; + + do { + if (l + r->slen >= n) { + x.slen = n - r->slen; + ret = bconcat (r, &x); + s->buff->slen = l; + if (BSTR_OK == ret) bdelete (s->buff, 0, x.slen); + return BSTR_ERR & -(r->slen == orslen); + } + + x.slen = l; + if (BSTR_OK != bconcat (r, &x)) break; + + l = n - r->slen; + if (l > s->maxBuffSz) l = s->maxBuffSz; + + l = (int) s->readFnPtr (b, 1, l, s->parm); + + } while (l > 0); + if (l < 0) l = 0; + if (l == 0) s->isEOF = 1; + s->buff->slen = l; + return BSTR_ERR & -(r->slen == orslen); +} + +/* int bsreadln (bstring r, struct bStream * s, char terminator) + * + * Read a bstring terminated by the terminator character or the end of the + * stream from the bStream (s) and return it into the parameter r. 
This + * function may read additional characters from the core stream that are not + * returned, but will be retained for subsequent read operations. + */ +int bsreadln (bstring r, struct bStream * s, char terminator) { + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0) + return BSTR_ERR; + if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + r->slen = 0; + return bsreadlna (r, s, terminator); +} + +/* int bsreadlns (bstring r, struct bStream * s, bstring term) + * + * Read a bstring terminated by any character in the term string or the end + * of the stream from the bStream (s) and return it into the parameter r. + * This function may read additional characters from the core stream that + * are not returned, but will be retained for subsequent read operations. + */ +int bsreadlns (bstring r, struct bStream * s, const_bstring term) { + if (s == NULL || s->buff == NULL || r == NULL || term == NULL + || term->data == NULL || r->mlen <= 0) return BSTR_ERR; + if (term->slen == 1) return bsreadln (r, s, term->data[0]); + if (term->slen < 1) return BSTR_ERR; + if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + r->slen = 0; + return bsreadlnsa (r, s, term); +} + +/* int bsread (bstring r, struct bStream * s, int n) + * + * Read a bstring of length n (or, if it is fewer, as many bytes as is + * remaining) from the bStream. This function may read additional + * characters from the core stream that are not returned, but will be + * retained for subsequent read operations. This function will not read + * additional characters from the core stream beyond virtual stream pointer. 
+ */ +int bsread (bstring r, struct bStream * s, int n) { + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 + || n <= 0) return BSTR_ERR; + if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + r->slen = 0; + return bsreada (r, s, n); +} + +/* int bsunread (struct bStream * s, const_bstring b) + * + * Insert a bstring into the bStream at the current position. These + * characters will be read prior to those that actually come from the core + * stream. + */ +int bsunread (struct bStream * s, const_bstring b) { + if (s == NULL || s->buff == NULL) return BSTR_ERR; + return binsert (s->buff, 0, b, (unsigned char) '?'); +} + +/* int bspeek (bstring r, const struct bStream * s) + * + * Return the currently buffered characters from the bStream that will be + * read prior to reads from the core stream. + */ +int bspeek (bstring r, const struct bStream * s) { + if (s == NULL || s->buff == NULL) return BSTR_ERR; + return bassign (r, s->buff); +} + +/* bstring bjoin (const struct bstrList * bl, const_bstring sep); + * + * Join the entries of a bstrList into one bstring by sequentially + * concatenating them with the sep string in between. If there is an error + * NULL is returned, otherwise a bstring with the correct result is returned. + */ +bstring bjoin (const struct bstrList * bl, const_bstring sep) { +bstring b; +int i, c, v; + + if (bl == NULL || bl->qty < 0) return NULL; + if (sep != NULL && (sep->slen < 0 || sep->data == NULL)) return NULL; + + for (i = 0, c = 1; i < bl->qty; i++) { + v = bl->entry[i]->slen; + if (v < 0) return NULL; /* Invalid input */ + c += v; + if (c < 0) return NULL; /* Wrap around ?? 
*/ + } + + if (sep != NULL) c += (bl->qty - 1) * sep->slen; + + b = (bstring) bstr__alloc (sizeof (struct tagbstring)); + if (NULL == b) return NULL; /* Out of memory */ + b->data = (unsigned char *) bstr__alloc (c); + if (b->data == NULL) { + bstr__free (b); + return NULL; + } + + b->mlen = c; + b->slen = c-1; + + for (i = 0, c = 0; i < bl->qty; i++) { + if (i > 0 && sep != NULL) { + bstr__memcpy (b->data + c, sep->data, sep->slen); + c += sep->slen; + } + v = bl->entry[i]->slen; + bstr__memcpy (b->data + c, bl->entry[i]->data, v); + c += v; + } + b->data[c] = (unsigned char) '\0'; + return b; +} + +#define BSSSC_BUFF_LEN (256) + +/* int bssplitscb (struct bStream * s, const_bstring splitStr, + * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) + * + * Iterate the set of disjoint sequential substrings read from a stream + * divided by any of the characters in splitStr. An empty splitStr causes + * the whole stream to be iterated once. + * + * Note: At the point of calling the cb function, the bStream pointer is + * pointed exactly at the position right after having read the split + * character. The cb function can act on the stream by causing the bStream + * pointer to move, and bssplitscb will continue by starting the next split + * at the position of the pointer after the return from cb. + * + * However, if the cb causes the bStream s to be destroyed then the cb must + * return with a negative value, otherwise bssplitscb will continue in an + * undefined manner. 
+ */ +int bssplitscb (struct bStream * s, const_bstring splitStr, + int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) { +struct charField chrs; +bstring buff; +int i, p, ret; + + if (cb == NULL || s == NULL || s->readFnPtr == NULL + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + + if (NULL == (buff = bfromcstr (""))) return BSTR_ERR; + + if (splitStr->slen == 0) { + while (bsreada (buff, s, BSSSC_BUFF_LEN) >= 0) ; + if ((ret = cb (parm, 0, buff)) > 0) + ret = 0; + } else { + buildCharField (&chrs, splitStr); + ret = p = i = 0; + for (;;) { + if (i >= buff->slen) { + bsreada (buff, s, BSSSC_BUFF_LEN); + if (i >= buff->slen) { + if (0 < (ret = cb (parm, p, buff))) ret = 0; + break; + } + } + if (testInCharField (&chrs, buff->data[i])) { + struct tagbstring t; + unsigned char c; + + blk2tbstr (t, buff->data + i + 1, buff->slen - (i + 1)); + if ((ret = bsunread (s, &t)) < 0) break; + buff->slen = i; + c = buff->data[i]; + buff->data[i] = (unsigned char) '\0'; + if ((ret = cb (parm, p, buff)) < 0) break; + buff->data[i] = c; + buff->slen = 0; + p += i + 1; + i = -1; + } + i++; + } + } + + bdestroy (buff); + return ret; +} + +/* int bssplitstrcb (struct bStream * s, const_bstring splitStr, + * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) + * + * Iterate the set of disjoint sequential substrings read from a stream + * divided by the entire substring splitStr. An empty splitStr causes + * each character of the stream to be iterated. + * + * Note: At the point of calling the cb function, the bStream pointer is + * pointed exactly at the position right after having read the split + * character. The cb function can act on the stream by causing the bStream + * pointer to move, and bssplitscb will continue by starting the next split + * at the position of the pointer after the return from cb. 
+ * + * However, if the cb causes the bStream s to be destroyed then the cb must + * return with a negative value, otherwise bssplitscb will continue in an + * undefined manner. + */ +int bssplitstrcb (struct bStream * s, const_bstring splitStr, + int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) { +bstring buff; +int i, p, ret; + + if (cb == NULL || s == NULL || s->readFnPtr == NULL + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + + if (splitStr->slen == 1) return bssplitscb (s, splitStr, cb, parm); + + if (NULL == (buff = bfromcstr (""))) return BSTR_ERR; + + if (splitStr->slen == 0) { + for (i=0; bsreada (buff, s, BSSSC_BUFF_LEN) >= 0; i++) { + if ((ret = cb (parm, 0, buff)) < 0) { + bdestroy (buff); + return ret; + } + buff->slen = 0; + } + return BSTR_OK; + } else { + ret = p = i = 0; + for (i=p=0;;) { + if ((ret = binstr (buff, 0, splitStr)) >= 0) { + struct tagbstring t; + blk2tbstr (t, buff->data, ret); + i = ret + splitStr->slen; + if ((ret = cb (parm, p, &t)) < 0) break; + p += i; + bdelete (buff, 0, i); + } else { + bsreada (buff, s, BSSSC_BUFF_LEN); + if (bseof (s)) { + if ((ret = cb (parm, p, buff)) > 0) ret = 0; + break; + } + } + } + } + + bdestroy (buff); + return ret; +} + +/* int bstrListCreate (void) + * + * Create a bstrList. + */ +struct bstrList * bstrListCreate (void) { +struct bstrList * sl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); + if (sl) { + sl->entry = (bstring *) bstr__alloc (1*sizeof (bstring)); + if (!sl->entry) { + bstr__free (sl); + sl = NULL; + } else { + sl->qty = 0; + sl->mlen = 1; + } + } + return sl; +} + +/* int bstrListDestroy (struct bstrList * sl) + * + * Destroy a bstrList that has been created by bsplit, bsplits or bstrListCreate. 
+ */ +int bstrListDestroy (struct bstrList * sl) { +int i; + if (sl == NULL || sl->qty < 0) return BSTR_ERR; + for (i=0; i < sl->qty; i++) { + if (sl->entry[i]) { + bdestroy (sl->entry[i]); + sl->entry[i] = NULL; + } + } + sl->qty = -1; + sl->mlen = -1; + bstr__free (sl->entry); + sl->entry = NULL; + bstr__free (sl); + return BSTR_OK; +} + +/* int bstrListAlloc (struct bstrList * sl, int msz) + * + * Ensure that there is memory for at least msz number of entries for the + * list. + */ +int bstrListAlloc (struct bstrList * sl, int msz) { +bstring * l; +int smsz; +size_t nsz; + if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR; + if (sl->mlen >= msz) return BSTR_OK; + smsz = snapUpSize (msz); + nsz = ((size_t) smsz) * sizeof (bstring); + if (nsz < (size_t) smsz) return BSTR_ERR; + l = (bstring *) bstr__realloc (sl->entry, nsz); + if (!l) { + smsz = msz; + nsz = ((size_t) smsz) * sizeof (bstring); + l = (bstring *) bstr__realloc (sl->entry, nsz); + if (!l) return BSTR_ERR; + } + sl->mlen = smsz; + sl->entry = l; + return BSTR_OK; +} + +/* int bstrListAllocMin (struct bstrList * sl, int msz) + * + * Try to allocate the minimum amount of memory for the list to include at + * least msz entries or sl->qty whichever is greater. 
+ */ +int bstrListAllocMin (struct bstrList * sl, int msz) { +bstring * l; +size_t nsz; + if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR; + if (msz < sl->qty) msz = sl->qty; + if (sl->mlen == msz) return BSTR_OK; + nsz = ((size_t) msz) * sizeof (bstring); + if (nsz < (size_t) msz) return BSTR_ERR; + l = (bstring *) bstr__realloc (sl->entry, nsz); + if (!l) return BSTR_ERR; + sl->mlen = msz; + sl->entry = l; + return BSTR_OK; +} + +/* int bsplitcb (const_bstring str, unsigned char splitChar, int pos, + * int (* cb) (void * parm, int ofs, int len), void * parm) + * + * Iterate the set of disjoint sequential substrings over str divided by the + * character in splitChar. + * + * Note: Non-destructive modification of str from within the cb function + * while performing this split is not undefined. bsplitcb behaves in + * sequential lock step with calls to cb. I.e., after returning from a cb + * that return a non-negative integer, bsplitcb continues from the position + * 1 character after the last detected split character and it will halt + * immediately if the length of str falls below this point. However, if the + * cb function destroys str, then it *must* return with a negative value, + * otherwise bsplitcb will continue in an undefined manner. 
+ */ +int bsplitcb (const_bstring str, unsigned char splitChar, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm) { +int i, p, ret; + + if (cb == NULL || str == NULL || pos < 0 || pos > str->slen) + return BSTR_ERR; + + p = pos; + do { + for (i=p; i < str->slen; i++) { + if (str->data[i] == splitChar) break; + } + if ((ret = cb (parm, p, i - p)) < 0) return ret; + p = i + 1; + } while (p <= str->slen); + return BSTR_OK; +} + +/* int bsplitscb (const_bstring str, const_bstring splitStr, int pos, + * int (* cb) (void * parm, int ofs, int len), void * parm) + * + * Iterate the set of disjoint sequential substrings over str divided by any + * of the characters in splitStr. An empty splitStr causes the whole str to + * be iterated once. + * + * Note: Non-destructive modification of str from within the cb function + * while performing this split is not undefined. bsplitscb behaves in + * sequential lock step with calls to cb. I.e., after returning from a cb + * that return a non-negative integer, bsplitscb continues from the position + * 1 character after the last detected split character and it will halt + * immediately if the length of str falls below this point. However, if the + * cb function destroys str, then it *must* return with a negative value, + * otherwise bsplitscb will continue in an undefined manner. 
+ */ +int bsplitscb (const_bstring str, const_bstring splitStr, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm) { +struct charField chrs; +int i, p, ret; + + if (cb == NULL || str == NULL || pos < 0 || pos > str->slen + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + if (splitStr->slen == 0) { + if ((ret = cb (parm, 0, str->slen)) > 0) ret = 0; + return ret; + } + + if (splitStr->slen == 1) + return bsplitcb (str, splitStr->data[0], pos, cb, parm); + + buildCharField (&chrs, splitStr); + + p = pos; + do { + for (i=p; i < str->slen; i++) { + if (testInCharField (&chrs, str->data[i])) break; + } + if ((ret = cb (parm, p, i - p)) < 0) return ret; + p = i + 1; + } while (p <= str->slen); + return BSTR_OK; +} + +/* int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, + * int (* cb) (void * parm, int ofs, int len), void * parm) + * + * Iterate the set of disjoint sequential substrings over str divided by the + * substring splitStr. An empty splitStr causes the whole str to be + * iterated once. + * + * Note: Non-destructive modification of str from within the cb function + * while performing this split is not undefined. bsplitstrcb behaves in + * sequential lock step with calls to cb. I.e., after returning from a cb + * that return a non-negative integer, bsplitscb continues from the position + * 1 character after the last detected split character and it will halt + * immediately if the length of str falls below this point. However, if the + * cb function destroys str, then it *must* return with a negative value, + * otherwise bsplitscb will continue in an undefined manner. 
+ */ +int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm) { +int i, p, ret; + + if (cb == NULL || str == NULL || pos < 0 || pos > str->slen + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + + if (0 == splitStr->slen) { + for (i=pos; i < str->slen; i++) { + if ((ret = cb (parm, i, 1)) < 0) return ret; + } + return BSTR_OK; + } + + if (splitStr->slen == 1) + return bsplitcb (str, splitStr->data[0], pos, cb, parm); + + for (i=p=pos; i <= str->slen - splitStr->slen; i++) { + if (0 == bstr__memcmp (splitStr->data, str->data + i, splitStr->slen)) { + if ((ret = cb (parm, p, i - p)) < 0) return ret; + i += splitStr->slen; + p = i; + } + } + if ((ret = cb (parm, p, str->slen - p)) < 0) return ret; + return BSTR_OK; +} + +struct genBstrList { + bstring b; + struct bstrList * bl; +}; + +static int bscb (void * parm, int ofs, int len) { +struct genBstrList * g = (struct genBstrList *) parm; + if (g->bl->qty >= g->bl->mlen) { + int mlen = g->bl->mlen * 2; + bstring * tbl; + + while (g->bl->qty >= mlen) { + if (mlen < g->bl->mlen) return BSTR_ERR; + mlen += mlen; + } + + tbl = (bstring *) bstr__realloc (g->bl->entry, sizeof (bstring) * mlen); + if (tbl == NULL) return BSTR_ERR; + + g->bl->entry = tbl; + g->bl->mlen = mlen; + } + + g->bl->entry[g->bl->qty] = bmidstr (g->b, ofs, len); + g->bl->qty++; + return BSTR_OK; +} + +/* struct bstrList * bsplit (const_bstring str, unsigned char splitChar) + * + * Create an array of sequential substrings from str divided by the character + * splitChar. 
+ */ +struct bstrList * bsplit (const_bstring str, unsigned char splitChar) { +struct genBstrList g; + + if (str == NULL || str->data == NULL || str->slen < 0) return NULL; + + g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); + if (g.bl == NULL) return NULL; + g.bl->mlen = 4; + g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring)); + if (NULL == g.bl->entry) { + bstr__free (g.bl); + return NULL; + } + + g.b = (bstring) str; + g.bl->qty = 0; + if (bsplitcb (str, splitChar, 0, bscb, &g) < 0) { + bstrListDestroy (g.bl); + return NULL; + } + return g.bl; +} + +/* struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr) + * + * Create an array of sequential substrings from str divided by the entire + * substring splitStr. + */ +struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr) { +struct genBstrList g; + + if (str == NULL || str->data == NULL || str->slen < 0) return NULL; + + g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); + if (g.bl == NULL) return NULL; + g.bl->mlen = 4; + g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring)); + if (NULL == g.bl->entry) { + bstr__free (g.bl); + return NULL; + } + + g.b = (bstring) str; + g.bl->qty = 0; + if (bsplitstrcb (str, splitStr, 0, bscb, &g) < 0) { + bstrListDestroy (g.bl); + return NULL; + } + return g.bl; +} + +/* struct bstrList * bsplits (const_bstring str, bstring splitStr) + * + * Create an array of sequential substrings from str divided by any of the + * characters in splitStr. An empty splitStr causes a single entry bstrList + * containing a copy of str to be returned. 
+ */ +struct bstrList * bsplits (const_bstring str, const_bstring splitStr) { +struct genBstrList g; + + if ( str == NULL || str->slen < 0 || str->data == NULL || + splitStr == NULL || splitStr->slen < 0 || splitStr->data == NULL) + return NULL; + + g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); + if (g.bl == NULL) return NULL; + g.bl->mlen = 4; + g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring)); + if (NULL == g.bl->entry) { + bstr__free (g.bl); + return NULL; + } + g.b = (bstring) str; + g.bl->qty = 0; + + if (bsplitscb (str, splitStr, 0, bscb, &g) < 0) { + bstrListDestroy (g.bl); + return NULL; + } + return g.bl; +} + +#if defined (__TURBOC__) && !defined (__BORLANDC__) +# ifndef BSTRLIB_NOVSNP +# define BSTRLIB_NOVSNP +# endif +#endif + +/* Give WATCOM C/C++, MSVC some latitude for their non-support of vsnprintf */ +#if defined(__WATCOMC__) || defined(_MSC_VER) +#define exvsnprintf(r,b,n,f,a) {r = _vsnprintf (b,n,f,a);} +#else +#ifdef BSTRLIB_NOVSNP +/* This is just a hack. If you are using a system without a vsnprintf, it is + not recommended that bformat be used at all. */ +#define exvsnprintf(r,b,n,f,a) {vsprintf (b,f,a); r = -1;} +#define START_VSNBUFF (256) +#else + +#if defined(__GNUC__) && !defined(__APPLE__) +/* Something is making gcc complain about this prototype not being here, so + I've just gone ahead and put it in. */ +extern int vsnprintf (char *buf, size_t count, const char *format, va_list arg); +#endif + +#define exvsnprintf(r,b,n,f,a) {r = vsnprintf (b,n,f,a);} +#endif +#endif + +#if !defined (BSTRLIB_NOVSNP) + +#ifndef START_VSNBUFF +#define START_VSNBUFF (16) +#endif + +/* On IRIX vsnprintf returns n-1 when the operation would overflow the target + buffer, WATCOM and MSVC both return -1, while C99 requires that the + returned value be exactly what the length would be if the buffer would be + large enough. 
This leads to the idea that if the return value is larger
   than n, then changing n to the return value will reduce the number of
   iterations required. */

/*  int bformata (bstring b, const char * fmt, ...)
 *
 *  After the first parameter, it takes the same parameters as printf (), but
 *  rather than outputting results to stdio, it appends the results to
 *  a bstring which contains what would have been output. Note that if there
 *  is an early generation of a '\0' character, the bstring will be truncated
 *  to this end point.
 */
int bformata (bstring b, const char * fmt, ...) {
    va_list args;
    bstring scratch;
    int cap, written, fits, rc;

    if (b == NULL || fmt == NULL || b->data == NULL) return BSTR_ERR;
    if (b->mlen <= 0 || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR;

    /* The output length is not known in advance, so format into a scratch
       buffer with the truncating vsnprintf and retry with a larger buffer
       until the result is strictly shorter than the capacity. */

    cap = (int) (2*strlen (fmt));
    if (cap < START_VSNBUFF) cap = START_VSNBUFF;
    scratch = bfromcstralloc (cap + 2, "");
    if (scratch == NULL) {
        /* Fall back to the smallest buffer before giving up. */
        cap = 1;
        scratch = bfromcstralloc (cap + 2, "");
        if (scratch == NULL) return BSTR_ERR;
    }

    fits = 0;
    while (!fits) {
        /* The argument list is restarted for every attempt. */
        va_start (args, fmt);
        exvsnprintf (written, (char *) scratch->data, cap + 1, fmt, args);
        va_end (args);

        /* Force termination, then measure what actually landed. */
        scratch->data[cap] = (unsigned char) '\0';
        scratch->slen = (int) (strlen) ((char *) scratch->data);

        if (scratch->slen < cap) {
            fits = 1;
        } else {
            /* Jump to the reported length when there is one, else double. */
            cap = (written > cap) ? written : cap + cap;
            if (BSTR_OK != balloc (scratch, cap + 2)) {
                bdestroy (scratch);
                return BSTR_ERR;
            }
        }
    }

    rc = bconcat (b, scratch);
    bdestroy (scratch);
    return rc;
}

/*  int bassignformat (bstring b, const char * fmt, ...)
 *
 *  After the first parameter, it takes the same parameters as printf (), but
 *  rather than outputting results to stdio, it outputs the results to
 *  the bstring parameter b. Note that if there is an early generation of a
 *  '\0' character, the bstring will be truncated to this end point.
+ */ +int bassignformat (bstring b, const char * fmt, ...) { +va_list arglist; +bstring buff; +int n, r; + + if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 + || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; + + /* Since the length is not determinable beforehand, a search is + performed using the truncating "vsnprintf" call (to avoid buffer + overflows) on increasing potential sizes for the output result. */ + + if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF; + if (NULL == (buff = bfromcstralloc (n + 2, ""))) { + n = 1; + if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR; + } + + for (;;) { + va_start (arglist, fmt); + exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist); + va_end (arglist); + + buff->data[n] = (unsigned char) '\0'; + buff->slen = (int) (strlen) ((char *) buff->data); + + if (buff->slen < n) break; + + if (r > n) n = r; else n += n; + + if (BSTR_OK != balloc (buff, n + 2)) { + bdestroy (buff); + return BSTR_ERR; + } + } + + r = bassign (b, buff); + bdestroy (buff); + return r; +} + +/* bstring bformat (const char * fmt, ...) + * + * Takes the same parameters as printf (), but rather than outputting results + * to stdio, it forms a bstring which contains what would have been output. + * Note that if there is an early generation of a '\0' character, the + * bstring will be truncated to this end point. + */ +bstring bformat (const char * fmt, ...) { +va_list arglist; +bstring buff; +int n, r; + + if (fmt == NULL) return NULL; + + /* Since the length is not determinable beforehand, a search is + performed using the truncating "vsnprintf" call (to avoid buffer + overflows) on increasing potential sizes for the output result. 
*/ + + if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF; + if (NULL == (buff = bfromcstralloc (n + 2, ""))) { + n = 1; + if (NULL == (buff = bfromcstralloc (n + 2, ""))) return NULL; + } + + for (;;) { + va_start (arglist, fmt); + exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist); + va_end (arglist); + + buff->data[n] = (unsigned char) '\0'; + buff->slen = (int) (strlen) ((char *) buff->data); + + if (buff->slen < n) break; + + if (r > n) n = r; else n += n; + + if (BSTR_OK != balloc (buff, n + 2)) { + bdestroy (buff); + return NULL; + } + } + + return buff; +} + +/* int bvcformata (bstring b, int count, const char * fmt, va_list arglist) + * + * The bvcformata function formats data under control of the format control + * string fmt and attempts to append the result to b. The fmt parameter is + * the same as that of the printf function. The variable argument list is + * replaced with arglist, which has been initialized by the va_start macro. + * The size of the appended output is upper bounded by count. If the + * required output exceeds count, the string b is not augmented with any + * contents and a value below BSTR_ERR is returned. If a value below -count + * is returned then it is recommended that the negative of this value be + * used as an update to the count in a subsequent pass. On other errors, + * such as running out of memory, parameter errors or numeric wrap around + * BSTR_ERR is returned. BSTR_OK is returned when the output is successfully + * generated and appended to b. + * + * Note: There is no sanity checking of arglist, and this function is + * destructive of the contents of b from the b->slen point onward. If there + * is an early generation of a '\0' character, the bstring will be truncated + * to this end point. 
+ */ +int bvcformata (bstring b, int count, const char * fmt, va_list arg) { +int n, r, l; + + if (b == NULL || fmt == NULL || count <= 0 || b->data == NULL + || b->mlen <= 0 || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; + + if (count > (n = b->slen + count) + 2) return BSTR_ERR; + if (BSTR_OK != balloc (b, n + 2)) return BSTR_ERR; + + exvsnprintf (r, (char *) b->data + b->slen, count + 2, fmt, arg); + + /* Did the operation complete successfully within bounds? */ + for (l = b->slen; l <= n; l++) { + if ('\0' == b->data[l]) { + b->slen = l; + return BSTR_OK; + } + } + + /* Abort, since the buffer was not large enough. The return value + tries to help set what the retry length should be. */ + + b->data[b->slen] = '\0'; + if (r > count + 1) { /* Does r specify a particular target length? */ + n = r; + } else { + n = count + count; /* If not, just double the size of count */ + if (count > n) n = INT_MAX; + } + n = -n; + + if (n > BSTR_ERR-1) n = BSTR_ERR-1; + return n; +} + +#endif diff --git a/src/bstrlib.h b/src/bstrlib.h new file mode 100644 index 0000000..c8fa694 --- /dev/null +++ b/src/bstrlib.h @@ -0,0 +1,304 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * for details on usage and license. + */ + +/* + * bstrlib.h + * + * This file is the header file for the core module for implementing the + * bstring functions. 
+ */ + +#ifndef BSTRLIB_INCLUDE +#define BSTRLIB_INCLUDE + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include + +#if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP) +# if defined (__TURBOC__) && !defined (__BORLANDC__) +# define BSTRLIB_NOVSNP +# endif +#endif + +#define BSTR_ERR (-1) +#define BSTR_OK (0) +#define BSTR_BS_BUFF_LENGTH_GET (0) + +typedef struct tagbstring * bstring; +typedef const struct tagbstring * const_bstring; + +/* Copy functions */ +#define cstr2bstr bfromcstr +extern bstring bfromcstr (const char * str); +extern bstring bfromcstralloc (int mlen, const char * str); +extern bstring blk2bstr (const void * blk, int len); +extern char * bstr2cstr (const_bstring s, char z); +extern int bcstrfree (char * s); +extern bstring bstrcpy (const_bstring b1); +extern int bassign (bstring a, const_bstring b); +extern int bassignmidstr (bstring a, const_bstring b, int left, int len); +extern int bassigncstr (bstring a, const char * str); +extern int bassignblk (bstring a, const void * s, int len); + +/* Destroy function */ +extern int bdestroy (bstring b); + +/* Space allocation hinting functions */ +extern int balloc (bstring s, int len); +extern int ballocmin (bstring b, int len); + +/* Substring extraction */ +extern bstring bmidstr (const_bstring b, int left, int len); + +/* Various standard manipulations */ +extern int bconcat (bstring b0, const_bstring b1); +extern int bconchar (bstring b0, char c); +extern int bcatcstr (bstring b, const char * s); +extern int bcatblk (bstring b, const void * s, int len); +extern int binsert (bstring s1, int pos, const_bstring s2, unsigned char fill); +extern int binsertch (bstring s1, int pos, int len, unsigned char fill); +extern int breplace (bstring b1, int pos, int len, const_bstring b2, unsigned char fill); +extern int bdelete (bstring s1, int pos, int len); +extern int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill); +extern int btrunc (bstring b, int 
n); + +/* Scan/search functions */ +extern int bstricmp (const_bstring b0, const_bstring b1); +extern int bstrnicmp (const_bstring b0, const_bstring b1, int n); +extern int biseqcaseless (const_bstring b0, const_bstring b1); +extern int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len); +extern int biseq (const_bstring b0, const_bstring b1); +extern int bisstemeqblk (const_bstring b0, const void * blk, int len); +extern int biseqcstr (const_bstring b, const char * s); +extern int biseqcstrcaseless (const_bstring b, const char * s); +extern int bstrcmp (const_bstring b0, const_bstring b1); +extern int bstrncmp (const_bstring b0, const_bstring b1, int n); +extern int binstr (const_bstring s1, int pos, const_bstring s2); +extern int binstrr (const_bstring s1, int pos, const_bstring s2); +extern int binstrcaseless (const_bstring s1, int pos, const_bstring s2); +extern int binstrrcaseless (const_bstring s1, int pos, const_bstring s2); +extern int bstrchrp (const_bstring b, int c, int pos); +extern int bstrrchrp (const_bstring b, int c, int pos); +#define bstrchr(b,c) bstrchrp ((b), (c), 0) +#define bstrrchr(b,c) bstrrchrp ((b), (c), blength(b)-1) +extern int binchr (const_bstring b0, int pos, const_bstring b1); +extern int binchrr (const_bstring b0, int pos, const_bstring b1); +extern int bninchr (const_bstring b0, int pos, const_bstring b1); +extern int bninchrr (const_bstring b0, int pos, const_bstring b1); +extern int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos); +extern int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos); + +/* List of string container functions */ +struct bstrList { + int qty, mlen; + bstring * entry; +}; +extern struct bstrList * bstrListCreate (void); +extern int bstrListDestroy (struct bstrList * sl); +extern int bstrListAlloc (struct bstrList * sl, int msz); +extern int bstrListAllocMin (struct bstrList * sl, int msz); + +/* String split and join functions */ 
+extern struct bstrList * bsplit (const_bstring str, unsigned char splitChar); +extern struct bstrList * bsplits (const_bstring str, const_bstring splitStr); +extern struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr); +extern bstring bjoin (const struct bstrList * bl, const_bstring sep); +extern int bsplitcb (const_bstring str, unsigned char splitChar, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm); +extern int bsplitscb (const_bstring str, const_bstring splitStr, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm); +extern int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm); + +/* Miscellaneous functions */ +extern int bpattern (bstring b, int len); +extern int btoupper (bstring b); +extern int btolower (bstring b); +extern int bltrimws (bstring b); +extern int brtrimws (bstring b); +extern int btrimws (bstring b); + +/* <*>printf format functions */ +#if !defined (BSTRLIB_NOVSNP) +extern bstring bformat (const char * fmt, ...); +extern int bformata (bstring b, const char * fmt, ...); +extern int bassignformat (bstring b, const char * fmt, ...); +extern int bvcformata (bstring b, int count, const char * fmt, va_list arglist); + +#define bvformata(ret, b, fmt, lastarg) { \ +bstring bstrtmp_b = (b); \ +const char * bstrtmp_fmt = (fmt); \ +int bstrtmp_r = BSTR_ERR, bstrtmp_sz = 16; \ + for (;;) { \ + va_list bstrtmp_arglist; \ + va_start (bstrtmp_arglist, lastarg); \ + bstrtmp_r = bvcformata (bstrtmp_b, bstrtmp_sz, bstrtmp_fmt, bstrtmp_arglist); \ + va_end (bstrtmp_arglist); \ + if (bstrtmp_r >= 0) { /* Everything went ok */ \ + bstrtmp_r = BSTR_OK; \ + break; \ + } else if (-bstrtmp_r <= bstrtmp_sz) { /* A real error? 
*/ \ + bstrtmp_r = BSTR_ERR; \ + break; \ + } \ + bstrtmp_sz = -bstrtmp_r; /* Doubled or target size */ \ + } \ + ret = bstrtmp_r; \ +} + +#endif + +typedef int (*bNgetc) (void *parm); +typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, void *parm); + +/* Input functions */ +extern bstring bgets (bNgetc getcPtr, void * parm, char terminator); +extern bstring bread (bNread readPtr, void * parm); +extern int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator); +extern int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator); +extern int breada (bstring b, bNread readPtr, void * parm); + +/* Stream functions */ +extern struct bStream * bsopen (bNread readPtr, void * parm); +extern void * bsclose (struct bStream * s); +extern int bsbufflength (struct bStream * s, int sz); +extern int bsreadln (bstring b, struct bStream * s, char terminator); +extern int bsreadlns (bstring r, struct bStream * s, const_bstring term); +extern int bsread (bstring b, struct bStream * s, int n); +extern int bsreadlna (bstring b, struct bStream * s, char terminator); +extern int bsreadlnsa (bstring r, struct bStream * s, const_bstring term); +extern int bsreada (bstring b, struct bStream * s, int n); +extern int bsunread (struct bStream * s, const_bstring b); +extern int bspeek (bstring r, const struct bStream * s); +extern int bssplitscb (struct bStream * s, const_bstring splitStr, + int (* cb) (void * parm, int ofs, const_bstring entry), void * parm); +extern int bssplitstrcb (struct bStream * s, const_bstring splitStr, + int (* cb) (void * parm, int ofs, const_bstring entry), void * parm); +extern int bseof (const struct bStream * s); + +struct tagbstring { + int mlen; + int slen; + unsigned char * data; +}; + +/* Accessor macros */ +#define blengthe(b, e) (((b) == (void *)0 || (b)->slen < 0) ? (int)(e) : ((b)->slen)) +#define blength(b) (blengthe ((b), 0)) +#define bdataofse(b, o, e) (((b) == (void *)0 || (b)->data == (void*)0) ? 
(char *)(e) : ((char *)(b)->data) + (o)) +#define bdataofs(b, o) (bdataofse ((b), (o), (void *)0)) +#define bdatae(b, e) (bdataofse (b, 0, e)) +#define bdata(b) (bdataofs (b, 0)) +#define bchare(b, p, e) ((((unsigned)(p)) < (unsigned)blength(b)) ? ((b)->data[(p)]) : (e)) +#define bchar(b, p) bchare ((b), (p), '\0') + +/* Static constant string initialization macro */ +#define bsStaticMlen(q,m) {(m), (int) sizeof(q)-1, (unsigned char *) ("" q "")} +#if defined(_MSC_VER) +/* There are many versions of MSVC which emit __LINE__ as a non-constant. */ +# define bsStatic(q) bsStaticMlen(q,-32) +#endif +#ifndef bsStatic +# define bsStatic(q) bsStaticMlen(q,-__LINE__) +#endif + +/* Static constant block parameter pair */ +#define bsStaticBlkParms(q) ((void *)("" q "")), ((int) sizeof(q)-1) + +/* Reference building macros */ +#define cstr2tbstr btfromcstr +#define btfromcstr(t,s) { \ + (t).data = (unsigned char *) (s); \ + (t).slen = ((t).data) ? ((int) (strlen) ((char *)(t).data)) : 0; \ + (t).mlen = -1; \ +} +#define blk2tbstr(t,s,l) { \ + (t).data = (unsigned char *) (s); \ + (t).slen = l; \ + (t).mlen = -1; \ +} +#define btfromblk(t,s,l) blk2tbstr(t,s,l) +#define bmid2tbstr(t,b,p,l) { \ + const_bstring bstrtmp_s = (b); \ + if (bstrtmp_s && bstrtmp_s->data && bstrtmp_s->slen >= 0) { \ + int bstrtmp_left = (p); \ + int bstrtmp_len = (l); \ + if (bstrtmp_left < 0) { \ + bstrtmp_len += bstrtmp_left; \ + bstrtmp_left = 0; \ + } \ + if (bstrtmp_len > bstrtmp_s->slen - bstrtmp_left) \ + bstrtmp_len = bstrtmp_s->slen - bstrtmp_left; \ + if (bstrtmp_len <= 0) { \ + (t).data = (unsigned char *)""; \ + (t).slen = 0; \ + } else { \ + (t).data = bstrtmp_s->data + bstrtmp_left; \ + (t).slen = bstrtmp_len; \ + } \ + } else { \ + (t).data = (unsigned char *)""; \ + (t).slen = 0; \ + } \ + (t).mlen = -__LINE__; \ +} +#define btfromblkltrimws(t,s,l) { \ + int bstrtmp_idx = 0, bstrtmp_len = (l); \ + unsigned char * bstrtmp_s = (s); \ + if (bstrtmp_s && bstrtmp_len >= 0) { \ + for (; 
bstrtmp_idx < bstrtmp_len; bstrtmp_idx++) { \ + if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \ + } \ + } \ + (t).data = bstrtmp_s + bstrtmp_idx; \ + (t).slen = bstrtmp_len - bstrtmp_idx; \ + (t).mlen = -__LINE__; \ +} +#define btfromblkrtrimws(t,s,l) { \ + int bstrtmp_len = (l) - 1; \ + unsigned char * bstrtmp_s = (s); \ + if (bstrtmp_s && bstrtmp_len >= 0) { \ + for (; bstrtmp_len >= 0; bstrtmp_len--) { \ + if (!isspace (bstrtmp_s[bstrtmp_len])) break; \ + } \ + } \ + (t).data = bstrtmp_s; \ + (t).slen = bstrtmp_len + 1; \ + (t).mlen = -__LINE__; \ +} +#define btfromblktrimws(t,s,l) { \ + int bstrtmp_idx = 0, bstrtmp_len = (l) - 1; \ + unsigned char * bstrtmp_s = (s); \ + if (bstrtmp_s && bstrtmp_len >= 0) { \ + for (; bstrtmp_idx <= bstrtmp_len; bstrtmp_idx++) { \ + if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \ + } \ + for (; bstrtmp_len >= bstrtmp_idx; bstrtmp_len--) { \ + if (!isspace (bstrtmp_s[bstrtmp_len])) break; \ + } \ + } \ + (t).data = bstrtmp_s + bstrtmp_idx; \ + (t).slen = bstrtmp_len + 1 - bstrtmp_idx; \ + (t).mlen = -__LINE__; \ +} + +/* Write protection macros */ +#define bwriteprotect(t) { if ((t).mlen >= 0) (t).mlen = -1; } +#define bwriteallow(t) { if ((t).mlen == -1) (t).mlen = (t).slen + ((t).slen == 0); } +#define biswriteprotected(t) ((t).mlen <= 0) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/case_fold_switch.c b/src/case_fold_switch.c new file mode 100644 index 0000000..70fdd75 --- /dev/null +++ b/src/case_fold_switch.c @@ -0,0 +1,2637 @@ + switch (c) { + case 0x0041: + bufpush(0x0061); + break; + case 0x0042: + bufpush(0x0062); + break; + case 0x0043: + bufpush(0x0063); + break; + case 0x0044: + bufpush(0x0064); + break; + case 0x0045: + bufpush(0x0065); + break; + case 0x0046: + bufpush(0x0066); + break; + case 0x0047: + bufpush(0x0067); + break; + case 0x0048: + bufpush(0x0068); + break; + case 0x0049: + bufpush(0x0069); + break; + case 0x004A: + bufpush(0x006A); + break; + case 0x004B: + bufpush(0x006B); + break; + 
case 0x004C: + bufpush(0x006C); + break; + case 0x004D: + bufpush(0x006D); + break; + case 0x004E: + bufpush(0x006E); + break; + case 0x004F: + bufpush(0x006F); + break; + case 0x0050: + bufpush(0x0070); + break; + case 0x0051: + bufpush(0x0071); + break; + case 0x0052: + bufpush(0x0072); + break; + case 0x0053: + bufpush(0x0073); + break; + case 0x0054: + bufpush(0x0074); + break; + case 0x0055: + bufpush(0x0075); + break; + case 0x0056: + bufpush(0x0076); + break; + case 0x0057: + bufpush(0x0077); + break; + case 0x0058: + bufpush(0x0078); + break; + case 0x0059: + bufpush(0x0079); + break; + case 0x005A: + bufpush(0x007A); + break; + case 0x00B5: + bufpush(0x03BC); + break; + case 0x00C0: + bufpush(0x00E0); + break; + case 0x00C1: + bufpush(0x00E1); + break; + case 0x00C2: + bufpush(0x00E2); + break; + case 0x00C3: + bufpush(0x00E3); + break; + case 0x00C4: + bufpush(0x00E4); + break; + case 0x00C5: + bufpush(0x00E5); + break; + case 0x00C6: + bufpush(0x00E6); + break; + case 0x00C7: + bufpush(0x00E7); + break; + case 0x00C8: + bufpush(0x00E8); + break; + case 0x00C9: + bufpush(0x00E9); + break; + case 0x00CA: + bufpush(0x00EA); + break; + case 0x00CB: + bufpush(0x00EB); + break; + case 0x00CC: + bufpush(0x00EC); + break; + case 0x00CD: + bufpush(0x00ED); + break; + case 0x00CE: + bufpush(0x00EE); + break; + case 0x00CF: + bufpush(0x00EF); + break; + case 0x00D0: + bufpush(0x00F0); + break; + case 0x00D1: + bufpush(0x00F1); + break; + case 0x00D2: + bufpush(0x00F2); + break; + case 0x00D3: + bufpush(0x00F3); + break; + case 0x00D4: + bufpush(0x00F4); + break; + case 0x00D5: + bufpush(0x00F5); + break; + case 0x00D6: + bufpush(0x00F6); + break; + case 0x00D8: + bufpush(0x00F8); + break; + case 0x00D9: + bufpush(0x00F9); + break; + case 0x00DA: + bufpush(0x00FA); + break; + case 0x00DB: + bufpush(0x00FB); + break; + case 0x00DC: + bufpush(0x00FC); + break; + case 0x00DD: + bufpush(0x00FD); + break; + case 0x00DE: + bufpush(0x00FE); + break; + case 0x00DF: + 
bufpush(0x0073); + bufpush(0x0073); + break; + case 0x0100: + bufpush(0x0101); + break; + case 0x0102: + bufpush(0x0103); + break; + case 0x0104: + bufpush(0x0105); + break; + case 0x0106: + bufpush(0x0107); + break; + case 0x0108: + bufpush(0x0109); + break; + case 0x010A: + bufpush(0x010B); + break; + case 0x010C: + bufpush(0x010D); + break; + case 0x010E: + bufpush(0x010F); + break; + case 0x0110: + bufpush(0x0111); + break; + case 0x0112: + bufpush(0x0113); + break; + case 0x0114: + bufpush(0x0115); + break; + case 0x0116: + bufpush(0x0117); + break; + case 0x0118: + bufpush(0x0119); + break; + case 0x011A: + bufpush(0x011B); + break; + case 0x011C: + bufpush(0x011D); + break; + case 0x011E: + bufpush(0x011F); + break; + case 0x0120: + bufpush(0x0121); + break; + case 0x0122: + bufpush(0x0123); + break; + case 0x0124: + bufpush(0x0125); + break; + case 0x0126: + bufpush(0x0127); + break; + case 0x0128: + bufpush(0x0129); + break; + case 0x012A: + bufpush(0x012B); + break; + case 0x012C: + bufpush(0x012D); + break; + case 0x012E: + bufpush(0x012F); + break; + case 0x0130: + bufpush(0x0069); + bufpush(0x0307); + break; + case 0x0132: + bufpush(0x0133); + break; + case 0x0134: + bufpush(0x0135); + break; + case 0x0136: + bufpush(0x0137); + break; + case 0x0139: + bufpush(0x013A); + break; + case 0x013B: + bufpush(0x013C); + break; + case 0x013D: + bufpush(0x013E); + break; + case 0x013F: + bufpush(0x0140); + break; + case 0x0141: + bufpush(0x0142); + break; + case 0x0143: + bufpush(0x0144); + break; + case 0x0145: + bufpush(0x0146); + break; + case 0x0147: + bufpush(0x0148); + break; + case 0x0149: + bufpush(0x02BC); + bufpush(0x006E); + break; + case 0x014A: + bufpush(0x014B); + break; + case 0x014C: + bufpush(0x014D); + break; + case 0x014E: + bufpush(0x014F); + break; + case 0x0150: + bufpush(0x0151); + break; + case 0x0152: + bufpush(0x0153); + break; + case 0x0154: + bufpush(0x0155); + break; + case 0x0156: + bufpush(0x0157); + break; + case 0x0158: + 
bufpush(0x0159); + break; + case 0x015A: + bufpush(0x015B); + break; + case 0x015C: + bufpush(0x015D); + break; + case 0x015E: + bufpush(0x015F); + break; + case 0x0160: + bufpush(0x0161); + break; + case 0x0162: + bufpush(0x0163); + break; + case 0x0164: + bufpush(0x0165); + break; + case 0x0166: + bufpush(0x0167); + break; + case 0x0168: + bufpush(0x0169); + break; + case 0x016A: + bufpush(0x016B); + break; + case 0x016C: + bufpush(0x016D); + break; + case 0x016E: + bufpush(0x016F); + break; + case 0x0170: + bufpush(0x0171); + break; + case 0x0172: + bufpush(0x0173); + break; + case 0x0174: + bufpush(0x0175); + break; + case 0x0176: + bufpush(0x0177); + break; + case 0x0178: + bufpush(0x00FF); + break; + case 0x0179: + bufpush(0x017A); + break; + case 0x017B: + bufpush(0x017C); + break; + case 0x017D: + bufpush(0x017E); + break; + case 0x017F: + bufpush(0x0073); + break; + case 0x0181: + bufpush(0x0253); + break; + case 0x0182: + bufpush(0x0183); + break; + case 0x0184: + bufpush(0x0185); + break; + case 0x0186: + bufpush(0x0254); + break; + case 0x0187: + bufpush(0x0188); + break; + case 0x0189: + bufpush(0x0256); + break; + case 0x018A: + bufpush(0x0257); + break; + case 0x018B: + bufpush(0x018C); + break; + case 0x018E: + bufpush(0x01DD); + break; + case 0x018F: + bufpush(0x0259); + break; + case 0x0190: + bufpush(0x025B); + break; + case 0x0191: + bufpush(0x0192); + break; + case 0x0193: + bufpush(0x0260); + break; + case 0x0194: + bufpush(0x0263); + break; + case 0x0196: + bufpush(0x0269); + break; + case 0x0197: + bufpush(0x0268); + break; + case 0x0198: + bufpush(0x0199); + break; + case 0x019C: + bufpush(0x026F); + break; + case 0x019D: + bufpush(0x0272); + break; + case 0x019F: + bufpush(0x0275); + break; + case 0x01A0: + bufpush(0x01A1); + break; + case 0x01A2: + bufpush(0x01A3); + break; + case 0x01A4: + bufpush(0x01A5); + break; + case 0x01A6: + bufpush(0x0280); + break; + case 0x01A7: + bufpush(0x01A8); + break; + case 0x01A9: + bufpush(0x0283); + 
break; + case 0x01AC: + bufpush(0x01AD); + break; + case 0x01AE: + bufpush(0x0288); + break; + case 0x01AF: + bufpush(0x01B0); + break; + case 0x01B1: + bufpush(0x028A); + break; + case 0x01B2: + bufpush(0x028B); + break; + case 0x01B3: + bufpush(0x01B4); + break; + case 0x01B5: + bufpush(0x01B6); + break; + case 0x01B7: + bufpush(0x0292); + break; + case 0x01B8: + bufpush(0x01B9); + break; + case 0x01BC: + bufpush(0x01BD); + break; + case 0x01C4: + bufpush(0x01C6); + break; + case 0x01C5: + bufpush(0x01C6); + break; + case 0x01C7: + bufpush(0x01C9); + break; + case 0x01C8: + bufpush(0x01C9); + break; + case 0x01CA: + bufpush(0x01CC); + break; + case 0x01CB: + bufpush(0x01CC); + break; + case 0x01CD: + bufpush(0x01CE); + break; + case 0x01CF: + bufpush(0x01D0); + break; + case 0x01D1: + bufpush(0x01D2); + break; + case 0x01D3: + bufpush(0x01D4); + break; + case 0x01D5: + bufpush(0x01D6); + break; + case 0x01D7: + bufpush(0x01D8); + break; + case 0x01D9: + bufpush(0x01DA); + break; + case 0x01DB: + bufpush(0x01DC); + break; + case 0x01DE: + bufpush(0x01DF); + break; + case 0x01E0: + bufpush(0x01E1); + break; + case 0x01E2: + bufpush(0x01E3); + break; + case 0x01E4: + bufpush(0x01E5); + break; + case 0x01E6: + bufpush(0x01E7); + break; + case 0x01E8: + bufpush(0x01E9); + break; + case 0x01EA: + bufpush(0x01EB); + break; + case 0x01EC: + bufpush(0x01ED); + break; + case 0x01EE: + bufpush(0x01EF); + break; + case 0x01F0: + bufpush(0x006A); + bufpush(0x030C); + break; + case 0x01F1: + bufpush(0x01F3); + break; + case 0x01F2: + bufpush(0x01F3); + break; + case 0x01F4: + bufpush(0x01F5); + break; + case 0x01F6: + bufpush(0x0195); + break; + case 0x01F7: + bufpush(0x01BF); + break; + case 0x01F8: + bufpush(0x01F9); + break; + case 0x01FA: + bufpush(0x01FB); + break; + case 0x01FC: + bufpush(0x01FD); + break; + case 0x01FE: + bufpush(0x01FF); + break; + case 0x0200: + bufpush(0x0201); + break; + case 0x0202: + bufpush(0x0203); + break; + case 0x0204: + bufpush(0x0205); + 
break; + case 0x0206: + bufpush(0x0207); + break; + case 0x0208: + bufpush(0x0209); + break; + case 0x020A: + bufpush(0x020B); + break; + case 0x020C: + bufpush(0x020D); + break; + case 0x020E: + bufpush(0x020F); + break; + case 0x0210: + bufpush(0x0211); + break; + case 0x0212: + bufpush(0x0213); + break; + case 0x0214: + bufpush(0x0215); + break; + case 0x0216: + bufpush(0x0217); + break; + case 0x0218: + bufpush(0x0219); + break; + case 0x021A: + bufpush(0x021B); + break; + case 0x021C: + bufpush(0x021D); + break; + case 0x021E: + bufpush(0x021F); + break; + case 0x0220: + bufpush(0x019E); + break; + case 0x0222: + bufpush(0x0223); + break; + case 0x0224: + bufpush(0x0225); + break; + case 0x0226: + bufpush(0x0227); + break; + case 0x0228: + bufpush(0x0229); + break; + case 0x022A: + bufpush(0x022B); + break; + case 0x022C: + bufpush(0x022D); + break; + case 0x022E: + bufpush(0x022F); + break; + case 0x0230: + bufpush(0x0231); + break; + case 0x0232: + bufpush(0x0233); + break; + case 0x0345: + bufpush(0x03B9); + break; + case 0x0386: + bufpush(0x03AC); + break; + case 0x0388: + bufpush(0x03AD); + break; + case 0x0389: + bufpush(0x03AE); + break; + case 0x038A: + bufpush(0x03AF); + break; + case 0x038C: + bufpush(0x03CC); + break; + case 0x038E: + bufpush(0x03CD); + break; + case 0x038F: + bufpush(0x03CE); + break; + case 0x0390: + bufpush(0x03B9); + bufpush(0x0308); + bufpush(0x0301); + break; + case 0x0391: + bufpush(0x03B1); + break; + case 0x0392: + bufpush(0x03B2); + break; + case 0x0393: + bufpush(0x03B3); + break; + case 0x0394: + bufpush(0x03B4); + break; + case 0x0395: + bufpush(0x03B5); + break; + case 0x0396: + bufpush(0x03B6); + break; + case 0x0397: + bufpush(0x03B7); + break; + case 0x0398: + bufpush(0x03B8); + break; + case 0x0399: + bufpush(0x03B9); + break; + case 0x039A: + bufpush(0x03BA); + break; + case 0x039B: + bufpush(0x03BB); + break; + case 0x039C: + bufpush(0x03BC); + break; + case 0x039D: + bufpush(0x03BD); + break; + case 0x039E: + 
bufpush(0x03BE); + break; + case 0x039F: + bufpush(0x03BF); + break; + case 0x03A0: + bufpush(0x03C0); + break; + case 0x03A1: + bufpush(0x03C1); + break; + case 0x03A3: + bufpush(0x03C3); + break; + case 0x03A4: + bufpush(0x03C4); + break; + case 0x03A5: + bufpush(0x03C5); + break; + case 0x03A6: + bufpush(0x03C6); + break; + case 0x03A7: + bufpush(0x03C7); + break; + case 0x03A8: + bufpush(0x03C8); + break; + case 0x03A9: + bufpush(0x03C9); + break; + case 0x03AA: + bufpush(0x03CA); + break; + case 0x03AB: + bufpush(0x03CB); + break; + case 0x03B0: + bufpush(0x03C5); + bufpush(0x0308); + bufpush(0x0301); + break; + case 0x03C2: + bufpush(0x03C3); + break; + case 0x03D0: + bufpush(0x03B2); + break; + case 0x03D1: + bufpush(0x03B8); + break; + case 0x03D5: + bufpush(0x03C6); + break; + case 0x03D6: + bufpush(0x03C0); + break; + case 0x03D8: + bufpush(0x03D9); + break; + case 0x03DA: + bufpush(0x03DB); + break; + case 0x03DC: + bufpush(0x03DD); + break; + case 0x03DE: + bufpush(0x03DF); + break; + case 0x03E0: + bufpush(0x03E1); + break; + case 0x03E2: + bufpush(0x03E3); + break; + case 0x03E4: + bufpush(0x03E5); + break; + case 0x03E6: + bufpush(0x03E7); + break; + case 0x03E8: + bufpush(0x03E9); + break; + case 0x03EA: + bufpush(0x03EB); + break; + case 0x03EC: + bufpush(0x03ED); + break; + case 0x03EE: + bufpush(0x03EF); + break; + case 0x03F0: + bufpush(0x03BA); + break; + case 0x03F1: + bufpush(0x03C1); + break; + case 0x03F2: + bufpush(0x03C3); + break; + case 0x03F4: + bufpush(0x03B8); + break; + case 0x03F5: + bufpush(0x03B5); + break; + case 0x0400: + bufpush(0x0450); + break; + case 0x0401: + bufpush(0x0451); + break; + case 0x0402: + bufpush(0x0452); + break; + case 0x0403: + bufpush(0x0453); + break; + case 0x0404: + bufpush(0x0454); + break; + case 0x0405: + bufpush(0x0455); + break; + case 0x0406: + bufpush(0x0456); + break; + case 0x0407: + bufpush(0x0457); + break; + case 0x0408: + bufpush(0x0458); + break; + case 0x0409: + bufpush(0x0459); + break; 
+ case 0x040A: + bufpush(0x045A); + break; + case 0x040B: + bufpush(0x045B); + break; + case 0x040C: + bufpush(0x045C); + break; + case 0x040D: + bufpush(0x045D); + break; + case 0x040E: + bufpush(0x045E); + break; + case 0x040F: + bufpush(0x045F); + break; + case 0x0410: + bufpush(0x0430); + break; + case 0x0411: + bufpush(0x0431); + break; + case 0x0412: + bufpush(0x0432); + break; + case 0x0413: + bufpush(0x0433); + break; + case 0x0414: + bufpush(0x0434); + break; + case 0x0415: + bufpush(0x0435); + break; + case 0x0416: + bufpush(0x0436); + break; + case 0x0417: + bufpush(0x0437); + break; + case 0x0418: + bufpush(0x0438); + break; + case 0x0419: + bufpush(0x0439); + break; + case 0x041A: + bufpush(0x043A); + break; + case 0x041B: + bufpush(0x043B); + break; + case 0x041C: + bufpush(0x043C); + break; + case 0x041D: + bufpush(0x043D); + break; + case 0x041E: + bufpush(0x043E); + break; + case 0x041F: + bufpush(0x043F); + break; + case 0x0420: + bufpush(0x0440); + break; + case 0x0421: + bufpush(0x0441); + break; + case 0x0422: + bufpush(0x0442); + break; + case 0x0423: + bufpush(0x0443); + break; + case 0x0424: + bufpush(0x0444); + break; + case 0x0425: + bufpush(0x0445); + break; + case 0x0426: + bufpush(0x0446); + break; + case 0x0427: + bufpush(0x0447); + break; + case 0x0428: + bufpush(0x0448); + break; + case 0x0429: + bufpush(0x0449); + break; + case 0x042A: + bufpush(0x044A); + break; + case 0x042B: + bufpush(0x044B); + break; + case 0x042C: + bufpush(0x044C); + break; + case 0x042D: + bufpush(0x044D); + break; + case 0x042E: + bufpush(0x044E); + break; + case 0x042F: + bufpush(0x044F); + break; + case 0x0460: + bufpush(0x0461); + break; + case 0x0462: + bufpush(0x0463); + break; + case 0x0464: + bufpush(0x0465); + break; + case 0x0466: + bufpush(0x0467); + break; + case 0x0468: + bufpush(0x0469); + break; + case 0x046A: + bufpush(0x046B); + break; + case 0x046C: + bufpush(0x046D); + break; + case 0x046E: + bufpush(0x046F); + break; + case 0x0470: + 
bufpush(0x0471); + break; + case 0x0472: + bufpush(0x0473); + break; + case 0x0474: + bufpush(0x0475); + break; + case 0x0476: + bufpush(0x0477); + break; + case 0x0478: + bufpush(0x0479); + break; + case 0x047A: + bufpush(0x047B); + break; + case 0x047C: + bufpush(0x047D); + break; + case 0x047E: + bufpush(0x047F); + break; + case 0x0480: + bufpush(0x0481); + break; + case 0x048A: + bufpush(0x048B); + break; + case 0x048C: + bufpush(0x048D); + break; + case 0x048E: + bufpush(0x048F); + break; + case 0x0490: + bufpush(0x0491); + break; + case 0x0492: + bufpush(0x0493); + break; + case 0x0494: + bufpush(0x0495); + break; + case 0x0496: + bufpush(0x0497); + break; + case 0x0498: + bufpush(0x0499); + break; + case 0x049A: + bufpush(0x049B); + break; + case 0x049C: + bufpush(0x049D); + break; + case 0x049E: + bufpush(0x049F); + break; + case 0x04A0: + bufpush(0x04A1); + break; + case 0x04A2: + bufpush(0x04A3); + break; + case 0x04A4: + bufpush(0x04A5); + break; + case 0x04A6: + bufpush(0x04A7); + break; + case 0x04A8: + bufpush(0x04A9); + break; + case 0x04AA: + bufpush(0x04AB); + break; + case 0x04AC: + bufpush(0x04AD); + break; + case 0x04AE: + bufpush(0x04AF); + break; + case 0x04B0: + bufpush(0x04B1); + break; + case 0x04B2: + bufpush(0x04B3); + break; + case 0x04B4: + bufpush(0x04B5); + break; + case 0x04B6: + bufpush(0x04B7); + break; + case 0x04B8: + bufpush(0x04B9); + break; + case 0x04BA: + bufpush(0x04BB); + break; + case 0x04BC: + bufpush(0x04BD); + break; + case 0x04BE: + bufpush(0x04BF); + break; + case 0x04C1: + bufpush(0x04C2); + break; + case 0x04C3: + bufpush(0x04C4); + break; + case 0x04C5: + bufpush(0x04C6); + break; + case 0x04C7: + bufpush(0x04C8); + break; + case 0x04C9: + bufpush(0x04CA); + break; + case 0x04CB: + bufpush(0x04CC); + break; + case 0x04CD: + bufpush(0x04CE); + break; + case 0x04D0: + bufpush(0x04D1); + break; + case 0x04D2: + bufpush(0x04D3); + break; + case 0x04D4: + bufpush(0x04D5); + break; + case 0x04D6: + bufpush(0x04D7); + 
break; + case 0x04D8: + bufpush(0x04D9); + break; + case 0x04DA: + bufpush(0x04DB); + break; + case 0x04DC: + bufpush(0x04DD); + break; + case 0x04DE: + bufpush(0x04DF); + break; + case 0x04E0: + bufpush(0x04E1); + break; + case 0x04E2: + bufpush(0x04E3); + break; + case 0x04E4: + bufpush(0x04E5); + break; + case 0x04E6: + bufpush(0x04E7); + break; + case 0x04E8: + bufpush(0x04E9); + break; + case 0x04EA: + bufpush(0x04EB); + break; + case 0x04EC: + bufpush(0x04ED); + break; + case 0x04EE: + bufpush(0x04EF); + break; + case 0x04F0: + bufpush(0x04F1); + break; + case 0x04F2: + bufpush(0x04F3); + break; + case 0x04F4: + bufpush(0x04F5); + break; + case 0x04F8: + bufpush(0x04F9); + break; + case 0x0500: + bufpush(0x0501); + break; + case 0x0502: + bufpush(0x0503); + break; + case 0x0504: + bufpush(0x0505); + break; + case 0x0506: + bufpush(0x0507); + break; + case 0x0508: + bufpush(0x0509); + break; + case 0x050A: + bufpush(0x050B); + break; + case 0x050C: + bufpush(0x050D); + break; + case 0x050E: + bufpush(0x050F); + break; + case 0x0531: + bufpush(0x0561); + break; + case 0x0532: + bufpush(0x0562); + break; + case 0x0533: + bufpush(0x0563); + break; + case 0x0534: + bufpush(0x0564); + break; + case 0x0535: + bufpush(0x0565); + break; + case 0x0536: + bufpush(0x0566); + break; + case 0x0537: + bufpush(0x0567); + break; + case 0x0538: + bufpush(0x0568); + break; + case 0x0539: + bufpush(0x0569); + break; + case 0x053A: + bufpush(0x056A); + break; + case 0x053B: + bufpush(0x056B); + break; + case 0x053C: + bufpush(0x056C); + break; + case 0x053D: + bufpush(0x056D); + break; + case 0x053E: + bufpush(0x056E); + break; + case 0x053F: + bufpush(0x056F); + break; + case 0x0540: + bufpush(0x0570); + break; + case 0x0541: + bufpush(0x0571); + break; + case 0x0542: + bufpush(0x0572); + break; + case 0x0543: + bufpush(0x0573); + break; + case 0x0544: + bufpush(0x0574); + break; + case 0x0545: + bufpush(0x0575); + break; + case 0x0546: + bufpush(0x0576); + break; + case 0x0547: 
+ bufpush(0x0577); + break; + case 0x0548: + bufpush(0x0578); + break; + case 0x0549: + bufpush(0x0579); + break; + case 0x054A: + bufpush(0x057A); + break; + case 0x054B: + bufpush(0x057B); + break; + case 0x054C: + bufpush(0x057C); + break; + case 0x054D: + bufpush(0x057D); + break; + case 0x054E: + bufpush(0x057E); + break; + case 0x054F: + bufpush(0x057F); + break; + case 0x0550: + bufpush(0x0580); + break; + case 0x0551: + bufpush(0x0581); + break; + case 0x0552: + bufpush(0x0582); + break; + case 0x0553: + bufpush(0x0583); + break; + case 0x0554: + bufpush(0x0584); + break; + case 0x0555: + bufpush(0x0585); + break; + case 0x0556: + bufpush(0x0586); + break; + case 0x0587: + bufpush(0x0565); + bufpush(0x0582); + break; + case 0x1E00: + bufpush(0x1E01); + break; + case 0x1E02: + bufpush(0x1E03); + break; + case 0x1E04: + bufpush(0x1E05); + break; + case 0x1E06: + bufpush(0x1E07); + break; + case 0x1E08: + bufpush(0x1E09); + break; + case 0x1E0A: + bufpush(0x1E0B); + break; + case 0x1E0C: + bufpush(0x1E0D); + break; + case 0x1E0E: + bufpush(0x1E0F); + break; + case 0x1E10: + bufpush(0x1E11); + break; + case 0x1E12: + bufpush(0x1E13); + break; + case 0x1E14: + bufpush(0x1E15); + break; + case 0x1E16: + bufpush(0x1E17); + break; + case 0x1E18: + bufpush(0x1E19); + break; + case 0x1E1A: + bufpush(0x1E1B); + break; + case 0x1E1C: + bufpush(0x1E1D); + break; + case 0x1E1E: + bufpush(0x1E1F); + break; + case 0x1E20: + bufpush(0x1E21); + break; + case 0x1E22: + bufpush(0x1E23); + break; + case 0x1E24: + bufpush(0x1E25); + break; + case 0x1E26: + bufpush(0x1E27); + break; + case 0x1E28: + bufpush(0x1E29); + break; + case 0x1E2A: + bufpush(0x1E2B); + break; + case 0x1E2C: + bufpush(0x1E2D); + break; + case 0x1E2E: + bufpush(0x1E2F); + break; + case 0x1E30: + bufpush(0x1E31); + break; + case 0x1E32: + bufpush(0x1E33); + break; + case 0x1E34: + bufpush(0x1E35); + break; + case 0x1E36: + bufpush(0x1E37); + break; + case 0x1E38: + bufpush(0x1E39); + break; + case 0x1E3A: + 
bufpush(0x1E3B); + break; + case 0x1E3C: + bufpush(0x1E3D); + break; + case 0x1E3E: + bufpush(0x1E3F); + break; + case 0x1E40: + bufpush(0x1E41); + break; + case 0x1E42: + bufpush(0x1E43); + break; + case 0x1E44: + bufpush(0x1E45); + break; + case 0x1E46: + bufpush(0x1E47); + break; + case 0x1E48: + bufpush(0x1E49); + break; + case 0x1E4A: + bufpush(0x1E4B); + break; + case 0x1E4C: + bufpush(0x1E4D); + break; + case 0x1E4E: + bufpush(0x1E4F); + break; + case 0x1E50: + bufpush(0x1E51); + break; + case 0x1E52: + bufpush(0x1E53); + break; + case 0x1E54: + bufpush(0x1E55); + break; + case 0x1E56: + bufpush(0x1E57); + break; + case 0x1E58: + bufpush(0x1E59); + break; + case 0x1E5A: + bufpush(0x1E5B); + break; + case 0x1E5C: + bufpush(0x1E5D); + break; + case 0x1E5E: + bufpush(0x1E5F); + break; + case 0x1E60: + bufpush(0x1E61); + break; + case 0x1E62: + bufpush(0x1E63); + break; + case 0x1E64: + bufpush(0x1E65); + break; + case 0x1E66: + bufpush(0x1E67); + break; + case 0x1E68: + bufpush(0x1E69); + break; + case 0x1E6A: + bufpush(0x1E6B); + break; + case 0x1E6C: + bufpush(0x1E6D); + break; + case 0x1E6E: + bufpush(0x1E6F); + break; + case 0x1E70: + bufpush(0x1E71); + break; + case 0x1E72: + bufpush(0x1E73); + break; + case 0x1E74: + bufpush(0x1E75); + break; + case 0x1E76: + bufpush(0x1E77); + break; + case 0x1E78: + bufpush(0x1E79); + break; + case 0x1E7A: + bufpush(0x1E7B); + break; + case 0x1E7C: + bufpush(0x1E7D); + break; + case 0x1E7E: + bufpush(0x1E7F); + break; + case 0x1E80: + bufpush(0x1E81); + break; + case 0x1E82: + bufpush(0x1E83); + break; + case 0x1E84: + bufpush(0x1E85); + break; + case 0x1E86: + bufpush(0x1E87); + break; + case 0x1E88: + bufpush(0x1E89); + break; + case 0x1E8A: + bufpush(0x1E8B); + break; + case 0x1E8C: + bufpush(0x1E8D); + break; + case 0x1E8E: + bufpush(0x1E8F); + break; + case 0x1E90: + bufpush(0x1E91); + break; + case 0x1E92: + bufpush(0x1E93); + break; + case 0x1E94: + bufpush(0x1E95); + break; + case 0x1E96: + bufpush(0x0068); + 
bufpush(0x0331); + break; + case 0x1E97: + bufpush(0x0074); + bufpush(0x0308); + break; + case 0x1E98: + bufpush(0x0077); + bufpush(0x030A); + break; + case 0x1E99: + bufpush(0x0079); + bufpush(0x030A); + break; + case 0x1E9A: + bufpush(0x0061); + bufpush(0x02BE); + break; + case 0x1E9B: + bufpush(0x1E61); + break; + case 0x1EA0: + bufpush(0x1EA1); + break; + case 0x1EA2: + bufpush(0x1EA3); + break; + case 0x1EA4: + bufpush(0x1EA5); + break; + case 0x1EA6: + bufpush(0x1EA7); + break; + case 0x1EA8: + bufpush(0x1EA9); + break; + case 0x1EAA: + bufpush(0x1EAB); + break; + case 0x1EAC: + bufpush(0x1EAD); + break; + case 0x1EAE: + bufpush(0x1EAF); + break; + case 0x1EB0: + bufpush(0x1EB1); + break; + case 0x1EB2: + bufpush(0x1EB3); + break; + case 0x1EB4: + bufpush(0x1EB5); + break; + case 0x1EB6: + bufpush(0x1EB7); + break; + case 0x1EB8: + bufpush(0x1EB9); + break; + case 0x1EBA: + bufpush(0x1EBB); + break; + case 0x1EBC: + bufpush(0x1EBD); + break; + case 0x1EBE: + bufpush(0x1EBF); + break; + case 0x1EC0: + bufpush(0x1EC1); + break; + case 0x1EC2: + bufpush(0x1EC3); + break; + case 0x1EC4: + bufpush(0x1EC5); + break; + case 0x1EC6: + bufpush(0x1EC7); + break; + case 0x1EC8: + bufpush(0x1EC9); + break; + case 0x1ECA: + bufpush(0x1ECB); + break; + case 0x1ECC: + bufpush(0x1ECD); + break; + case 0x1ECE: + bufpush(0x1ECF); + break; + case 0x1ED0: + bufpush(0x1ED1); + break; + case 0x1ED2: + bufpush(0x1ED3); + break; + case 0x1ED4: + bufpush(0x1ED5); + break; + case 0x1ED6: + bufpush(0x1ED7); + break; + case 0x1ED8: + bufpush(0x1ED9); + break; + case 0x1EDA: + bufpush(0x1EDB); + break; + case 0x1EDC: + bufpush(0x1EDD); + break; + case 0x1EDE: + bufpush(0x1EDF); + break; + case 0x1EE0: + bufpush(0x1EE1); + break; + case 0x1EE2: + bufpush(0x1EE3); + break; + case 0x1EE4: + bufpush(0x1EE5); + break; + case 0x1EE6: + bufpush(0x1EE7); + break; + case 0x1EE8: + bufpush(0x1EE9); + break; + case 0x1EEA: + bufpush(0x1EEB); + break; + case 0x1EEC: + bufpush(0x1EED); + break; + 
case 0x1EEE: + bufpush(0x1EEF); + break; + case 0x1EF0: + bufpush(0x1EF1); + break; + case 0x1EF2: + bufpush(0x1EF3); + break; + case 0x1EF4: + bufpush(0x1EF5); + break; + case 0x1EF6: + bufpush(0x1EF7); + break; + case 0x1EF8: + bufpush(0x1EF9); + break; + case 0x1F08: + bufpush(0x1F00); + break; + case 0x1F09: + bufpush(0x1F01); + break; + case 0x1F0A: + bufpush(0x1F02); + break; + case 0x1F0B: + bufpush(0x1F03); + break; + case 0x1F0C: + bufpush(0x1F04); + break; + case 0x1F0D: + bufpush(0x1F05); + break; + case 0x1F0E: + bufpush(0x1F06); + break; + case 0x1F0F: + bufpush(0x1F07); + break; + case 0x1F18: + bufpush(0x1F10); + break; + case 0x1F19: + bufpush(0x1F11); + break; + case 0x1F1A: + bufpush(0x1F12); + break; + case 0x1F1B: + bufpush(0x1F13); + break; + case 0x1F1C: + bufpush(0x1F14); + break; + case 0x1F1D: + bufpush(0x1F15); + break; + case 0x1F28: + bufpush(0x1F20); + break; + case 0x1F29: + bufpush(0x1F21); + break; + case 0x1F2A: + bufpush(0x1F22); + break; + case 0x1F2B: + bufpush(0x1F23); + break; + case 0x1F2C: + bufpush(0x1F24); + break; + case 0x1F2D: + bufpush(0x1F25); + break; + case 0x1F2E: + bufpush(0x1F26); + break; + case 0x1F2F: + bufpush(0x1F27); + break; + case 0x1F38: + bufpush(0x1F30); + break; + case 0x1F39: + bufpush(0x1F31); + break; + case 0x1F3A: + bufpush(0x1F32); + break; + case 0x1F3B: + bufpush(0x1F33); + break; + case 0x1F3C: + bufpush(0x1F34); + break; + case 0x1F3D: + bufpush(0x1F35); + break; + case 0x1F3E: + bufpush(0x1F36); + break; + case 0x1F3F: + bufpush(0x1F37); + break; + case 0x1F48: + bufpush(0x1F40); + break; + case 0x1F49: + bufpush(0x1F41); + break; + case 0x1F4A: + bufpush(0x1F42); + break; + case 0x1F4B: + bufpush(0x1F43); + break; + case 0x1F4C: + bufpush(0x1F44); + break; + case 0x1F4D: + bufpush(0x1F45); + break; + case 0x1F50: + bufpush(0x03C5); + bufpush(0x0313); + break; + case 0x1F52: + bufpush(0x03C5); + bufpush(0x0313); + bufpush(0x0300); + break; + case 0x1F54: + bufpush(0x03C5); + bufpush(0x0313); 
+ bufpush(0x0301); + break; + case 0x1F56: + bufpush(0x03C5); + bufpush(0x0313); + bufpush(0x0342); + break; + case 0x1F59: + bufpush(0x1F51); + break; + case 0x1F5B: + bufpush(0x1F53); + break; + case 0x1F5D: + bufpush(0x1F55); + break; + case 0x1F5F: + bufpush(0x1F57); + break; + case 0x1F68: + bufpush(0x1F60); + break; + case 0x1F69: + bufpush(0x1F61); + break; + case 0x1F6A: + bufpush(0x1F62); + break; + case 0x1F6B: + bufpush(0x1F63); + break; + case 0x1F6C: + bufpush(0x1F64); + break; + case 0x1F6D: + bufpush(0x1F65); + break; + case 0x1F6E: + bufpush(0x1F66); + break; + case 0x1F6F: + bufpush(0x1F67); + break; + case 0x1F80: + bufpush(0x1F00); + bufpush(0x03B9); + break; + case 0x1F81: + bufpush(0x1F01); + bufpush(0x03B9); + break; + case 0x1F82: + bufpush(0x1F02); + bufpush(0x03B9); + break; + case 0x1F83: + bufpush(0x1F03); + bufpush(0x03B9); + break; + case 0x1F84: + bufpush(0x1F04); + bufpush(0x03B9); + break; + case 0x1F85: + bufpush(0x1F05); + bufpush(0x03B9); + break; + case 0x1F86: + bufpush(0x1F06); + bufpush(0x03B9); + break; + case 0x1F87: + bufpush(0x1F07); + bufpush(0x03B9); + break; + case 0x1F88: + bufpush(0x1F00); + bufpush(0x03B9); + break; + case 0x1F89: + bufpush(0x1F01); + bufpush(0x03B9); + break; + case 0x1F8A: + bufpush(0x1F02); + bufpush(0x03B9); + break; + case 0x1F8B: + bufpush(0x1F03); + bufpush(0x03B9); + break; + case 0x1F8C: + bufpush(0x1F04); + bufpush(0x03B9); + break; + case 0x1F8D: + bufpush(0x1F05); + bufpush(0x03B9); + break; + case 0x1F8E: + bufpush(0x1F06); + bufpush(0x03B9); + break; + case 0x1F8F: + bufpush(0x1F07); + bufpush(0x03B9); + break; + case 0x1F90: + bufpush(0x1F20); + bufpush(0x03B9); + break; + case 0x1F91: + bufpush(0x1F21); + bufpush(0x03B9); + break; + case 0x1F92: + bufpush(0x1F22); + bufpush(0x03B9); + break; + case 0x1F93: + bufpush(0x1F23); + bufpush(0x03B9); + break; + case 0x1F94: + bufpush(0x1F24); + bufpush(0x03B9); + break; + case 0x1F95: + bufpush(0x1F25); + bufpush(0x03B9); + break; + case 
0x1F96: + bufpush(0x1F26); + bufpush(0x03B9); + break; + case 0x1F97: + bufpush(0x1F27); + bufpush(0x03B9); + break; + case 0x1F98: + bufpush(0x1F20); + bufpush(0x03B9); + break; + case 0x1F99: + bufpush(0x1F21); + bufpush(0x03B9); + break; + case 0x1F9A: + bufpush(0x1F22); + bufpush(0x03B9); + break; + case 0x1F9B: + bufpush(0x1F23); + bufpush(0x03B9); + break; + case 0x1F9C: + bufpush(0x1F24); + bufpush(0x03B9); + break; + case 0x1F9D: + bufpush(0x1F25); + bufpush(0x03B9); + break; + case 0x1F9E: + bufpush(0x1F26); + bufpush(0x03B9); + break; + case 0x1F9F: + bufpush(0x1F27); + bufpush(0x03B9); + break; + case 0x1FA0: + bufpush(0x1F60); + bufpush(0x03B9); + break; + case 0x1FA1: + bufpush(0x1F61); + bufpush(0x03B9); + break; + case 0x1FA2: + bufpush(0x1F62); + bufpush(0x03B9); + break; + case 0x1FA3: + bufpush(0x1F63); + bufpush(0x03B9); + break; + case 0x1FA4: + bufpush(0x1F64); + bufpush(0x03B9); + break; + case 0x1FA5: + bufpush(0x1F65); + bufpush(0x03B9); + break; + case 0x1FA6: + bufpush(0x1F66); + bufpush(0x03B9); + break; + case 0x1FA7: + bufpush(0x1F67); + bufpush(0x03B9); + break; + case 0x1FA8: + bufpush(0x1F60); + bufpush(0x03B9); + break; + case 0x1FA9: + bufpush(0x1F61); + bufpush(0x03B9); + break; + case 0x1FAA: + bufpush(0x1F62); + bufpush(0x03B9); + break; + case 0x1FAB: + bufpush(0x1F63); + bufpush(0x03B9); + break; + case 0x1FAC: + bufpush(0x1F64); + bufpush(0x03B9); + break; + case 0x1FAD: + bufpush(0x1F65); + bufpush(0x03B9); + break; + case 0x1FAE: + bufpush(0x1F66); + bufpush(0x03B9); + break; + case 0x1FAF: + bufpush(0x1F67); + bufpush(0x03B9); + break; + case 0x1FB2: + bufpush(0x1F70); + bufpush(0x03B9); + break; + case 0x1FB3: + bufpush(0x03B1); + bufpush(0x03B9); + break; + case 0x1FB4: + bufpush(0x03AC); + bufpush(0x03B9); + break; + case 0x1FB6: + bufpush(0x03B1); + bufpush(0x0342); + break; + case 0x1FB7: + bufpush(0x03B1); + bufpush(0x0342); + bufpush(0x03B9); + break; + case 0x1FB8: + bufpush(0x1FB0); + break; + case 0x1FB9: + 
bufpush(0x1FB1); + break; + case 0x1FBA: + bufpush(0x1F70); + break; + case 0x1FBB: + bufpush(0x1F71); + break; + case 0x1FBC: + bufpush(0x03B1); + bufpush(0x03B9); + break; + case 0x1FBE: + bufpush(0x03B9); + break; + case 0x1FC2: + bufpush(0x1F74); + bufpush(0x03B9); + break; + case 0x1FC3: + bufpush(0x03B7); + bufpush(0x03B9); + break; + case 0x1FC4: + bufpush(0x03AE); + bufpush(0x03B9); + break; + case 0x1FC6: + bufpush(0x03B7); + bufpush(0x0342); + break; + case 0x1FC7: + bufpush(0x03B7); + bufpush(0x0342); + bufpush(0x03B9); + break; + case 0x1FC8: + bufpush(0x1F72); + break; + case 0x1FC9: + bufpush(0x1F73); + break; + case 0x1FCA: + bufpush(0x1F74); + break; + case 0x1FCB: + bufpush(0x1F75); + break; + case 0x1FCC: + bufpush(0x03B7); + bufpush(0x03B9); + break; + case 0x1FD2: + bufpush(0x03B9); + bufpush(0x0308); + bufpush(0x0300); + break; + case 0x1FD3: + bufpush(0x03B9); + bufpush(0x0308); + bufpush(0x0301); + break; + case 0x1FD6: + bufpush(0x03B9); + bufpush(0x0342); + break; + case 0x1FD7: + bufpush(0x03B9); + bufpush(0x0308); + bufpush(0x0342); + break; + case 0x1FD8: + bufpush(0x1FD0); + break; + case 0x1FD9: + bufpush(0x1FD1); + break; + case 0x1FDA: + bufpush(0x1F76); + break; + case 0x1FDB: + bufpush(0x1F77); + break; + case 0x1FE2: + bufpush(0x03C5); + bufpush(0x0308); + bufpush(0x0300); + break; + case 0x1FE3: + bufpush(0x03C5); + bufpush(0x0308); + bufpush(0x0301); + break; + case 0x1FE4: + bufpush(0x03C1); + bufpush(0x0313); + break; + case 0x1FE6: + bufpush(0x03C5); + bufpush(0x0342); + break; + case 0x1FE7: + bufpush(0x03C5); + bufpush(0x0308); + bufpush(0x0342); + break; + case 0x1FE8: + bufpush(0x1FE0); + break; + case 0x1FE9: + bufpush(0x1FE1); + break; + case 0x1FEA: + bufpush(0x1F7A); + break; + case 0x1FEB: + bufpush(0x1F7B); + break; + case 0x1FEC: + bufpush(0x1FE5); + break; + case 0x1FF2: + bufpush(0x1F7C); + bufpush(0x03B9); + break; + case 0x1FF3: + bufpush(0x03C9); + bufpush(0x03B9); + break; + case 0x1FF4: + bufpush(0x03CE); + 
bufpush(0x03B9); + break; + case 0x1FF6: + bufpush(0x03C9); + bufpush(0x0342); + break; + case 0x1FF7: + bufpush(0x03C9); + bufpush(0x0342); + bufpush(0x03B9); + break; + case 0x1FF8: + bufpush(0x1F78); + break; + case 0x1FF9: + bufpush(0x1F79); + break; + case 0x1FFA: + bufpush(0x1F7C); + break; + case 0x1FFB: + bufpush(0x1F7D); + break; + case 0x1FFC: + bufpush(0x03C9); + bufpush(0x03B9); + break; + case 0x2126: + bufpush(0x03C9); + break; + case 0x212A: + bufpush(0x006B); + break; + case 0x212B: + bufpush(0x00E5); + break; + case 0x2160: + bufpush(0x2170); + break; + case 0x2161: + bufpush(0x2171); + break; + case 0x2162: + bufpush(0x2172); + break; + case 0x2163: + bufpush(0x2173); + break; + case 0x2164: + bufpush(0x2174); + break; + case 0x2165: + bufpush(0x2175); + break; + case 0x2166: + bufpush(0x2176); + break; + case 0x2167: + bufpush(0x2177); + break; + case 0x2168: + bufpush(0x2178); + break; + case 0x2169: + bufpush(0x2179); + break; + case 0x216A: + bufpush(0x217A); + break; + case 0x216B: + bufpush(0x217B); + break; + case 0x216C: + bufpush(0x217C); + break; + case 0x216D: + bufpush(0x217D); + break; + case 0x216E: + bufpush(0x217E); + break; + case 0x216F: + bufpush(0x217F); + break; + case 0x24B6: + bufpush(0x24D0); + break; + case 0x24B7: + bufpush(0x24D1); + break; + case 0x24B8: + bufpush(0x24D2); + break; + case 0x24B9: + bufpush(0x24D3); + break; + case 0x24BA: + bufpush(0x24D4); + break; + case 0x24BB: + bufpush(0x24D5); + break; + case 0x24BC: + bufpush(0x24D6); + break; + case 0x24BD: + bufpush(0x24D7); + break; + case 0x24BE: + bufpush(0x24D8); + break; + case 0x24BF: + bufpush(0x24D9); + break; + case 0x24C0: + bufpush(0x24DA); + break; + case 0x24C1: + bufpush(0x24DB); + break; + case 0x24C2: + bufpush(0x24DC); + break; + case 0x24C3: + bufpush(0x24DD); + break; + case 0x24C4: + bufpush(0x24DE); + break; + case 0x24C5: + bufpush(0x24DF); + break; + case 0x24C6: + bufpush(0x24E0); + break; + case 0x24C7: + bufpush(0x24E1); + break; + 
case 0x24C8: + bufpush(0x24E2); + break; + case 0x24C9: + bufpush(0x24E3); + break; + case 0x24CA: + bufpush(0x24E4); + break; + case 0x24CB: + bufpush(0x24E5); + break; + case 0x24CC: + bufpush(0x24E6); + break; + case 0x24CD: + bufpush(0x24E7); + break; + case 0x24CE: + bufpush(0x24E8); + break; + case 0x24CF: + bufpush(0x24E9); + break; + case 0xFB00: + bufpush(0x0066); + bufpush(0x0066); + break; + case 0xFB01: + bufpush(0x0066); + bufpush(0x0069); + break; + case 0xFB02: + bufpush(0x0066); + bufpush(0x006C); + break; + case 0xFB03: + bufpush(0x0066); + bufpush(0x0066); + bufpush(0x0069); + break; + case 0xFB04: + bufpush(0x0066); + bufpush(0x0066); + bufpush(0x006C); + break; + case 0xFB05: + bufpush(0x0073); + bufpush(0x0074); + break; + case 0xFB06: + bufpush(0x0073); + bufpush(0x0074); + break; + case 0xFB13: + bufpush(0x0574); + bufpush(0x0576); + break; + case 0xFB14: + bufpush(0x0574); + bufpush(0x0565); + break; + case 0xFB15: + bufpush(0x0574); + bufpush(0x056B); + break; + case 0xFB16: + bufpush(0x057E); + bufpush(0x0576); + break; + case 0xFB17: + bufpush(0x0574); + bufpush(0x056D); + break; + case 0xFF21: + bufpush(0xFF41); + break; + case 0xFF22: + bufpush(0xFF42); + break; + case 0xFF23: + bufpush(0xFF43); + break; + case 0xFF24: + bufpush(0xFF44); + break; + case 0xFF25: + bufpush(0xFF45); + break; + case 0xFF26: + bufpush(0xFF46); + break; + case 0xFF27: + bufpush(0xFF47); + break; + case 0xFF28: + bufpush(0xFF48); + break; + case 0xFF29: + bufpush(0xFF49); + break; + case 0xFF2A: + bufpush(0xFF4A); + break; + case 0xFF2B: + bufpush(0xFF4B); + break; + case 0xFF2C: + bufpush(0xFF4C); + break; + case 0xFF2D: + bufpush(0xFF4D); + break; + case 0xFF2E: + bufpush(0xFF4E); + break; + case 0xFF2F: + bufpush(0xFF4F); + break; + case 0xFF30: + bufpush(0xFF50); + break; + case 0xFF31: + bufpush(0xFF51); + break; + case 0xFF32: + bufpush(0xFF52); + break; + case 0xFF33: + bufpush(0xFF53); + break; + case 0xFF34: + bufpush(0xFF54); + break; + case 0xFF35: 
+ bufpush(0xFF55); + break; + case 0xFF36: + bufpush(0xFF56); + break; + case 0xFF37: + bufpush(0xFF57); + break; + case 0xFF38: + bufpush(0xFF58); + break; + case 0xFF39: + bufpush(0xFF59); + break; + case 0xFF3A: + bufpush(0xFF5A); + break; + case 0x10400: + bufpush(0x10428); + break; + case 0x10401: + bufpush(0x10429); + break; + case 0x10402: + bufpush(0x1042A); + break; + case 0x10403: + bufpush(0x1042B); + break; + case 0x10404: + bufpush(0x1042C); + break; + case 0x10405: + bufpush(0x1042D); + break; + case 0x10406: + bufpush(0x1042E); + break; + case 0x10407: + bufpush(0x1042F); + break; + case 0x10408: + bufpush(0x10430); + break; + case 0x10409: + bufpush(0x10431); + break; + case 0x1040A: + bufpush(0x10432); + break; + case 0x1040B: + bufpush(0x10433); + break; + case 0x1040C: + bufpush(0x10434); + break; + case 0x1040D: + bufpush(0x10435); + break; + case 0x1040E: + bufpush(0x10436); + break; + case 0x1040F: + bufpush(0x10437); + break; + case 0x10410: + bufpush(0x10438); + break; + case 0x10411: + bufpush(0x10439); + break; + case 0x10412: + bufpush(0x1043A); + break; + case 0x10413: + bufpush(0x1043B); + break; + case 0x10414: + bufpush(0x1043C); + break; + case 0x10415: + bufpush(0x1043D); + break; + case 0x10416: + bufpush(0x1043E); + break; + case 0x10417: + bufpush(0x1043F); + break; + case 0x10418: + bufpush(0x10440); + break; + case 0x10419: + bufpush(0x10441); + break; + case 0x1041A: + bufpush(0x10442); + break; + case 0x1041B: + bufpush(0x10443); + break; + case 0x1041C: + bufpush(0x10444); + break; + case 0x1041D: + bufpush(0x10445); + break; + case 0x1041E: + bufpush(0x10446); + break; + case 0x1041F: + bufpush(0x10447); + break; + case 0x10420: + bufpush(0x10448); + break; + case 0x10421: + bufpush(0x10449); + break; + case 0x10422: + bufpush(0x1044A); + break; + case 0x10423: + bufpush(0x1044B); + break; + case 0x10424: + bufpush(0x1044C); + break; + case 0x10425: + bufpush(0x1044D); + break; + default: + bufpush(c); + } diff --git 
a/src/casefold.c b/src/casefold.c new file mode 100644 index 0000000..33f18aa --- /dev/null +++ b/src/casefold.c @@ -0,0 +1,2699 @@ +#include +#include + + + switch c { + case 0x0041: + bufpush(0x0061); + break; + case 0x0042: + bufpush(0x0062); + break; + case 0x0043: + bufpush(0x0063); + break; + case 0x0044: + bufpush(0x0064); + break; + case 0x0045: + bufpush(0x0065); + break; + case 0x0046: + bufpush(0x0066); + break; + case 0x0047: + bufpush(0x0067); + break; + case 0x0048: + bufpush(0x0068); + break; + case 0x0049: + bufpush(0x0069); + break; + case 0x0049: + bufpush(0x0131); + break; + case 0x004A: + bufpush(0x006A); + break; + case 0x004B: + bufpush(0x006B); + break; + case 0x004C: + bufpush(0x006C); + break; + case 0x004D: + bufpush(0x006D); + break; + case 0x004E: + bufpush(0x006E); + break; + case 0x004F: + bufpush(0x006F); + break; + case 0x0050: + bufpush(0x0070); + break; + case 0x0051: + bufpush(0x0071); + break; + case 0x0052: + bufpush(0x0072); + break; + case 0x0053: + bufpush(0x0073); + break; + case 0x0054: + bufpush(0x0074); + break; + case 0x0055: + bufpush(0x0075); + break; + case 0x0056: + bufpush(0x0076); + break; + case 0x0057: + bufpush(0x0077); + break; + case 0x0058: + bufpush(0x0078); + break; + case 0x0059: + bufpush(0x0079); + break; + case 0x005A: + bufpush(0x007A); + break; + case 0x00B5: + bufpush(0x03BC); + break; + case 0x00C0: + bufpush(0x00E0); + break; + case 0x00C1: + bufpush(0x00E1); + break; + case 0x00C2: + bufpush(0x00E2); + break; + case 0x00C3: + bufpush(0x00E3); + break; + case 0x00C4: + bufpush(0x00E4); + break; + case 0x00C5: + bufpush(0x00E5); + break; + case 0x00C6: + bufpush(0x00E6); + break; + case 0x00C7: + bufpush(0x00E7); + break; + case 0x00C8: + bufpush(0x00E8); + break; + case 0x00C9: + bufpush(0x00E9); + break; + case 0x00CA: + bufpush(0x00EA); + break; + case 0x00CB: + bufpush(0x00EB); + break; + case 0x00CC: + bufpush(0x00EC); + break; + case 0x00CD: + bufpush(0x00ED); + break; + case 0x00CE: + 
bufpush(0x00EE); + break; + case 0x00CF: + bufpush(0x00EF); + break; + case 0x00D0: + bufpush(0x00F0); + break; + case 0x00D1: + bufpush(0x00F1); + break; + case 0x00D2: + bufpush(0x00F2); + break; + case 0x00D3: + bufpush(0x00F3); + break; + case 0x00D4: + bufpush(0x00F4); + break; + case 0x00D5: + bufpush(0x00F5); + break; + case 0x00D6: + bufpush(0x00F6); + break; + case 0x00D8: + bufpush(0x00F8); + break; + case 0x00D9: + bufpush(0x00F9); + break; + case 0x00DA: + bufpush(0x00FA); + break; + case 0x00DB: + bufpush(0x00FB); + break; + case 0x00DC: + bufpush(0x00FC); + break; + case 0x00DD: + bufpush(0x00FD); + break; + case 0x00DE: + bufpush(0x00FE); + break; + case 0x00DF: + bufpush(0x0073); + bufpush(0x0073); + break; + case 0x0100: + bufpush(0x0101); + break; + case 0x0102: + bufpush(0x0103); + break; + case 0x0104: + bufpush(0x0105); + break; + case 0x0106: + bufpush(0x0107); + break; + case 0x0108: + bufpush(0x0109); + break; + case 0x010A: + bufpush(0x010B); + break; + case 0x010C: + bufpush(0x010D); + break; + case 0x010E: + bufpush(0x010F); + break; + case 0x0110: + bufpush(0x0111); + break; + case 0x0112: + bufpush(0x0113); + break; + case 0x0114: + bufpush(0x0115); + break; + case 0x0116: + bufpush(0x0117); + break; + case 0x0118: + bufpush(0x0119); + break; + case 0x011A: + bufpush(0x011B); + break; + case 0x011C: + bufpush(0x011D); + break; + case 0x011E: + bufpush(0x011F); + break; + case 0x0120: + bufpush(0x0121); + break; + case 0x0122: + bufpush(0x0123); + break; + case 0x0124: + bufpush(0x0125); + break; + case 0x0126: + bufpush(0x0127); + break; + case 0x0128: + bufpush(0x0129); + break; + case 0x012A: + bufpush(0x012B); + break; + case 0x012C: + bufpush(0x012D); + break; + case 0x012E: + bufpush(0x012F); + break; + case 0x0130: + bufpush(0x0069); + bufpush(0x0307); + break; + case 0x0130: + bufpush(0x0069); + break; + case 0x0132: + bufpush(0x0133); + break; + case 0x0134: + bufpush(0x0135); + break; + case 0x0136: + bufpush(0x0137); + break; 
+ case 0x0139: + bufpush(0x013A); + break; + case 0x013B: + bufpush(0x013C); + break; + case 0x013D: + bufpush(0x013E); + break; + case 0x013F: + bufpush(0x0140); + break; + case 0x0141: + bufpush(0x0142); + break; + case 0x0143: + bufpush(0x0144); + break; + case 0x0145: + bufpush(0x0146); + break; + case 0x0147: + bufpush(0x0148); + break; + case 0x0149: + bufpush(0x02BC); + bufpush(0x006E); + break; + case 0x014A: + bufpush(0x014B); + break; + case 0x014C: + bufpush(0x014D); + break; + case 0x014E: + bufpush(0x014F); + break; + case 0x0150: + bufpush(0x0151); + break; + case 0x0152: + bufpush(0x0153); + break; + case 0x0154: + bufpush(0x0155); + break; + case 0x0156: + bufpush(0x0157); + break; + case 0x0158: + bufpush(0x0159); + break; + case 0x015A: + bufpush(0x015B); + break; + case 0x015C: + bufpush(0x015D); + break; + case 0x015E: + bufpush(0x015F); + break; + case 0x0160: + bufpush(0x0161); + break; + case 0x0162: + bufpush(0x0163); + break; + case 0x0164: + bufpush(0x0165); + break; + case 0x0166: + bufpush(0x0167); + break; + case 0x0168: + bufpush(0x0169); + break; + case 0x016A: + bufpush(0x016B); + break; + case 0x016C: + bufpush(0x016D); + break; + case 0x016E: + bufpush(0x016F); + break; + case 0x0170: + bufpush(0x0171); + break; + case 0x0172: + bufpush(0x0173); + break; + case 0x0174: + bufpush(0x0175); + break; + case 0x0176: + bufpush(0x0177); + break; + case 0x0178: + bufpush(0x00FF); + break; + case 0x0179: + bufpush(0x017A); + break; + case 0x017B: + bufpush(0x017C); + break; + case 0x017D: + bufpush(0x017E); + break; + case 0x017F: + bufpush(0x0073); + break; + case 0x0181: + bufpush(0x0253); + break; + case 0x0182: + bufpush(0x0183); + break; + case 0x0184: + bufpush(0x0185); + break; + case 0x0186: + bufpush(0x0254); + break; + case 0x0187: + bufpush(0x0188); + break; + case 0x0189: + bufpush(0x0256); + break; + case 0x018A: + bufpush(0x0257); + break; + case 0x018B: + bufpush(0x018C); + break; + case 0x018E: + bufpush(0x01DD); + break; + 
case 0x018F: + bufpush(0x0259); + break; + case 0x0190: + bufpush(0x025B); + break; + case 0x0191: + bufpush(0x0192); + break; + case 0x0193: + bufpush(0x0260); + break; + case 0x0194: + bufpush(0x0263); + break; + case 0x0196: + bufpush(0x0269); + break; + case 0x0197: + bufpush(0x0268); + break; + case 0x0198: + bufpush(0x0199); + break; + case 0x019C: + bufpush(0x026F); + break; + case 0x019D: + bufpush(0x0272); + break; + case 0x019F: + bufpush(0x0275); + break; + case 0x01A0: + bufpush(0x01A1); + break; + case 0x01A2: + bufpush(0x01A3); + break; + case 0x01A4: + bufpush(0x01A5); + break; + case 0x01A6: + bufpush(0x0280); + break; + case 0x01A7: + bufpush(0x01A8); + break; + case 0x01A9: + bufpush(0x0283); + break; + case 0x01AC: + bufpush(0x01AD); + break; + case 0x01AE: + bufpush(0x0288); + break; + case 0x01AF: + bufpush(0x01B0); + break; + case 0x01B1: + bufpush(0x028A); + break; + case 0x01B2: + bufpush(0x028B); + break; + case 0x01B3: + bufpush(0x01B4); + break; + case 0x01B5: + bufpush(0x01B6); + break; + case 0x01B7: + bufpush(0x0292); + break; + case 0x01B8: + bufpush(0x01B9); + break; + case 0x01BC: + bufpush(0x01BD); + break; + case 0x01C4: + bufpush(0x01C6); + break; + case 0x01C5: + bufpush(0x01C6); + break; + case 0x01C7: + bufpush(0x01C9); + break; + case 0x01C8: + bufpush(0x01C9); + break; + case 0x01CA: + bufpush(0x01CC); + break; + case 0x01CB: + bufpush(0x01CC); + break; + case 0x01CD: + bufpush(0x01CE); + break; + case 0x01CF: + bufpush(0x01D0); + break; + case 0x01D1: + bufpush(0x01D2); + break; + case 0x01D3: + bufpush(0x01D4); + break; + case 0x01D5: + bufpush(0x01D6); + break; + case 0x01D7: + bufpush(0x01D8); + break; + case 0x01D9: + bufpush(0x01DA); + break; + case 0x01DB: + bufpush(0x01DC); + break; + case 0x01DE: + bufpush(0x01DF); + break; + case 0x01E0: + bufpush(0x01E1); + break; + case 0x01E2: + bufpush(0x01E3); + break; + case 0x01E4: + bufpush(0x01E5); + break; + case 0x01E6: + bufpush(0x01E7); + break; + case 0x01E8: + 
bufpush(0x01E9); + break; + case 0x01EA: + bufpush(0x01EB); + break; + case 0x01EC: + bufpush(0x01ED); + break; + case 0x01EE: + bufpush(0x01EF); + break; + case 0x01F0: + bufpush(0x006A); + bufpush(0x030C); + break; + case 0x01F1: + bufpush(0x01F3); + break; + case 0x01F2: + bufpush(0x01F3); + break; + case 0x01F4: + bufpush(0x01F5); + break; + case 0x01F6: + bufpush(0x0195); + break; + case 0x01F7: + bufpush(0x01BF); + break; + case 0x01F8: + bufpush(0x01F9); + break; + case 0x01FA: + bufpush(0x01FB); + break; + case 0x01FC: + bufpush(0x01FD); + break; + case 0x01FE: + bufpush(0x01FF); + break; + case 0x0200: + bufpush(0x0201); + break; + case 0x0202: + bufpush(0x0203); + break; + case 0x0204: + bufpush(0x0205); + break; + case 0x0206: + bufpush(0x0207); + break; + case 0x0208: + bufpush(0x0209); + break; + case 0x020A: + bufpush(0x020B); + break; + case 0x020C: + bufpush(0x020D); + break; + case 0x020E: + bufpush(0x020F); + break; + case 0x0210: + bufpush(0x0211); + break; + case 0x0212: + bufpush(0x0213); + break; + case 0x0214: + bufpush(0x0215); + break; + case 0x0216: + bufpush(0x0217); + break; + case 0x0218: + bufpush(0x0219); + break; + case 0x021A: + bufpush(0x021B); + break; + case 0x021C: + bufpush(0x021D); + break; + case 0x021E: + bufpush(0x021F); + break; + case 0x0220: + bufpush(0x019E); + break; + case 0x0222: + bufpush(0x0223); + break; + case 0x0224: + bufpush(0x0225); + break; + case 0x0226: + bufpush(0x0227); + break; + case 0x0228: + bufpush(0x0229); + break; + case 0x022A: + bufpush(0x022B); + break; + case 0x022C: + bufpush(0x022D); + break; + case 0x022E: + bufpush(0x022F); + break; + case 0x0230: + bufpush(0x0231); + break; + case 0x0232: + bufpush(0x0233); + break; + case 0x0345: + bufpush(0x03B9); + break; + case 0x0386: + bufpush(0x03AC); + break; + case 0x0388: + bufpush(0x03AD); + break; + case 0x0389: + bufpush(0x03AE); + break; + case 0x038A: + bufpush(0x03AF); + break; + case 0x038C: + bufpush(0x03CC); + break; + case 0x038E: + 
bufpush(0x03CD); + break; + case 0x038F: + bufpush(0x03CE); + break; + case 0x0390: + bufpush(0x03B9); + bufpush(0x0308); + bufpush(0x0301); + break; + case 0x0391: + bufpush(0x03B1); + break; + case 0x0392: + bufpush(0x03B2); + break; + case 0x0393: + bufpush(0x03B3); + break; + case 0x0394: + bufpush(0x03B4); + break; + case 0x0395: + bufpush(0x03B5); + break; + case 0x0396: + bufpush(0x03B6); + break; + case 0x0397: + bufpush(0x03B7); + break; + case 0x0398: + bufpush(0x03B8); + break; + case 0x0399: + bufpush(0x03B9); + break; + case 0x039A: + bufpush(0x03BA); + break; + case 0x039B: + bufpush(0x03BB); + break; + case 0x039C: + bufpush(0x03BC); + break; + case 0x039D: + bufpush(0x03BD); + break; + case 0x039E: + bufpush(0x03BE); + break; + case 0x039F: + bufpush(0x03BF); + break; + case 0x03A0: + bufpush(0x03C0); + break; + case 0x03A1: + bufpush(0x03C1); + break; + case 0x03A3: + bufpush(0x03C3); + break; + case 0x03A4: + bufpush(0x03C4); + break; + case 0x03A5: + bufpush(0x03C5); + break; + case 0x03A6: + bufpush(0x03C6); + break; + case 0x03A7: + bufpush(0x03C7); + break; + case 0x03A8: + bufpush(0x03C8); + break; + case 0x03A9: + bufpush(0x03C9); + break; + case 0x03AA: + bufpush(0x03CA); + break; + case 0x03AB: + bufpush(0x03CB); + break; + case 0x03B0: + bufpush(0x03C5); + bufpush(0x0308); + bufpush(0x0301); + break; + case 0x03C2: + bufpush(0x03C3); + break; + case 0x03D0: + bufpush(0x03B2); + break; + case 0x03D1: + bufpush(0x03B8); + break; + case 0x03D5: + bufpush(0x03C6); + break; + case 0x03D6: + bufpush(0x03C0); + break; + case 0x03D8: + bufpush(0x03D9); + break; + case 0x03DA: + bufpush(0x03DB); + break; + case 0x03DC: + bufpush(0x03DD); + break; + case 0x03DE: + bufpush(0x03DF); + break; + case 0x03E0: + bufpush(0x03E1); + break; + case 0x03E2: + bufpush(0x03E3); + break; + case 0x03E4: + bufpush(0x03E5); + break; + case 0x03E6: + bufpush(0x03E7); + break; + case 0x03E8: + bufpush(0x03E9); + break; + case 0x03EA: + bufpush(0x03EB); + break; + 
case 0x03EC: + bufpush(0x03ED); + break; + case 0x03EE: + bufpush(0x03EF); + break; + case 0x03F0: + bufpush(0x03BA); + break; + case 0x03F1: + bufpush(0x03C1); + break; + case 0x03F2: + bufpush(0x03C3); + break; + case 0x03F4: + bufpush(0x03B8); + break; + case 0x03F5: + bufpush(0x03B5); + break; + case 0x0400: + bufpush(0x0450); + break; + case 0x0401: + bufpush(0x0451); + break; + case 0x0402: + bufpush(0x0452); + break; + case 0x0403: + bufpush(0x0453); + break; + case 0x0404: + bufpush(0x0454); + break; + case 0x0405: + bufpush(0x0455); + break; + case 0x0406: + bufpush(0x0456); + break; + case 0x0407: + bufpush(0x0457); + break; + case 0x0408: + bufpush(0x0458); + break; + case 0x0409: + bufpush(0x0459); + break; + case 0x040A: + bufpush(0x045A); + break; + case 0x040B: + bufpush(0x045B); + break; + case 0x040C: + bufpush(0x045C); + break; + case 0x040D: + bufpush(0x045D); + break; + case 0x040E: + bufpush(0x045E); + break; + case 0x040F: + bufpush(0x045F); + break; + case 0x0410: + bufpush(0x0430); + break; + case 0x0411: + bufpush(0x0431); + break; + case 0x0412: + bufpush(0x0432); + break; + case 0x0413: + bufpush(0x0433); + break; + case 0x0414: + bufpush(0x0434); + break; + case 0x0415: + bufpush(0x0435); + break; + case 0x0416: + bufpush(0x0436); + break; + case 0x0417: + bufpush(0x0437); + break; + case 0x0418: + bufpush(0x0438); + break; + case 0x0419: + bufpush(0x0439); + break; + case 0x041A: + bufpush(0x043A); + break; + case 0x041B: + bufpush(0x043B); + break; + case 0x041C: + bufpush(0x043C); + break; + case 0x041D: + bufpush(0x043D); + break; + case 0x041E: + bufpush(0x043E); + break; + case 0x041F: + bufpush(0x043F); + break; + case 0x0420: + bufpush(0x0440); + break; + case 0x0421: + bufpush(0x0441); + break; + case 0x0422: + bufpush(0x0442); + break; + case 0x0423: + bufpush(0x0443); + break; + case 0x0424: + bufpush(0x0444); + break; + case 0x0425: + bufpush(0x0445); + break; + case 0x0426: + bufpush(0x0446); + break; + case 0x0427: + 
bufpush(0x0447); + break; + case 0x0428: + bufpush(0x0448); + break; + case 0x0429: + bufpush(0x0449); + break; + case 0x042A: + bufpush(0x044A); + break; + case 0x042B: + bufpush(0x044B); + break; + case 0x042C: + bufpush(0x044C); + break; + case 0x042D: + bufpush(0x044D); + break; + case 0x042E: + bufpush(0x044E); + break; + case 0x042F: + bufpush(0x044F); + break; + case 0x0460: + bufpush(0x0461); + break; + case 0x0462: + bufpush(0x0463); + break; + case 0x0464: + bufpush(0x0465); + break; + case 0x0466: + bufpush(0x0467); + break; + case 0x0468: + bufpush(0x0469); + break; + case 0x046A: + bufpush(0x046B); + break; + case 0x046C: + bufpush(0x046D); + break; + case 0x046E: + bufpush(0x046F); + break; + case 0x0470: + bufpush(0x0471); + break; + case 0x0472: + bufpush(0x0473); + break; + case 0x0474: + bufpush(0x0475); + break; + case 0x0476: + bufpush(0x0477); + break; + case 0x0478: + bufpush(0x0479); + break; + case 0x047A: + bufpush(0x047B); + break; + case 0x047C: + bufpush(0x047D); + break; + case 0x047E: + bufpush(0x047F); + break; + case 0x0480: + bufpush(0x0481); + break; + case 0x048A: + bufpush(0x048B); + break; + case 0x048C: + bufpush(0x048D); + break; + case 0x048E: + bufpush(0x048F); + break; + case 0x0490: + bufpush(0x0491); + break; + case 0x0492: + bufpush(0x0493); + break; + case 0x0494: + bufpush(0x0495); + break; + case 0x0496: + bufpush(0x0497); + break; + case 0x0498: + bufpush(0x0499); + break; + case 0x049A: + bufpush(0x049B); + break; + case 0x049C: + bufpush(0x049D); + break; + case 0x049E: + bufpush(0x049F); + break; + case 0x04A0: + bufpush(0x04A1); + break; + case 0x04A2: + bufpush(0x04A3); + break; + case 0x04A4: + bufpush(0x04A5); + break; + case 0x04A6: + bufpush(0x04A7); + break; + case 0x04A8: + bufpush(0x04A9); + break; + case 0x04AA: + bufpush(0x04AB); + break; + case 0x04AC: + bufpush(0x04AD); + break; + case 0x04AE: + bufpush(0x04AF); + break; + case 0x04B0: + bufpush(0x04B1); + break; + case 0x04B2: + bufpush(0x04B3); + 
break; + case 0x04B4: + bufpush(0x04B5); + break; + case 0x04B6: + bufpush(0x04B7); + break; + case 0x04B8: + bufpush(0x04B9); + break; + case 0x04BA: + bufpush(0x04BB); + break; + case 0x04BC: + bufpush(0x04BD); + break; + case 0x04BE: + bufpush(0x04BF); + break; + case 0x04C1: + bufpush(0x04C2); + break; + case 0x04C3: + bufpush(0x04C4); + break; + case 0x04C5: + bufpush(0x04C6); + break; + case 0x04C7: + bufpush(0x04C8); + break; + case 0x04C9: + bufpush(0x04CA); + break; + case 0x04CB: + bufpush(0x04CC); + break; + case 0x04CD: + bufpush(0x04CE); + break; + case 0x04D0: + bufpush(0x04D1); + break; + case 0x04D2: + bufpush(0x04D3); + break; + case 0x04D4: + bufpush(0x04D5); + break; + case 0x04D6: + bufpush(0x04D7); + break; + case 0x04D8: + bufpush(0x04D9); + break; + case 0x04DA: + bufpush(0x04DB); + break; + case 0x04DC: + bufpush(0x04DD); + break; + case 0x04DE: + bufpush(0x04DF); + break; + case 0x04E0: + bufpush(0x04E1); + break; + case 0x04E2: + bufpush(0x04E3); + break; + case 0x04E4: + bufpush(0x04E5); + break; + case 0x04E6: + bufpush(0x04E7); + break; + case 0x04E8: + bufpush(0x04E9); + break; + case 0x04EA: + bufpush(0x04EB); + break; + case 0x04EC: + bufpush(0x04ED); + break; + case 0x04EE: + bufpush(0x04EF); + break; + case 0x04F0: + bufpush(0x04F1); + break; + case 0x04F2: + bufpush(0x04F3); + break; + case 0x04F4: + bufpush(0x04F5); + break; + case 0x04F8: + bufpush(0x04F9); + break; + case 0x0500: + bufpush(0x0501); + break; + case 0x0502: + bufpush(0x0503); + break; + case 0x0504: + bufpush(0x0505); + break; + case 0x0506: + bufpush(0x0507); + break; + case 0x0508: + bufpush(0x0509); + break; + case 0x050A: + bufpush(0x050B); + break; + case 0x050C: + bufpush(0x050D); + break; + case 0x050E: + bufpush(0x050F); + break; + case 0x0531: + bufpush(0x0561); + break; + case 0x0532: + bufpush(0x0562); + break; + case 0x0533: + bufpush(0x0563); + break; + case 0x0534: + bufpush(0x0564); + break; + case 0x0535: + bufpush(0x0565); + break; + case 0x0536: 
+ bufpush(0x0566); + break; + case 0x0537: + bufpush(0x0567); + break; + case 0x0538: + bufpush(0x0568); + break; + case 0x0539: + bufpush(0x0569); + break; + case 0x053A: + bufpush(0x056A); + break; + case 0x053B: + bufpush(0x056B); + break; + case 0x053C: + bufpush(0x056C); + break; + case 0x053D: + bufpush(0x056D); + break; + case 0x053E: + bufpush(0x056E); + break; + case 0x053F: + bufpush(0x056F); + break; + case 0x0540: + bufpush(0x0570); + break; + case 0x0541: + bufpush(0x0571); + break; + case 0x0542: + bufpush(0x0572); + break; + case 0x0543: + bufpush(0x0573); + break; + case 0x0544: + bufpush(0x0574); + break; + case 0x0545: + bufpush(0x0575); + break; + case 0x0546: + bufpush(0x0576); + break; + case 0x0547: + bufpush(0x0577); + break; + case 0x0548: + bufpush(0x0578); + break; + case 0x0549: + bufpush(0x0579); + break; + case 0x054A: + bufpush(0x057A); + break; + case 0x054B: + bufpush(0x057B); + break; + case 0x054C: + bufpush(0x057C); + break; + case 0x054D: + bufpush(0x057D); + break; + case 0x054E: + bufpush(0x057E); + break; + case 0x054F: + bufpush(0x057F); + break; + case 0x0550: + bufpush(0x0580); + break; + case 0x0551: + bufpush(0x0581); + break; + case 0x0552: + bufpush(0x0582); + break; + case 0x0553: + bufpush(0x0583); + break; + case 0x0554: + bufpush(0x0584); + break; + case 0x0555: + bufpush(0x0585); + break; + case 0x0556: + bufpush(0x0586); + break; + case 0x0587: + bufpush(0x0565); + bufpush(0x0582); + break; + case 0x1E00: + bufpush(0x1E01); + break; + case 0x1E02: + bufpush(0x1E03); + break; + case 0x1E04: + bufpush(0x1E05); + break; + case 0x1E06: + bufpush(0x1E07); + break; + case 0x1E08: + bufpush(0x1E09); + break; + case 0x1E0A: + bufpush(0x1E0B); + break; + case 0x1E0C: + bufpush(0x1E0D); + break; + case 0x1E0E: + bufpush(0x1E0F); + break; + case 0x1E10: + bufpush(0x1E11); + break; + case 0x1E12: + bufpush(0x1E13); + break; + case 0x1E14: + bufpush(0x1E15); + break; + case 0x1E16: + bufpush(0x1E17); + break; + case 0x1E18: + 
bufpush(0x1E19); + break; + case 0x1E1A: + bufpush(0x1E1B); + break; + case 0x1E1C: + bufpush(0x1E1D); + break; + case 0x1E1E: + bufpush(0x1E1F); + break; + case 0x1E20: + bufpush(0x1E21); + break; + case 0x1E22: + bufpush(0x1E23); + break; + case 0x1E24: + bufpush(0x1E25); + break; + case 0x1E26: + bufpush(0x1E27); + break; + case 0x1E28: + bufpush(0x1E29); + break; + case 0x1E2A: + bufpush(0x1E2B); + break; + case 0x1E2C: + bufpush(0x1E2D); + break; + case 0x1E2E: + bufpush(0x1E2F); + break; + case 0x1E30: + bufpush(0x1E31); + break; + case 0x1E32: + bufpush(0x1E33); + break; + case 0x1E34: + bufpush(0x1E35); + break; + case 0x1E36: + bufpush(0x1E37); + break; + case 0x1E38: + bufpush(0x1E39); + break; + case 0x1E3A: + bufpush(0x1E3B); + break; + case 0x1E3C: + bufpush(0x1E3D); + break; + case 0x1E3E: + bufpush(0x1E3F); + break; + case 0x1E40: + bufpush(0x1E41); + break; + case 0x1E42: + bufpush(0x1E43); + break; + case 0x1E44: + bufpush(0x1E45); + break; + case 0x1E46: + bufpush(0x1E47); + break; + case 0x1E48: + bufpush(0x1E49); + break; + case 0x1E4A: + bufpush(0x1E4B); + break; + case 0x1E4C: + bufpush(0x1E4D); + break; + case 0x1E4E: + bufpush(0x1E4F); + break; + case 0x1E50: + bufpush(0x1E51); + break; + case 0x1E52: + bufpush(0x1E53); + break; + case 0x1E54: + bufpush(0x1E55); + break; + case 0x1E56: + bufpush(0x1E57); + break; + case 0x1E58: + bufpush(0x1E59); + break; + case 0x1E5A: + bufpush(0x1E5B); + break; + case 0x1E5C: + bufpush(0x1E5D); + break; + case 0x1E5E: + bufpush(0x1E5F); + break; + case 0x1E60: + bufpush(0x1E61); + break; + case 0x1E62: + bufpush(0x1E63); + break; + case 0x1E64: + bufpush(0x1E65); + break; + case 0x1E66: + bufpush(0x1E67); + break; + case 0x1E68: + bufpush(0x1E69); + break; + case 0x1E6A: + bufpush(0x1E6B); + break; + case 0x1E6C: + bufpush(0x1E6D); + break; + case 0x1E6E: + bufpush(0x1E6F); + break; + case 0x1E70: + bufpush(0x1E71); + break; + case 0x1E72: + bufpush(0x1E73); + break; + case 0x1E74: + bufpush(0x1E75); + 
break; + case 0x1E76: + bufpush(0x1E77); + break; + case 0x1E78: + bufpush(0x1E79); + break; + case 0x1E7A: + bufpush(0x1E7B); + break; + case 0x1E7C: + bufpush(0x1E7D); + break; + case 0x1E7E: + bufpush(0x1E7F); + break; + case 0x1E80: + bufpush(0x1E81); + break; + case 0x1E82: + bufpush(0x1E83); + break; + case 0x1E84: + bufpush(0x1E85); + break; + case 0x1E86: + bufpush(0x1E87); + break; + case 0x1E88: + bufpush(0x1E89); + break; + case 0x1E8A: + bufpush(0x1E8B); + break; + case 0x1E8C: + bufpush(0x1E8D); + break; + case 0x1E8E: + bufpush(0x1E8F); + break; + case 0x1E90: + bufpush(0x1E91); + break; + case 0x1E92: + bufpush(0x1E93); + break; + case 0x1E94: + bufpush(0x1E95); + break; + case 0x1E96: + bufpush(0x0068); + bufpush(0x0331); + break; + case 0x1E97: + bufpush(0x0074); + bufpush(0x0308); + break; + case 0x1E98: + bufpush(0x0077); + bufpush(0x030A); + break; + case 0x1E99: + bufpush(0x0079); + bufpush(0x030A); + break; + case 0x1E9A: + bufpush(0x0061); + bufpush(0x02BE); + break; + case 0x1E9B: + bufpush(0x1E61); + break; + case 0x1EA0: + bufpush(0x1EA1); + break; + case 0x1EA2: + bufpush(0x1EA3); + break; + case 0x1EA4: + bufpush(0x1EA5); + break; + case 0x1EA6: + bufpush(0x1EA7); + break; + case 0x1EA8: + bufpush(0x1EA9); + break; + case 0x1EAA: + bufpush(0x1EAB); + break; + case 0x1EAC: + bufpush(0x1EAD); + break; + case 0x1EAE: + bufpush(0x1EAF); + break; + case 0x1EB0: + bufpush(0x1EB1); + break; + case 0x1EB2: + bufpush(0x1EB3); + break; + case 0x1EB4: + bufpush(0x1EB5); + break; + case 0x1EB6: + bufpush(0x1EB7); + break; + case 0x1EB8: + bufpush(0x1EB9); + break; + case 0x1EBA: + bufpush(0x1EBB); + break; + case 0x1EBC: + bufpush(0x1EBD); + break; + case 0x1EBE: + bufpush(0x1EBF); + break; + case 0x1EC0: + bufpush(0x1EC1); + break; + case 0x1EC2: + bufpush(0x1EC3); + break; + case 0x1EC4: + bufpush(0x1EC5); + break; + case 0x1EC6: + bufpush(0x1EC7); + break; + case 0x1EC8: + bufpush(0x1EC9); + break; + case 0x1ECA: + bufpush(0x1ECB); + break; + 
case 0x1ECC: + bufpush(0x1ECD); + break; + case 0x1ECE: + bufpush(0x1ECF); + break; + case 0x1ED0: + bufpush(0x1ED1); + break; + case 0x1ED2: + bufpush(0x1ED3); + break; + case 0x1ED4: + bufpush(0x1ED5); + break; + case 0x1ED6: + bufpush(0x1ED7); + break; + case 0x1ED8: + bufpush(0x1ED9); + break; + case 0x1EDA: + bufpush(0x1EDB); + break; + case 0x1EDC: + bufpush(0x1EDD); + break; + case 0x1EDE: + bufpush(0x1EDF); + break; + case 0x1EE0: + bufpush(0x1EE1); + break; + case 0x1EE2: + bufpush(0x1EE3); + break; + case 0x1EE4: + bufpush(0x1EE5); + break; + case 0x1EE6: + bufpush(0x1EE7); + break; + case 0x1EE8: + bufpush(0x1EE9); + break; + case 0x1EEA: + bufpush(0x1EEB); + break; + case 0x1EEC: + bufpush(0x1EED); + break; + case 0x1EEE: + bufpush(0x1EEF); + break; + case 0x1EF0: + bufpush(0x1EF1); + break; + case 0x1EF2: + bufpush(0x1EF3); + break; + case 0x1EF4: + bufpush(0x1EF5); + break; + case 0x1EF6: + bufpush(0x1EF7); + break; + case 0x1EF8: + bufpush(0x1EF9); + break; + case 0x1F08: + bufpush(0x1F00); + break; + case 0x1F09: + bufpush(0x1F01); + break; + case 0x1F0A: + bufpush(0x1F02); + break; + case 0x1F0B: + bufpush(0x1F03); + break; + case 0x1F0C: + bufpush(0x1F04); + break; + case 0x1F0D: + bufpush(0x1F05); + break; + case 0x1F0E: + bufpush(0x1F06); + break; + case 0x1F0F: + bufpush(0x1F07); + break; + case 0x1F18: + bufpush(0x1F10); + break; + case 0x1F19: + bufpush(0x1F11); + break; + case 0x1F1A: + bufpush(0x1F12); + break; + case 0x1F1B: + bufpush(0x1F13); + break; + case 0x1F1C: + bufpush(0x1F14); + break; + case 0x1F1D: + bufpush(0x1F15); + break; + case 0x1F28: + bufpush(0x1F20); + break; + case 0x1F29: + bufpush(0x1F21); + break; + case 0x1F2A: + bufpush(0x1F22); + break; + case 0x1F2B: + bufpush(0x1F23); + break; + case 0x1F2C: + bufpush(0x1F24); + break; + case 0x1F2D: + bufpush(0x1F25); + break; + case 0x1F2E: + bufpush(0x1F26); + break; + case 0x1F2F: + bufpush(0x1F27); + break; + case 0x1F38: + bufpush(0x1F30); + break; + case 0x1F39: + 
bufpush(0x1F31); + break; + case 0x1F3A: + bufpush(0x1F32); + break; + case 0x1F3B: + bufpush(0x1F33); + break; + case 0x1F3C: + bufpush(0x1F34); + break; + case 0x1F3D: + bufpush(0x1F35); + break; + case 0x1F3E: + bufpush(0x1F36); + break; + case 0x1F3F: + bufpush(0x1F37); + break; + case 0x1F48: + bufpush(0x1F40); + break; + case 0x1F49: + bufpush(0x1F41); + break; + case 0x1F4A: + bufpush(0x1F42); + break; + case 0x1F4B: + bufpush(0x1F43); + break; + case 0x1F4C: + bufpush(0x1F44); + break; + case 0x1F4D: + bufpush(0x1F45); + break; + case 0x1F50: + bufpush(0x03C5); + bufpush(0x0313); + break; + case 0x1F52: + bufpush(0x03C5); + bufpush(0x0313); + bufpush(0x0300); + break; + case 0x1F54: + bufpush(0x03C5); + bufpush(0x0313); + bufpush(0x0301); + break; + case 0x1F56: + bufpush(0x03C5); + bufpush(0x0313); + bufpush(0x0342); + break; + case 0x1F59: + bufpush(0x1F51); + break; + case 0x1F5B: + bufpush(0x1F53); + break; + case 0x1F5D: + bufpush(0x1F55); + break; + case 0x1F5F: + bufpush(0x1F57); + break; + case 0x1F68: + bufpush(0x1F60); + break; + case 0x1F69: + bufpush(0x1F61); + break; + case 0x1F6A: + bufpush(0x1F62); + break; + case 0x1F6B: + bufpush(0x1F63); + break; + case 0x1F6C: + bufpush(0x1F64); + break; + case 0x1F6D: + bufpush(0x1F65); + break; + case 0x1F6E: + bufpush(0x1F66); + break; + case 0x1F6F: + bufpush(0x1F67); + break; + case 0x1F80: + bufpush(0x1F00); + bufpush(0x03B9); + break; + case 0x1F81: + bufpush(0x1F01); + bufpush(0x03B9); + break; + case 0x1F82: + bufpush(0x1F02); + bufpush(0x03B9); + break; + case 0x1F83: + bufpush(0x1F03); + bufpush(0x03B9); + break; + case 0x1F84: + bufpush(0x1F04); + bufpush(0x03B9); + break; + case 0x1F85: + bufpush(0x1F05); + bufpush(0x03B9); + break; + case 0x1F86: + bufpush(0x1F06); + bufpush(0x03B9); + break; + case 0x1F87: + bufpush(0x1F07); + bufpush(0x03B9); + break; + case 0x1F88: + bufpush(0x1F00); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1F89: + bufpush(0x1F01); + bufpush(0x03B9); + 
break; + case 0x: + break; + case 0x1F8A: + bufpush(0x1F02); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1F8B: + bufpush(0x1F03); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1F8C: + bufpush(0x1F04); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1F8D: + bufpush(0x1F05); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1F8E: + bufpush(0x1F06); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1F8F: + bufpush(0x1F07); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1F90: + bufpush(0x1F20); + bufpush(0x03B9); + break; + case 0x1F91: + bufpush(0x1F21); + bufpush(0x03B9); + break; + case 0x1F92: + bufpush(0x1F22); + bufpush(0x03B9); + break; + case 0x1F93: + bufpush(0x1F23); + bufpush(0x03B9); + break; + case 0x1F94: + bufpush(0x1F24); + bufpush(0x03B9); + break; + case 0x1F95: + bufpush(0x1F25); + bufpush(0x03B9); + break; + case 0x1F96: + bufpush(0x1F26); + bufpush(0x03B9); + break; + case 0x1F97: + bufpush(0x1F27); + bufpush(0x03B9); + break; + case 0x1F98: + bufpush(0x1F20); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1F99: + bufpush(0x1F21); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1F9A: + bufpush(0x1F22); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1F9B: + bufpush(0x1F23); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1F9C: + bufpush(0x1F24); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1F9D: + bufpush(0x1F25); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1F9E: + bufpush(0x1F26); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1F9F: + bufpush(0x1F27); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1FA0: + bufpush(0x1F60); + bufpush(0x03B9); + break; + case 0x1FA1: + bufpush(0x1F61); + bufpush(0x03B9); + break; + case 0x1FA2: + bufpush(0x1F62); + bufpush(0x03B9); + break; + case 0x1FA3: + bufpush(0x1F63); + bufpush(0x03B9); + break; + case 0x1FA4: + bufpush(0x1F64); + bufpush(0x03B9); + break; + case 0x1FA5: + 
bufpush(0x1F65); + bufpush(0x03B9); + break; + case 0x1FA6: + bufpush(0x1F66); + bufpush(0x03B9); + break; + case 0x1FA7: + bufpush(0x1F67); + bufpush(0x03B9); + break; + case 0x1FA8: + bufpush(0x1F60); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1FA9: + bufpush(0x1F61); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1FAA: + bufpush(0x1F62); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1FAB: + bufpush(0x1F63); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1FAC: + bufpush(0x1F64); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1FAD: + bufpush(0x1F65); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1FAE: + bufpush(0x1F66); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1FAF: + bufpush(0x1F67); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1FB2: + bufpush(0x1F70); + bufpush(0x03B9); + break; + case 0x1FB3: + bufpush(0x03B1); + bufpush(0x03B9); + break; + case 0x1FB4: + bufpush(0x03AC); + bufpush(0x03B9); + break; + case 0x1FB6: + bufpush(0x03B1); + bufpush(0x0342); + break; + case 0x1FB7: + bufpush(0x03B1); + bufpush(0x0342); + bufpush(0x03B9); + break; + case 0x1FB8: + bufpush(0x1FB0); + break; + case 0x1FB9: + bufpush(0x1FB1); + break; + case 0x1FBA: + bufpush(0x1F70); + break; + case 0x1FBB: + bufpush(0x1F71); + break; + case 0x1FBC: + bufpush(0x03B1); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x1FBE: + bufpush(0x03B9); + break; + case 0x1FC2: + bufpush(0x1F74); + bufpush(0x03B9); + break; + case 0x1FC3: + bufpush(0x03B7); + bufpush(0x03B9); + break; + case 0x1FC4: + bufpush(0x03AE); + bufpush(0x03B9); + break; + case 0x1FC6: + bufpush(0x03B7); + bufpush(0x0342); + break; + case 0x1FC7: + bufpush(0x03B7); + bufpush(0x0342); + bufpush(0x03B9); + break; + case 0x1FC8: + bufpush(0x1F72); + break; + case 0x1FC9: + bufpush(0x1F73); + break; + case 0x1FCA: + bufpush(0x1F74); + break; + case 0x1FCB: + bufpush(0x1F75); + break; + case 0x1FCC: + bufpush(0x03B7); + 
bufpush(0x03B9); + break; + case 0x: + break; + case 0x1FD2: + bufpush(0x03B9); + bufpush(0x0308); + bufpush(0x0300); + break; + case 0x1FD3: + bufpush(0x03B9); + bufpush(0x0308); + bufpush(0x0301); + break; + case 0x1FD6: + bufpush(0x03B9); + bufpush(0x0342); + break; + case 0x1FD7: + bufpush(0x03B9); + bufpush(0x0308); + bufpush(0x0342); + break; + case 0x1FD8: + bufpush(0x1FD0); + break; + case 0x1FD9: + bufpush(0x1FD1); + break; + case 0x1FDA: + bufpush(0x1F76); + break; + case 0x1FDB: + bufpush(0x1F77); + break; + case 0x1FE2: + bufpush(0x03C5); + bufpush(0x0308); + bufpush(0x0300); + break; + case 0x1FE3: + bufpush(0x03C5); + bufpush(0x0308); + bufpush(0x0301); + break; + case 0x1FE4: + bufpush(0x03C1); + bufpush(0x0313); + break; + case 0x1FE6: + bufpush(0x03C5); + bufpush(0x0342); + break; + case 0x1FE7: + bufpush(0x03C5); + bufpush(0x0308); + bufpush(0x0342); + break; + case 0x1FE8: + bufpush(0x1FE0); + break; + case 0x1FE9: + bufpush(0x1FE1); + break; + case 0x1FEA: + bufpush(0x1F7A); + break; + case 0x1FEB: + bufpush(0x1F7B); + break; + case 0x1FEC: + bufpush(0x1FE5); + break; + case 0x1FF2: + bufpush(0x1F7C); + bufpush(0x03B9); + break; + case 0x1FF3: + bufpush(0x03C9); + bufpush(0x03B9); + break; + case 0x1FF4: + bufpush(0x03CE); + bufpush(0x03B9); + break; + case 0x1FF6: + bufpush(0x03C9); + bufpush(0x0342); + break; + case 0x1FF7: + bufpush(0x03C9); + bufpush(0x0342); + bufpush(0x03B9); + break; + case 0x1FF8: + bufpush(0x1F78); + break; + case 0x1FF9: + bufpush(0x1F79); + break; + case 0x1FFA: + bufpush(0x1F7C); + break; + case 0x1FFB: + bufpush(0x1F7D); + break; + case 0x1FFC: + bufpush(0x03C9); + bufpush(0x03B9); + break; + case 0x: + break; + case 0x2126: + bufpush(0x03C9); + break; + case 0x212A: + bufpush(0x006B); + break; + case 0x212B: + bufpush(0x00E5); + break; + case 0x2160: + bufpush(0x2170); + break; + case 0x2161: + bufpush(0x2171); + break; + case 0x2162: + bufpush(0x2172); + break; + case 0x2163: + bufpush(0x2173); + break; + case 
0x2164: + bufpush(0x2174); + break; + case 0x2165: + bufpush(0x2175); + break; + case 0x2166: + bufpush(0x2176); + break; + case 0x2167: + bufpush(0x2177); + break; + case 0x2168: + bufpush(0x2178); + break; + case 0x2169: + bufpush(0x2179); + break; + case 0x216A: + bufpush(0x217A); + break; + case 0x216B: + bufpush(0x217B); + break; + case 0x216C: + bufpush(0x217C); + break; + case 0x216D: + bufpush(0x217D); + break; + case 0x216E: + bufpush(0x217E); + break; + case 0x216F: + bufpush(0x217F); + break; + case 0x24B6: + bufpush(0x24D0); + break; + case 0x24B7: + bufpush(0x24D1); + break; + case 0x24B8: + bufpush(0x24D2); + break; + case 0x24B9: + bufpush(0x24D3); + break; + case 0x24BA: + bufpush(0x24D4); + break; + case 0x24BB: + bufpush(0x24D5); + break; + case 0x24BC: + bufpush(0x24D6); + break; + case 0x24BD: + bufpush(0x24D7); + break; + case 0x24BE: + bufpush(0x24D8); + break; + case 0x24BF: + bufpush(0x24D9); + break; + case 0x24C0: + bufpush(0x24DA); + break; + case 0x24C1: + bufpush(0x24DB); + break; + case 0x24C2: + bufpush(0x24DC); + break; + case 0x24C3: + bufpush(0x24DD); + break; + case 0x24C4: + bufpush(0x24DE); + break; + case 0x24C5: + bufpush(0x24DF); + break; + case 0x24C6: + bufpush(0x24E0); + break; + case 0x24C7: + bufpush(0x24E1); + break; + case 0x24C8: + bufpush(0x24E2); + break; + case 0x24C9: + bufpush(0x24E3); + break; + case 0x24CA: + bufpush(0x24E4); + break; + case 0x24CB: + bufpush(0x24E5); + break; + case 0x24CC: + bufpush(0x24E6); + break; + case 0x24CD: + bufpush(0x24E7); + break; + case 0x24CE: + bufpush(0x24E8); + break; + case 0x24CF: + bufpush(0x24E9); + break; + case 0xFB00: + bufpush(0x0066); + bufpush(0x0066); + break; + case 0xFB01: + bufpush(0x0066); + bufpush(0x0069); + break; + case 0xFB02: + bufpush(0x0066); + bufpush(0x006C); + break; + case 0xFB03: + bufpush(0x0066); + bufpush(0x0066); + bufpush(0x0069); + break; + case 0xFB04: + bufpush(0x0066); + bufpush(0x0066); + bufpush(0x006C); + break; + case 0xFB05: + 
bufpush(0x0073); + bufpush(0x0074); + break; + case 0xFB06: + bufpush(0x0073); + bufpush(0x0074); + break; + case 0xFB13: + bufpush(0x0574); + bufpush(0x0576); + break; + case 0xFB14: + bufpush(0x0574); + bufpush(0x0565); + break; + case 0xFB15: + bufpush(0x0574); + bufpush(0x056B); + break; + case 0xFB16: + bufpush(0x057E); + bufpush(0x0576); + break; + case 0xFB17: + bufpush(0x0574); + bufpush(0x056D); + break; + case 0xFF21: + bufpush(0xFF41); + break; + case 0xFF22: + bufpush(0xFF42); + break; + case 0xFF23: + bufpush(0xFF43); + break; + case 0xFF24: + bufpush(0xFF44); + break; + case 0xFF25: + bufpush(0xFF45); + break; + case 0xFF26: + bufpush(0xFF46); + break; + case 0xFF27: + bufpush(0xFF47); + break; + case 0xFF28: + bufpush(0xFF48); + break; + case 0xFF29: + bufpush(0xFF49); + break; + case 0xFF2A: + bufpush(0xFF4A); + break; + case 0xFF2B: + bufpush(0xFF4B); + break; + case 0xFF2C: + bufpush(0xFF4C); + break; + case 0xFF2D: + bufpush(0xFF4D); + break; + case 0xFF2E: + bufpush(0xFF4E); + break; + case 0xFF2F: + bufpush(0xFF4F); + break; + case 0xFF30: + bufpush(0xFF50); + break; + case 0xFF31: + bufpush(0xFF51); + break; + case 0xFF32: + bufpush(0xFF52); + break; + case 0xFF33: + bufpush(0xFF53); + break; + case 0xFF34: + bufpush(0xFF54); + break; + case 0xFF35: + bufpush(0xFF55); + break; + case 0xFF36: + bufpush(0xFF56); + break; + case 0xFF37: + bufpush(0xFF57); + break; + case 0xFF38: + bufpush(0xFF58); + break; + case 0xFF39: + bufpush(0xFF59); + break; + case 0xFF3A: + bufpush(0xFF5A); + break; + case 0x10400: + bufpush(0x10428); + break; + case 0x10401: + bufpush(0x10429); + break; + case 0x10402: + bufpush(0x1042A); + break; + case 0x10403: + bufpush(0x1042B); + break; + case 0x10404: + bufpush(0x1042C); + break; + case 0x10405: + bufpush(0x1042D); + break; + case 0x10406: + bufpush(0x1042E); + break; + case 0x10407: + bufpush(0x1042F); + break; + case 0x10408: + bufpush(0x10430); + break; + case 0x10409: + bufpush(0x10431); + break; + case 
0x1040A: + bufpush(0x10432); + break; + case 0x1040B: + bufpush(0x10433); + break; + case 0x1040C: + bufpush(0x10434); + break; + case 0x1040D: + bufpush(0x10435); + break; + case 0x1040E: + bufpush(0x10436); + break; + case 0x1040F: + bufpush(0x10437); + break; + case 0x10410: + bufpush(0x10438); + break; + case 0x10411: + bufpush(0x10439); + break; + case 0x10412: + bufpush(0x1043A); + break; + case 0x10413: + bufpush(0x1043B); + break; + case 0x10414: + bufpush(0x1043C); + break; + case 0x10415: + bufpush(0x1043D); + break; + case 0x10416: + bufpush(0x1043E); + break; + case 0x10417: + bufpush(0x1043F); + break; + case 0x10418: + bufpush(0x10440); + break; + case 0x10419: + bufpush(0x10441); + break; + case 0x1041A: + bufpush(0x10442); + break; + case 0x1041B: + bufpush(0x10443); + break; + case 0x1041C: + bufpush(0x10444); + break; + case 0x1041D: + bufpush(0x10445); + break; + case 0x1041E: + bufpush(0x10446); + break; + case 0x1041F: + bufpush(0x10447); + break; + case 0x10420: + bufpush(0x10448); + break; + case 0x10421: + bufpush(0x10449); + break; + case 0x10422: + bufpush(0x1044A); + break; + case 0x10423: + bufpush(0x1044B); + break; + case 0x10424: + bufpush(0x1044C); + break; + case 0x10425: + bufpush(0x1044D); + break; + } diff --git a/src/debug.h b/src/debug.h new file mode 100644 index 0000000..af1d017 --- /dev/null +++ b/src/debug.h @@ -0,0 +1,36 @@ +#ifndef __debug_h__ +#define __debug_h__ +#include +#include +#include + +#ifdef NDEBUG +#define debug(M, ...) +#else +#define debug(M, ...) \ + fprintf(stderr, "DEBUG %s:%d: " M "\n", __FILE__, __LINE__, ##__VA_ARGS__) +#endif + +#define clean_errno() (errno == 0 ? "None" : strerror(errno)) + +#define log_err(M, ...) \ + fprintf(stderr, "[ERROR] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \ + clean_errno(), ##__VA_ARGS__) + +#define log_warn(M, ...) \ + fprintf(stderr, "[WARN] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \ + clean_errno(), ##__VA_ARGS__) + +#define log_info(M, ...) 
fprintf(stderr, "[INFO] (%s:%d) " M "\n", __FILE__, \ + __LINE__, ##__VA_ARGS__) + +#define check(A, M, ...) \ + if(!(A)) { log_err(M, ##__VA_ARGS__); errno=0; goto error; } + +#define sentinel(M, ...) \ + { log_err(M, ##__VA_ARGS__); errno=0; goto error; } + +#define check_debug(A, M, ...) \ + if(!(A)) { debug(M, ##__VA_ARGS__); errno=0; goto error; } + +#endif diff --git a/src/detab.c b/src/detab.c new file mode 100644 index 0000000..e03fcf7 --- /dev/null +++ b/src/detab.c @@ -0,0 +1,48 @@ +#include "bstrlib.h" + +// UTF-8 aware detab: assumes s has no newlines, or only a final newline. +// Return 0 on success, BSTR_ERR if invalid UTF-8. +extern int bdetab(bstring s, int utf8) +{ + unsigned char c; + int pos = 0; // a count of characters + int byte = 0; // a count of bytes + int high_chars_to_skip = 0; + int numspaces = 0; + while ((c = bchar(s, byte))) { + if (utf8 && high_chars_to_skip > 0) { + if (c >= 0x80) { + high_chars_to_skip--; + byte++; + } else { + return BSTR_ERR; // invalid utf-8 + } + } else if (c == '\t') { + bdelete(s, byte, 1); // delete tab character + numspaces = 4 - (pos % 4); + binsertch(s, byte, numspaces, ' '); + byte += numspaces; + pos += numspaces; + } else if (c <= 0x80 || !utf8) { + byte++; + pos++; + } else { // multibyte utf8 sequences + if (c >> 1 == 0176) { + high_chars_to_skip = 5; + } else if (c >> 2 == 076) { + high_chars_to_skip = 4; + } else if (c >> 3 == 036) { + high_chars_to_skip = 3; + } else if (c >> 4 == 016) { + high_chars_to_skip = 2; + } else if (c >> 5 == 06) { + high_chars_to_skip = 1; + } else { + return BSTR_ERR; // invalid utf-8 + } + pos++; + byte++; + } + } + return 0; +} diff --git a/src/getopt.c b/src/getopt.c new file mode 100644 index 0000000..321dd9f --- /dev/null +++ b/src/getopt.c @@ -0,0 +1,199 @@ +/* $Id: getopt.c 4022 2008-03-31 06:11:07Z rra $ + * + * Replacement implementation of getopt. 
+ * + * This is a replacement implementation for getopt based on the my_getopt + * distribution by Benjamin Sittler. Only the getopt interface is included, + * since remctl doesn't use GNU long options, and the code has been rearranged + * and reworked somewhat to fit with the remctl coding style. + * + * Copyright 1997, 2000, 2001, 2002 Benjamin Sittler + * Copyright 2008 Russ Allbery + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +/* + * If we're running the test suite, rename getopt and the global variables to + * avoid conflicts with the system version. + */ +#if TESTING +# define getopt test_getopt +int test_getopt(int, char **, const char *); +# define optind test_optind +# define opterr test_opterr +# define optopt test_optopt +# define optarg test_optarg +#endif + +/* Initialize global interface variables. 
*/ +int optind = 1; +int opterr = 1; +int optopt = 0; +char *optarg = NULL; + +/* + * This is the plain old UNIX getopt, with GNU-style extensions. If you're + * porting some piece of UNIX software, this is all you need. It supports + * GNU-style permutation and optional arguments, but does not support the GNU + * -W extension. + * + * This function is not re-entrant or thread-safe, has static variables, and + * generally isn't a great interface, but normally you only call it once. + */ +int +getopt(int argc, char *argv[], const char *optstring) +{ + const char *p; + size_t offset = 0; + char mode = '\0'; + int colon_mode = 0; + int option = -1; + + /* Holds the current position in the parameter being parsed. */ + static int charind = 0; + + /* + * By default, getopt permutes argv as it scans and leaves all non-options + * at the end. This can be changed with the first character of optstring + * or the environment variable POSIXLY_CORRECT. With a first character of + * '+' or when POSIXLY_CORRECT is set, option processing stops at the + * first non-option. If the first character is '-', each non-option argv + * element is handled as if it were the argument of an option with + * character code 1. mode holds this character. + * + * After the optional leading '+' and '-', optstring may contain ':'. If + * present, missing arguments return ':' instead of '?'. colon_mode holds + * this setting. + */ + if (getenv("POSIXLY_CORRECT") != NULL) { + mode = '+'; + colon_mode = '+'; + } else { + if (optstring[offset] == '+' || optstring[offset] == '-') { + mode = optstring[offset]; + offset++; + } + if (optstring[offset] == ':') { + colon_mode = 1; + offset++; + } + } + + /* + * charind holds where we left off. If it's set, we were in the middle + * of an argv element; if not, we pick up with the next element of + * optind. 
+ */ + optarg = NULL; + if (charind == 0) { + if (optind >= argc) + option = -1; + else if (strcmp(argv[optind], "--") == 0) { + optind++; + option = -1; + } else if (argv[optind][0] != '-' || argv[optind][1] == '\0') { + char *tmp; + int i, j, k, end; + + if (mode == '+') + option = -1; + else if (mode == '-') { + optarg = argv[optind]; + optind++; + option = 1; + } else { + for (i = optind + 1, j = optind; i < argc; i++) + if ((argv[i][0] == '-') && (argv[i][1] != '\0')) { + optind = i; + option = getopt(argc, argv, optstring); + while (i > j) { + --i; + tmp = argv[i]; + end = (charind == 0) ? optind - 1 : optind; + for (k = i; k + 1 <= end; k++) { + argv[k] = argv[k + 1]; + } + argv[end] = tmp; + --optind; + } + break; + } + if (i == argc) + option = -1; + } + return option; + } else { + charind = 1; + } + } + if (charind != 0) { + optopt = argv[optind][charind]; + for (p = optstring + offset; *p != '\0'; p++) + if (optopt == *p) { + p++; + if (*p == ':') { + if (argv[optind][charind + 1] != '\0') { + optarg = &argv[optind][charind + 1]; + optind++; + charind = 0; + } else { + p++; + if (*p != ':') { + charind = 0; + optind++; + if (optind >= argc) { + if (opterr) + fprintf(stderr, "%s: option requires" + " an argument -- %c\n", argv[0], + optopt); + option = colon_mode ? 
':' : '?'; + goto done; + } else { + optarg = argv[optind]; + optind++; + } + } + } + } + option = optopt; + } + if (option == -1) { + if (opterr) + fprintf(stderr, "%s: illegal option -- %c\n", argv[0], optopt); + option = '?'; + } + } + +done: + if (charind != 0) { + charind++; + if (argv[optind][charind] == '\0') { + optind++; + charind = 0; + } + } + if (optind > argc) + optind = argc; + return option; +} diff --git a/src/html.c b/src/html.c new file mode 100644 index 0000000..56d5dbb --- /dev/null +++ b/src/html.c @@ -0,0 +1,276 @@ +#include +#include +#include +#include "bstrlib.h" +#include "stmd.h" +#include "debug.h" +#include "scanners.h" + +// Functions to convert block and inline lists to HTML strings. + +// Escape special characters in HTML. More efficient than +// three calls to bfindreplace. If preserve_entities is set, +// existing entities are left alone. +static bstring escape_html(bstring inp, bool preserve_entities) +{ + int pos = 0; + int match; + char c; + bstring escapable = blk2bstr("&<>\"", 4); + bstring ent; + bstring s = bstrcpy(inp); + while ((pos = binchr(s, pos, escapable)) != BSTR_ERR) { + c = bchar(s,pos); + switch (c) { + case '<': + bdelete(s, pos, 1); + ent = blk2bstr("<", 4); + binsert(s, pos, ent, ' '); + bdestroy(ent); + pos += 4; + break; + case '>': + bdelete(s, pos, 1); + ent = blk2bstr(">", 4); + binsert(s, pos, ent, ' '); + bdestroy(ent); + pos += 4; + break; + case '&': + if (preserve_entities && (match = scan_entity(s, pos))) { + pos += match; + } else { + bdelete(s, pos, 1); + ent = blk2bstr("&", 5); + binsert(s, pos, ent, ' '); + bdestroy(ent); + pos += 5; + } + break; + case '"': + bdelete(s, pos, 1); + ent = blk2bstr(""", 6); + binsert(s, pos, ent, ' '); + bdestroy(ent); + pos += 6; + break; + default: + bdelete(s, pos, 1); + log_err("unexpected character %02x", c); + } + } + bdestroy(escapable); + return s; +} + +static inline void cr(bstring buffer) +{ + int c = bchar(buffer, blength(buffer) - 1); + if (c != '\n' 
&& c) { + bconchar(buffer, '\n'); + } +} + +// Convert a block list to HTML. Returns 0 on success, and sets result. +extern int blocks_to_html(block* b, bstring* result, bool tight) +{ + bstring contents = NULL; + bstring escaped, escaped2; + struct bstrList * info_words; + struct ListData * data; + bstring mbstart; + bstring html = blk2bstr("", 0); + + while(b != NULL) { + switch(b->tag) { + case document: + check(blocks_to_html(b->children, &contents, false) == 0, + "error converting blocks to html"); + bformata(html, "%s", contents->data); + bdestroy(contents); + break; + case paragraph: + check(inlines_to_html(b->inline_content, &contents) == 0, + "error converting inlines to html"); + if (tight) { + bformata(html, "%s", contents->data); + } else { + cr(html); + bformata(html, "

%s

", contents->data); + cr(html); + } + bdestroy(contents); + break; + case block_quote: + check(blocks_to_html(b->children, &contents, false) == 0, + "error converting blocks to html"); + cr(html); + bformata(html, "
\n%s
", contents->data); + cr(html); + bdestroy(contents); + break; + case list_item: + check(blocks_to_html(b->children, &contents, tight) == 0, + "error converting blocks to html"); + brtrimws(contents); + cr(html); + bformata(html, "
  • %s
  • ", contents->data); + cr(html); + bdestroy(contents); + break; + case list: + // make sure a list starts at the beginning of the line: + cr(html); + data = &(b->attributes.list_data); + check(blocks_to_html(b->children, &contents, data->tight) == 0, + "error converting blocks to html"); + mbstart = bformat(" start=\"%d\"", data->start); + bformata(html, "<%s%s>\n%s", + data->list_type == bullet ? "ul" : "ol", + data->start == 1 ? "" : (char*) mbstart->data, + contents->data, + data->list_type == bullet ? "ul" : "ol"); + cr(html); + bdestroy(contents); + bdestroy(mbstart); + break; + case atx_header: + case setext_header: + check(inlines_to_html(b->inline_content, &contents) == 0, + "error converting inlines to html"); + cr(html); + bformata(html, "%s", + b->attributes.header_level, + contents->data, + b->attributes.header_level); + cr(html); + bdestroy(contents); + break; + case indented_code: + escaped = escape_html(b->string_content, false); + cr(html); + bformata(html, "
    %s
    ", escaped->data); + cr(html); + bdestroy(escaped); + break; + case fenced_code: + escaped = escape_html(b->string_content, false); + cr(html); + bformata(html, "attributes.fenced_code_data.info) > 0) { + escaped2 = escape_html(b->attributes.fenced_code_data.info, true); + info_words = bsplit(escaped2, ' '); + bformata(html, " class=\"%s\"", info_words->entry[0]->data); + bdestroy(escaped2); + bstrListDestroy(info_words); + } + bformata(html, ">%s", escaped->data); + cr(html); + bdestroy(escaped); + break; + case html_block: + bformata(html, "%s", b->string_content->data); + break; + case hrule: + bformata(html, "
    "); + cr(html); + break; + case reference_def: + break; + default: + log_warn("block type %d not implemented\n", b->tag); + break; + } + b = b->next; + } + *result = html; + return 0; + error: + return -1; +} + +// Convert an inline list to HTML. Returns 0 on success, and sets result. +extern int inlines_to_html(inl* ils, bstring* result) +{ + bstring contents = NULL; + bstring html = blk2bstr("", 0); + bstring mbtitle, escaped, escaped2; + + while(ils != NULL) { + switch(ils->tag) { + case str: + escaped = escape_html(ils->content.literal, false); + bformata(html, "%s", escaped->data); + bdestroy(escaped); + break; + case linebreak: + bformata(html, "
    \n"); + break; + case softbreak: + bformata(html, "\n"); + break; + case code: + escaped = escape_html(ils->content.literal, false); + bformata(html, "%s", escaped->data); + bdestroy(escaped); + break; + case raw_html: + case entity: + bformata(html, "%s", ils->content.literal->data); + break; + case link: + check(inlines_to_html(ils->content.inlines, &contents) == 0, + "error converting inlines to html"); + if (blength(ils->content.linkable.title) > 0) { + escaped = escape_html(ils->content.linkable.title, true); + mbtitle = bformat(" title=\"%s\"", escaped->data); + bdestroy(escaped); + } else { + mbtitle = blk2bstr("",0); + } + escaped = escape_html(ils->content.linkable.url, true); + bformata(html, "%s", + escaped->data, + mbtitle->data, + contents->data); + bdestroy(escaped); + bdestroy(mbtitle); + bdestroy(contents); + break; + case image: + check(inlines_to_html(ils->content.inlines, &contents) == 0, + "error converting inlines to html"); + escaped = escape_html(ils->content.linkable.url, true); + escaped2 = escape_html(contents, false); + bdestroy(contents); + bformata(html, "\"%s\"",data, escaped2->data); + bdestroy(escaped); + bdestroy(escaped2); + if (blength(ils->content.linkable.title) > 0) { + escaped = escape_html(ils->content.linkable.title, true); + bformata(html, " title=\"%s\"", escaped->data); + bdestroy(escaped); + } + bformata(html, " />"); + break; + case strong: + check(inlines_to_html(ils->content.inlines, &contents) == 0, + "error converting inlines to html"); + bformata(html, "%s", contents->data); + bdestroy(contents); + break; + case emph: + check(inlines_to_html(ils->content.inlines, &contents) == 0, + "error converting inlines to html"); + bformata(html, "%s", contents->data); + bdestroy(contents); + break; + } + ils = ils->next; + } + *result = html; + return 0; + error: + return -1; +} diff --git a/src/inlines.c b/src/inlines.c new file mode 100644 index 0000000..9e35178 --- /dev/null +++ b/src/inlines.c @@ -0,0 +1,998 @@ 
+#include +#include +#include +#include +#include "bstrlib.h" +#include "stmd.h" +#include "uthash.h" +#include "debug.h" +#include "scanners.h" +#include "utf8.h" + +extern void free_reference(reference *ref) { + bdestroy(ref->label); + bdestroy(ref->url); + bdestroy(ref->title); + free(ref); +} + +extern void free_reference_map(reference **refmap) { + /* free the hash table contents */ + reference *s; + reference *tmp; + if (refmap != NULL) { + HASH_ITER(hh, *refmap, s, tmp) { + HASH_DEL(*refmap, s); + free_reference(s); + } + free(refmap); + } +} + +// normalize reference: collapse internal whitespace to single space, +// remove leading/trailing whitespace, case fold +static bstring normalize_reference(bstring s) +{ + bstring normalized = case_fold(s); + int pos = 0; + int startpos; + char c; + while ((c = bchar(normalized, pos))) { + if (isspace(c)) { + startpos = pos; + // skip til next non-space + pos++; + while (isspace(bchar(s, pos))) { + pos++; + } + bdelete(normalized, startpos, pos - startpos); + binsertch(normalized, startpos, 1, ' '); + pos = startpos + 1; + } + pos++; + } + btrimws(normalized); + return normalized; +} + +// Returns reference if refmap contains a reference with matching +// label, otherwise NULL. 
+extern reference* lookup_reference(reference** refmap, bstring lab) +{ + reference * ref = NULL; + bstring label = normalize_reference(lab); + if (refmap != NULL) { + HASH_FIND_STR(*refmap, (char*) label->data, ref); + } + bdestroy(label); + return ref; +} + +extern reference* make_reference(bstring label, bstring url, bstring title) +{ + reference * ref; + ref = malloc(sizeof(reference)); + ref->label = normalize_reference(label); + ref->url = bstrcpy(url); + ref->title = bstrcpy(title); + return ref; +} + +extern void add_reference(reference** refmap, reference* ref) +{ + reference * t = NULL; + HASH_FIND(hh, *refmap, (char*) ref->label->data, + (unsigned) blength(ref->label), t); + if (t == NULL) { + HASH_ADD_KEYPTR(hh, *refmap, (char*) ref->label->data, + (unsigned) blength(ref->label), ref); + } else { + free_reference(ref); // we free this now since it won't be in the refmap + } +} + +// Create an inline with a linkable string value. +inline static inl* make_linkable(int t, inl* label, bstring url, bstring title) +{ + inl* e = (inl*) malloc(sizeof(inl)); + e->tag = t; + e->content.linkable.label = label; + e->content.linkable.url = url; + e->content.linkable.title = title; + e->next = NULL; + return e; +} + +inline static inl* make_inlines(int t, inl* contents) +{ + inl* e = (inl*) malloc(sizeof(inl)); + e->tag = t; + e->content.inlines = contents; + e->next = NULL; + return e; +} + +// Create an inline with a literal string value. +inline static inl* make_literal(int t, bstring s) +{ + inl* e = (inl*) malloc(sizeof(inl)); + e->tag = t; + e->content.literal = s; + e->next = NULL; + return e; +} + +// Create an inline with no value. +inline static inl* make_simple(int t) +{ + inl* e = (inl*) malloc(sizeof(inl)); + e->tag = t; + e->next = NULL; + return e; +} + +// Macros for creating various kinds of inlines. 
+#define make_str(s) make_literal(str, s) +#define make_code(s) make_literal(code, s) +#define make_raw_html(s) make_literal(raw_html, s) +#define make_entity(s) make_literal(entity, s) +#define make_linebreak() make_simple(linebreak) +#define make_softbreak() make_simple(softbreak) +#define make_link(label, url, title) make_linkable(link, label, url, title) +#define make_image(alt, url, title) make_linkable(image, alt, url, title) +#define make_emph(contents) make_inlines(emph, contents) +#define make_strong(contents) make_inlines(strong, contents) + +// Free an inline list. +extern void free_inlines(inl* e) +{ + inl * next; + while (e != NULL) { + switch (e->tag){ + case str: + case raw_html: + case code: + case entity: + bdestroy(e->content.literal); + break; + case linebreak: + case softbreak: + break; + case link: + case image: + bdestroy(e->content.linkable.url); + bdestroy(e->content.linkable.title); + free_inlines(e->content.linkable.label); + break; + case emph: + case strong: + free_inlines(e->content.inlines); + break; + default: + break; + } + next = e->next; + free(e); + e = next; + } +} + +// Append inline list b to the end of inline list a. +// Return pointer to head of new list. +inline static inl* append_inlines(inl* a, inl* b) +{ + if (a == NULL) { // NULL acts like an empty list + return b; + } + inl* cur = a; + while (cur->next) { + cur = cur->next; + } + cur->next = b; + return a; +} + +// Make a 'subject' from an input string. +static subject* make_subject(bstring s, reference** refmap) +{ + subject* e = (subject*) malloc(sizeof(subject)); + // remove final whitespace + brtrimws(s); + e->buffer = s; + e->pos = 0; + e->label_nestlevel = 0; + e->reference_map = refmap; + return e; +} + +inline static int isbacktick(int c) +{ + return (c == '`'); +} + +// Return the next character in the subject, without advancing. +// Return 0 if at the end of the subject. 
+#define peek_char(subj) bchar(subj->buffer, subj->pos) + +// Return true if there are more characters in the subject. +inline static int is_eof(subject* subj) +{ + return (subj->pos >= blength(subj->buffer)); +} + +// Advance the subject. Doesn't check for eof. +#define advance(subj) subj->pos += 1 + +// Take characters while a predicate holds, and return a string. +inline static bstring take_while(subject* subj, int (*f)(int)) +{ + unsigned char c; + int startpos = subj->pos; + int len = 0; + while ((c = peek_char(subj)) && (*f)(c)) { + advance(subj); + len++; + } + return bmidstr(subj->buffer, startpos, len); +} + +// Take one character and return a string, or NULL if eof. +inline static bstring take_one(subject* subj) +{ + int startpos = subj->pos; + if (is_eof(subj)){ + return NULL; + } else { + advance(subj); + return bmidstr(subj->buffer, startpos, 1); + } +} + +// Try to process a backtick code span that began with a +// span of ticks of length openticklength length (already +// parsed). Return 0 if you don't find matching closing +// backticks, otherwise return the position in the subject +// after the closing backticks. +static int scan_to_closing_backticks(subject* subj, int openticklength) +{ + // read non backticks + char c; + while ((c = peek_char(subj)) && c != '`') { + advance(subj); + } + if (is_eof(subj)) { + return 0; // did not find closing ticks, return 0 + } + int numticks = 0; + while (peek_char(subj) == '`') { + advance(subj); + numticks++; + } + if (numticks != openticklength){ + return(scan_to_closing_backticks(subj, openticklength)); + } + return (subj->pos); +} + +// Destructively modify bstring, collapsing consecutive +// space and newline characters into a single space. 
+static int normalize_whitespace(bstring s) +{ + bool last_char_was_space = false; + int pos = 0; + char c; + while ((c = bchar(s, pos))) { + switch (c) { + case ' ': + if (last_char_was_space) { + bdelete(s, pos, 1); + } else { + pos++; + } + last_char_was_space = true; + break; + case '\n': + if (last_char_was_space) { + bdelete(s, pos, 1); + } else { + bdelete(s, pos, 1); + binsertch(s, pos, 1, ' '); + pos++; + } + last_char_was_space = true; + break; + default: + pos++; + last_char_was_space = false; + } + } + return 0; +} + +// Parse backtick code section or raw backticks, return an inline. +// Assumes that the subject has a backtick at the current position. +static inl* handle_backticks(subject *subj) +{ + bstring openticks = take_while(subj, isbacktick); + bstring result; + int ticklength = blength(openticks); + int startpos = subj->pos; + int endpos = scan_to_closing_backticks(subj, ticklength); + if (endpos == 0) { // not found + subj->pos = startpos; // rewind + return make_str(openticks); + } else { + bdestroy(openticks); + result = bmidstr(subj->buffer, startpos, endpos - startpos - ticklength); + btrimws(result); + normalize_whitespace(result); + return make_code(result); + } +} + +// Scan ***, **, or * and return number scanned, or 0. +// Don't advance position. +static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close) +{ + int numdelims = 0; + char char_before, char_after; + int startpos = subj->pos; + + char_before = subj->pos == 0 ? '\n' : bchar(subj->buffer, subj->pos - 1); + while (peek_char(subj) == c) { + numdelims++; + advance(subj); + } + char_after = peek_char(subj); + *can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after); + *can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before); + if (c == '_') { + *can_open = *can_open && !isalnum(char_before); + *can_close = *can_close && !isalnum(char_after); + } + subj->pos = startpos; + return numdelims; +} + +// Parse strong/emph or a fallback. 
// Assumes the subject has '_' or '*' at the current position.
// c is that delimiter character.  The opening run of delimiters is first
// emitted as a plain str inline (first_head).  If a suitable closing run
// is found later, that same node is converted in place into emph/strong
// whose contents are the inlines parsed in between.  If parsing stops
// before a closer is found, the str node is returned unchanged.
// NOTE(review): parse_inline is defined elsewhere; from its use here it
// presumably parses one inline, links it after *last, updates *last and
// returns 0 when nothing more can be parsed -- verify.
static inl* handle_strong_emph(subject* subj, char c)
{
  bool can_open, can_close;
  inl * result = NULL;
  inl ** last = malloc(sizeof(inl *));
  inl * new;
  inl * il;
  inl * first_head = NULL;
  inl * first_close = NULL;
  int first_close_delims = 0;
  int numdelims;

  *last = NULL;

  // scan_delims does not advance, so consume the opening run here
  numdelims = scan_delims(subj, c, &can_open, &can_close);
  subj->pos += numdelims;

  // the opening delimiters as literal text; this is also the fallback result
  new = make_str(bmidstr(subj->buffer, subj->pos - numdelims, numdelims));
  *last = new;
  first_head = new;
  result = new;

  if (!can_open || numdelims == 0) {
    goto done;
  }

  switch (numdelims) {
  case 1:
    // opened with one delimiter: look for a closer of at least one
    while (true) {
      numdelims = scan_delims(subj, c, &can_open, &can_close);
      if (numdelims >= 1 && can_close) {
        subj->pos += 1;
        // turn the literal str node into emph wrapping what followed it
        first_head->tag = emph;
        bdestroy(first_head->content.literal);
        first_head->content.inlines = first_head->next;
        first_head->next = NULL;
        goto done;
      } else {
        if (!parse_inline(subj, last)) {
          goto done;
        }
      }
    }
    break;
  case 2:
    // opened with two delimiters: look for a closer of at least two
    while (true) {
      numdelims = scan_delims(subj, c, &can_open, &can_close);
      if (numdelims >= 2 && can_close) {
        subj->pos += 2;
        // turn the literal str node into strong wrapping what followed it
        first_head->tag = strong;
        bdestroy(first_head->content.literal);
        first_head->content.inlines = first_head->next;
        first_head->next = NULL;
        goto done;
      } else {
        if (!parse_inline(subj, last)) {
          goto done;
        }
      }
    }
    break;
  case 3:
    // opened with three delimiters: two closers are needed, and they must
    // have different lengths (numdelims != first_close_delims)
    while (true) {
      numdelims = scan_delims(subj, c, &can_open, &can_close);
      if (can_close && numdelims >= 1 && numdelims <= 3 &&
          numdelims != first_close_delims) {
        // record the closing run as a str node at the end of the list
        new = make_str(bmidstr(subj->buffer, subj->pos, numdelims));
        append_inlines(*last, new);
        *last = new;
        // of a run of three, only one delimiter is consumed now
        if (numdelims == 3) {
          numdelims = 1;
        }
        subj->pos += numdelims;
        if (first_close) {
          // Second closer.  The outer element gets the kind that the
          // first closer did NOT close; the inner element, built around
          // the inlines following first_head, gets the kind it did close.
          first_head->tag = first_close_delims == 1 ? strong : emph;
          bdestroy(first_head->content.literal);
          first_head->content.inlines =
            make_inlines(first_close_delims == 1 ? emph : strong,
                         first_head->next);

          // cut the inner contents off just before the first closer
          il = first_head->next;
          while (il->next && il->next != first_close) {
            il = il->next;
          }
          il->next = NULL;

          // what came after the first closer follows the inner element
          // inside the outer one
          first_head->content.inlines->next = first_close->next;

          // cut the outer contents off just before the second closer
          // (*last) and discard that closer's str node
          il = first_head->content.inlines;
          while (il->next && il->next != *last) {
            il = il->next;
          }
          il->next = NULL;
          free_inlines(*last);

          // discard the first closer's str node as well
          first_close->next = NULL;
          free_inlines(first_close);
          first_head->next = NULL;
          goto done;
        } else {
          // first closer: remember its node and length
          first_close = *last;
          first_close_delims = numdelims;
        }
      } else {
        if (!parse_inline(subj, last)) {
          goto done;
        }
      }
    }
    break;
  default:
    goto done;
  }

 done:
  // only the holder for the tail pointer is freed, not the inlines
  free(last);
  return result;
}

// Parse backslash-escape or just a backslash, returning an inline.
// Assumes the subject has a backslash at the current position; the
// backslash is always consumed.
static inl* handle_backslash(subject *subj)
{
  advance(subj);
  unsigned char nextchar = peek_char(subj);
  if (ispunct(nextchar)) {  // only ascii symbols and newline can be escaped
    advance(subj);
    return make_str(bformat("%c", nextchar));
  } else if (nextchar == '\n') {
    // backslash followed by newline yields a linebreak inline
    advance(subj);
    return make_linebreak();
  } else {
    // not an escape: emit the backslash itself, leave nextchar unconsumed
    return make_str(bfromcstr("\\"));
  }
}

// Parse an entity or a regular "&" string.
// Assumes the subject has an '&' character at the current position.
static inl* handle_entity(subject* subj)
{
  int match;
  inl * result;
  // match is used below as the number of characters the entity occupies
  match = scan_entity(subj->buffer, subj->pos);
  if (match) {
    result = make_entity(bmidstr(subj->buffer, subj->pos, match));
    subj->pos += match;
  } else {
    // no entity here: consume only the '&' and emit it as text
    advance(subj);
    result = make_str(bfromcstr("&"));
  }
  return result;
}

// Like make_str, but parses entities.
// Returns an inline sequence consisting of str and entity elements.
+static inl * make_str_with_entities(bstring s) +{ + inl * result = NULL; + inl * new; + int searchpos; + char c; + subject * subj = make_subject(s, NULL); + + while ((c = peek_char(subj))) { + switch (c) { + case '&': + new = handle_entity(subj); + break; + default: + searchpos = bstrchrp(subj->buffer, '&', subj->pos); + if (searchpos == BSTR_ERR) { + searchpos = blength(subj->buffer); + } + new = make_str(bmidstr(subj->buffer, subj->pos, searchpos - subj->pos)); + subj->pos = searchpos; + } + result = append_inlines(result, new); + } + free(subj); + return result; +} + +// Destructively unescape a string: remove backslashes before punctuation chars. +extern int unescape(bstring url) +{ + // remove backslashes before punctuation chars: + int searchpos = 0; + while ((searchpos = bstrchrp(url, '\\', searchpos)) != BSTR_ERR) { + if (ispunct(bchar(url, searchpos + 1))) { + bdelete(url, searchpos, 1); + } else { + searchpos++; + } + } + return 0; +} + +// Clean a URL: remove surrounding whitespace and surrounding <>, +// and remove \ that escape punctuation. +static int clean_url(bstring url) +{ + // remove surrounding <> if any: + int urllength = blength(url); + btrimws(url); + if (bchar(url, 0) == '<' && bchar(url, urllength - 1) == '>') { + bdelete(url, 0, 1); + bdelete(url, urllength - 2, 1); + } + unescape(url); + return 0; +} + +// Clean a title: remove surrounding quotes and remove \ that escape punctuation. +static int clean_title(bstring title) +{ + // remove surrounding quotes if any: + int titlelength = blength(title); + if ((bchar(title, 0) == '\'' && bchar(title, titlelength - 1) == '\'') || + (bchar(title, 0) == '(' && bchar(title, titlelength - 1) == ')') || + (bchar(title, 0) == '"' && bchar(title, titlelength - 1) == '"')) { + bdelete(title, 0, 1); + bdelete(title, titlelength - 2, 1); + } + unescape(title); + return 0; +} + +// Parse an autolink or HTML tag. +// Assumes the subject has a '<' character at the current position. 
+static inl* handle_pointy_brace(subject* subj) +{ + int matchlen = 0; + bstring contents; + inl* result; + + advance(subj); // advance past first < + // first try to match a URL autolink + matchlen = scan_autolink_uri(subj->buffer, subj->pos); + if (matchlen > 0) { + contents = bmidstr(subj->buffer, subj->pos, matchlen - 1); + subj->pos += matchlen; + result = make_link(make_str_with_entities(contents), + bstrcpy(contents), bfromcstr("")); + bdestroy(contents); + return result; + } + // next try to match an email autolink + matchlen = scan_autolink_email(subj->buffer, subj->pos); + if (matchlen > 0) { + contents = bmidstr(subj->buffer, subj->pos, matchlen - 1); + subj->pos += matchlen; + result = make_link(make_str_with_entities(contents), + bformat("mailto:%s", contents->data), + bfromcstr("")); + bdestroy(contents); + return result; + } + // finally, try to match an html tag + matchlen = scan_html_tag(subj->buffer, subj->pos); + if (matchlen > 0) { + contents = bmidstr(subj->buffer, subj->pos, matchlen); + binsertch(contents, 0, 1, '<'); + subj->pos += matchlen; + return make_raw_html(contents); + } else {// if nothing matches, just return the opening <: + return make_str(bfromcstr("<")); + } +} + +// Parse a link label. Returns 1 if successful. +// Unless raw_label is null, it is set to point to the raw contents of the []. +// Assumes the subject has a '[' character at the current position. +// Returns 0 and does not advance if no matching ] is found. +// Note the precedence: code backticks have precedence over label bracket +// markers, which have precedence over *, _, and other inline formatting +// markers. So, 2 below contains a link while 1 does not: +// 1. [a link `with a ](/url)` character +// 2. 
[a link *with emphasized ](/url) text* +static int link_label(subject* subj, bstring* raw_label) +{ + int nestlevel = 0; + inl* tmp = NULL; + bstring raw; + int startpos = subj->pos; + if (subj->label_nestlevel) { + // if we've already checked to the end of the subject + // for a label, even with a different starting [, we + // know we won't find one here and we can just return. + // Note: nestlevel 1 would be: [foo [bar] + // nestlevel 2 would be: [foo [bar [baz] + subj->label_nestlevel--; + return 0; + } + advance(subj); // advance past [ + char c; + while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) { + switch (c) { + case '`': + tmp = handle_backticks(subj); + free_inlines(tmp); + break; + case '<': + tmp = handle_pointy_brace(subj); + free_inlines(tmp); + break; + case '[': // nested [] + nestlevel++; + advance(subj); + break; + case ']': // nested [] + nestlevel--; + advance(subj); + break; + case '\\': + advance(subj); + if (ispunct(peek_char(subj))) { + advance(subj); + } + break; + default: + advance(subj); + } + } + if (c == ']') { + if (raw_label != NULL) { + raw = bmidstr(subj->buffer, startpos + 1, subj->pos - (startpos + 1)); + *raw_label = raw; + } + subj->label_nestlevel = 0; + advance(subj); // advance past ] + return 1; + } else { + if (c == 0) { + subj->label_nestlevel = nestlevel; + } + subj->pos = startpos; // rewind + return 0; + } +} + +// Parse a link or the link portion of an image, or return a fallback. 
+static inl* handle_left_bracket(subject* subj) +{ + inl* lab = NULL; + inl* result = NULL; + reference* ref; + int n; + int sps; + int found_label; + int endlabel, starturl, endurl, starttitle, endtitle, endall; + bstring url, title, rawlabel, reflabel; + bstring rawlabel2 = NULL; + found_label = link_label(subj, &rawlabel); + endlabel = subj->pos; + if (found_label) { + if (peek_char(subj) == '(' && + ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) && + ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) { + // try to parse an explicit link: + starturl = subj->pos + 1 + sps; // after ( + endurl = starturl + n; + starttitle = endurl + scan_spacechars(subj->buffer, endurl); + // ensure there are spaces btw url and title + endtitle = (starttitle == endurl) ? starttitle : + starttitle + scan_link_title(subj->buffer, starttitle); + endall = endtitle + scan_spacechars(subj->buffer, endtitle); + if (bchar(subj->buffer, endall) == ')') { + subj->pos = endall + 1; + url = bmidstr(subj->buffer, starturl, endurl - starturl); + clean_url(url); + title = bmidstr(subj->buffer, starttitle, endtitle - starttitle); + clean_title(title); + lab = parse_inlines(rawlabel, NULL); + bdestroy(rawlabel); + return make_link(lab, url, title); + } else { + // if we get here, we matched a label but didn't get further: + subj->pos = endlabel; + lab = parse_inlines(rawlabel, subj->reference_map); + bdestroy(rawlabel); + result = append_inlines(make_str(bfromcstr("[")), + append_inlines(lab, + make_str(bfromcstr("]")))); + return result; + } + } else { + // Check for reference link. 
+ // First, see if there's another label: + subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel); + reflabel = rawlabel; + // if followed by a nonempty link label, we change reflabel to it: + if (peek_char(subj) == '[' && + link_label(subj, &rawlabel2)) { + if (blength(rawlabel2) > 0) { + reflabel = rawlabel2; + } + } else { + subj->pos = endlabel; + } + // lookup rawlabel in subject->reference_map: + ref = lookup_reference(subj->reference_map, reflabel); + if (ref != NULL) { // found + lab = parse_inlines(rawlabel, NULL); + result = make_link(lab, bstrcpy(ref->url), bstrcpy(ref->title)); + } else { + subj->pos = endlabel; + lab = parse_inlines(rawlabel, subj->reference_map); + result = append_inlines(make_str(bfromcstr("[")), + append_inlines(lab, make_str(bfromcstr("]")))); + } + bdestroy(rawlabel); + bdestroy(rawlabel2); + return result; + } + } + // If we fall through to here, it means we didn't match a link: + advance(subj); // advance past [ + return make_str(bfromcstr("[")); +} + +// Parse a hard or soft linebreak, returning an inline. +// Assumes the subject has a newline at the current position. +static inl* handle_newline(subject *subj) +{ + int nlpos = subj->pos; + // skip over newline + advance(subj); + // skip spaces at beginning of line + while (peek_char(subj) == ' ') { + advance(subj); + } + if (nlpos > 1 && + bchar(subj->buffer, nlpos - 1) == ' ' && + bchar(subj->buffer, nlpos - 2) == ' ') { + return make_linebreak(); + } else { + return make_softbreak(); + } +} + +inline static int not_eof(subject* subj) +{ + return !is_eof(subj); +} + +// Parse inlines while a predicate is satisfied. Return inlines. +extern inl* parse_inlines_while(subject* subj, int (*f)(subject*)) +{ + inl* result = NULL; + inl** last = &result; + while ((*f)(subj) && parse_inline(subj, last)) { + } + return result; +} + +// Parse an inline, advancing subject, and add it to last element. +// Adjust tail to point to new last element of list. 
+// Return 0 if no inline can be parsed, 1 otherwise. +extern int parse_inline(subject* subj, inl ** last) +{ + inl* new = NULL; + bstring contents; + bstring special_chars; + unsigned char c; + int endpos; + c = peek_char(subj); + if (c == 0) { + return 0; + } + switch(c){ + case '\n': + new = handle_newline(subj); + break; + case '`': + new = handle_backticks(subj); + break; + case '\\': + new = handle_backslash(subj); + break; + case '&': + new = handle_entity(subj); + break; + case '<': + new = handle_pointy_brace(subj); + break; + case '_': + if (subj->pos > 0 && (isalnum(bchar(subj->buffer, subj->pos - 1)) || + bchar(subj->buffer, subj->pos - 1) == '_')) { + new = make_str(take_one(subj)); + } else { + new = handle_strong_emph(subj, '_'); + } + break; + case '*': + new = handle_strong_emph(subj, '*'); + break; + case '[': + new = handle_left_bracket(subj); + break; + case '!': + advance(subj); + if (peek_char(subj) == '[') { + new = handle_left_bracket(subj); + if (new != NULL && new->tag == link) { + new->tag = image; + } else { + new = append_inlines(make_str(bfromcstr("!")), new); + } + } else { + new = make_str(bfromcstr("!")); + } + break; + default: + // we read until we hit a special character + special_chars = bfromcstr("\n\\`&_*[]buffer, subj->pos, special_chars); + bdestroy(special_chars); + if (endpos == subj->pos) { + // current char is special: read a 1-character str + contents = take_one(subj); + } else if (endpos == BSTR_ERR) { + // special char not found, take whole rest of buffer: + endpos = subj->buffer->slen; + contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos); + subj->pos = endpos; + } else { + // take buffer from subj->pos to endpos to str. + contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos); + subj->pos = endpos; + // if we're at a newline, strip trailing spaces. 
+ if (peek_char(subj) == '\n') { + brtrimws(contents); + } + } + new = make_str(contents); + } + if (*last == NULL) { + *last = new; + } else { + append_inlines(*last, new); + } + return 1; +} + +extern inl* parse_inlines(bstring input, reference** refmap) +{ + subject * subj = make_subject(input, refmap); + inl * result = parse_inlines_while(subj, not_eof); + free(subj); + return result; +} + +// Parse zero or more space characters, including at most one newline. +void spnl(subject* subj) +{ + bool seen_newline = false; + while (peek_char(subj) == ' ' || + (!seen_newline && + (seen_newline = peek_char(subj) == '\n'))) { + advance(subj); + } +} + +// Parse reference. Assumes string begins with '[' character. +// Modify refmap if a reference is encountered. +// Return 0 if no reference found, otherwise position of subject +// after reference is parsed. +extern int parse_reference(bstring input, reference** refmap) +{ + subject * subj = make_subject(input, NULL); + bstring lab = NULL; + bstring url = NULL; + bstring title = NULL; + int matchlen = 0; + int beforetitle; + reference * new = NULL; + int newpos; + + // parse label: + if (!link_label(subj, &lab)) { + free(subj); + return 0; + } + // colon: + if (peek_char(subj) == ':') { + advance(subj); + } else { + free(subj); + bdestroy(lab); + return 0; + } + // parse link url: + spnl(subj); + matchlen = scan_link_url(subj->buffer, subj->pos); + if (matchlen) { + url = bmidstr(subj->buffer, subj->pos, matchlen); + clean_url(url); + subj->pos += matchlen; + } else { + free(subj); + bdestroy(lab); + bdestroy(url); + return 0; + } + // parse optional link_title + beforetitle = subj->pos; + spnl(subj); + matchlen = scan_link_title(subj->buffer, subj->pos); + if (matchlen) { + title = bmidstr(subj->buffer, subj->pos, matchlen); + clean_title(title); + subj->pos += matchlen; + } else { + subj->pos = beforetitle; + title = bfromcstr(""); + } + // parse final spaces and newline: + while (peek_char(subj) == ' ') { + 
advance(subj); + } + if (peek_char(subj) == '\n') { + advance(subj); + } else if (peek_char(subj) != 0) { + free(subj); + bdestroy(lab); + bdestroy(url); + bdestroy(title); + return 0; + } + // insert reference into refmap + new = make_reference(lab, url, title); + add_reference(refmap, new); + + newpos = subj->pos; + free(subj); + bdestroy(lab); + bdestroy(url); + bdestroy(title); + return newpos; +} + diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..40a63bc --- /dev/null +++ b/src/main.c @@ -0,0 +1,102 @@ +#include +#include +#include "bstrlib.h" +#include "stmd.h" +#include "debug.h" + +void print_usage() +{ + printf("Usage: stmd [FILE*]\n"); + printf("Options: --help, -h Print usage information\n"); + printf(" --ast Print AST instead of HTML\n"); + printf(" --version Print version\n"); +} + +int main(int argc, char *argv[]) { + int i; + bool ast = false; + int g = 0; + int numfps = 0; + int files[argc]; + + for (i=1; i < argc; i++) { + if (strcmp(argv[i], "--version") == 0) { + printf("stmd %s", VERSION); + printf(" - standard markdown converter (c) 2014 John MacFarlane\n"); + exit(0); + } else if ((strcmp(argv[i], "--help") == 0) || + (strcmp(argv[i], "-h") == 0)) { + print_usage(); + exit(0); + } else if (strcmp(argv[i], "--ast") == 0) { + ast = true; + } else if (*argv[i] == '-') { + print_usage(); + exit(1); + } else { // treat as file argument + files[g] = i; + g++; + } + } + + numfps = g; + bstring s = NULL; + bstring html; + g = 0; + block * cur = make_document(); + int linenum = 1; + extern int errno; + FILE * fp = NULL; + + if (numfps == 0) { + // read from stdin + while ((s = bgets((bNgetc) fgetc, stdin, '\n'))) { + check(incorporate_line(s, linenum, &cur) == 0, + "error incorporating line %d", linenum); + bdestroy(s); + linenum++; + } + } else { + // iterate over input file pointers + for (g=0; g < numfps; g++) { + + fp = fopen(argv[files[g]], "r"); + if (fp == NULL) { + fprintf(stderr, "Error opening file %s: %s\n", + 
argv[files[g]], strerror(errno)); + exit(1); + } + + struct bStream *stream = bsopen((bNread)fread, fp); + if (stream == NULL) { + printf("Error opening stream\n"); + } + while (bsreadln(s, stream, '\n') != BSTR_ERR) { + check(incorporate_line(s, linenum, &cur) == 0, + "error incorporating line %d of %s", linenum, argv[files[g]]); + linenum++; + } + bsclose(stream); + } + } + + while (cur != cur->top) { + finalize(cur, linenum); + cur = cur->parent; + } + check(cur == cur->top, "problems finalizing open containers"); + finalize(cur, linenum); + process_inlines(cur, cur->attributes.refmap); + if (ast) { + print_blocks(cur, 0); + } else { + check(blocks_to_html(cur, &html, false) == 0, "could not format as HTML"); + printf("%s", html->data); + bdestroy(html); + } + free_blocks(cur); + return 0; +error: + return -1; +} + diff --git a/src/print.c b/src/print.c new file mode 100644 index 0000000..a924870 --- /dev/null +++ b/src/print.c @@ -0,0 +1,168 @@ +#include +#include +#include "bstrlib.h" +#include "stmd.h" +#include "debug.h" + +static bstring format_str(bstring s) +{ + int pos = 0; + int len = blength(s); + bstring result = bfromcstr(""); + char c; + bformata(result, "\""); + while (pos < len) { + c = bchar(s, pos); + switch (c) { + case '\n': + bformata(result, "\\n"); + break; + case '"': + bformata(result, "\\\""); + break; + case '\\': + bformata(result, "\\\\"); + break; + default: + bformata(result, "%c", c); + } + pos++; + } + bformata(result, "\""); + return result; +} + +// Functions to pretty-print inline and block lists, for debugging. +// Prettyprint an inline list, for debugging. 
+extern void print_blocks(block* b, int indent) +{ + struct ListData * data; + while(b != NULL) { + // printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line); + for (int i=0; i < indent; i++) { + putchar(' '); + } + switch(b->tag) { + case document: + printf("document\n"); + print_blocks(b->children, indent + 2); + break; + case block_quote: + printf("block_quote\n"); + print_blocks(b->children, indent + 2); + break; + case list_item: + data = &(b->attributes.list_data); + printf("list_item\n"); + print_blocks(b->children, indent + 2); + break; + case list: + data = &(b->attributes.list_data); + if (data->list_type == ordered) { + printf("list (type=ordered tight=%s start=%d delim=%s)\n", + (data->tight ? "true" : "false"), + data->start, + (data->delimiter == parens ? "parens" : "period")); + } else { + printf("list (type=bullet tight=%s bullet_char=%c)\n", + (data->tight ? "true" : "false"), + data->bullet_char); + } + print_blocks(b->children, indent + 2); + break; + case atx_header: + printf("atx_header (level=%d)\n", b->attributes.header_level); + print_inlines(b->inline_content, indent + 2); + break; + case setext_header: + printf("setext_header (level=%d)\n", b->attributes.header_level); + print_inlines(b->inline_content, indent + 2); + break; + case paragraph: + printf("paragraph\n"); + print_inlines(b->inline_content, indent + 2); + break; + case hrule: + printf("hrule\n"); + break; + case indented_code: + printf("indented_code %s\n", format_str(b->string_content)->data); + break; + case fenced_code: + printf("fenced_code length=%d info=%s %s\n", + b->attributes.fenced_code_data.fence_length, + format_str(b->attributes.fenced_code_data.info)->data, + format_str(b->string_content)->data); + break; + case html_block: + printf("html_block %s\n", format_str(b->string_content)->data); + break; + case reference_def: + printf("reference_def\n"); + break; + default: + log_warn("block type %d not implemented\n", b->tag); + break; + } + b = b->next; + 
} +} + +// Prettyprint an inline list, for debugging. +extern void print_inlines(inl* ils, int indent) +{ + while(ils != NULL) { + /* + // we add 11 extra spaces for the line/column info + for (int i=0; i < 11; i++) { + putchar(' '); + } + putchar('|'); + putchar(' '); + */ + for (int i=0; i < indent; i++) { + putchar(' '); + } + switch(ils->tag) { + case str: + printf("str %s\n", format_str(ils->content.literal)->data); + break; + case linebreak: + printf("linebreak\n"); + break; + case softbreak: + printf("softbreak\n"); + break; + case code: + printf("code %s\n", format_str(ils->content.literal)->data); + break; + case raw_html: + printf("html %s\n", format_str(ils->content.literal)->data); + break; + case entity: + printf("entity %s\n", format_str(ils->content.literal)->data); + break; + case link: + printf("link url=%s title=%s\n", + format_str(ils->content.linkable.url)->data, + format_str(ils->content.linkable.title)->data); + print_inlines(ils->content.linkable.label, indent + 2); + break; + case image: + printf("image url=%s title=%s\n", + format_str(ils->content.linkable.url)->data, + format_str(ils->content.linkable.title)->data); + print_inlines(ils->content.linkable.label, indent + 2); + break; + case strong: + printf("strong\n"); + print_inlines(ils->content.linkable.label, indent + 2); + break; + case emph: + printf("emph\n"); + print_inlines(ils->content.linkable.label, indent + 2); + break; + } + ils = ils->next; + } +} diff --git a/src/scanners.h b/src/scanners.h new file mode 100644 index 0000000..71e0520 --- /dev/null +++ b/src/scanners.h @@ -0,0 +1,15 @@ +#include "bstrlib.h" + +int scan_autolink_uri(bstring s, int pos); +int scan_autolink_email(bstring s, int pos); +int scan_html_tag(bstring s, int pos); +int scan_html_block_tag(bstring s, int pos); +int scan_link_url(bstring s, int pos); +int scan_link_title(bstring s, int pos); +int scan_spacechars(bstring s, int pos); +int scan_atx_header_start(bstring s, int pos); +int 
scan_setext_header_line(bstring s, int pos); +int scan_hrule(bstring s, int pos); +int scan_open_code_fence(bstring s, int pos); +int scan_close_code_fence(bstring s, int pos, int len); +int scan_entity(bstring s, int pos); diff --git a/src/scanners.re b/src/scanners.re new file mode 100644 index 0000000..f90238d --- /dev/null +++ b/src/scanners.re @@ -0,0 +1,238 @@ +#include "bstrlib.h" + +/*!re2c + re2c:define:YYCTYPE = "unsigned char"; + re2c:define:YYCURSOR = p; + re2c:define:YYMARKER = marker; + re2c:define:YYCTXMARKER = marker; + re2c:yyfill:enable = 0; + + wordchar = [^\x00-\x20]; + + spacechar = [ \t\n]; + + reg_char = [^\\()\x00-\x20]; + + escaped_char = [\\][!"#$%&'()*+,./:;<=>?@[\\\]^_`{|}~-]; + + tagname = [A-Za-z][A-Za-z0-9]*; + + blocktagname = 'article'|'header'|'aside'|'hgroup'|'blockquote'|'hr'|'body'|'li'|'br'|'map'|'button'|'object'|'canvas'|'ol'|'caption'|'output'|'col'|'p'|'colgroup'|'pre'|'dd'|'progress'|'div'|'section'|'dl'|'table'|'td'|'dt'|'tbody'|'embed'|'textarea'|'fieldset'|'tfoot'|'figcaption'|'th'|'figure'|'thead'|'footer'|'footer'|'tr'|'form'|'ul'|'h1'|'h2'|'h3'|'h4'|'h5'|'h6'|'video'|'script'|'style'; + + attributename = [a-zA-Z_:][a-zA-Z0-9:._-]*; + + unquotedvalue = [^\"'=<>`\x00]+; + singlequotedvalue = ['][^'\x00]*[']; + doublequotedvalue = [\"][^\"\x00]*[\"]; + + attributevalue = unquotedvalue | singlequotedvalue | doublequotedvalue; + + attributevaluespec = spacechar* [=] spacechar* attributevalue; + + attribute = spacechar+ attributename attributevaluespec?; + + opentag = tagname attribute* spacechar* [/]? [>]; + closetag = [/] tagname spacechar* [>]; + + htmlcomment = "!--" ([^-\x00]+ | [-][^-\x00]+)* "-->"; + + processinginstruction = "?" ([^?>\x00]+ | [?][^>\x00])* "?>"; + + declaration = "!" 
[A-Z]+ spacechar+ [^>\x00]* ">"; + + cdata = "![CDATA[" ([^\]\x00]+ | "]" [^\]\x00] | "]]" [^>\x00])* "]]>"; + + htmltag = opentag | closetag | htmlcomment | processinginstruction | + declaration | cdata; + + in_parens_nosp = [(] (reg_char|escaped_char)* [)]; + + in_double_quotes = ["] (escaped_char|[^"\x00])* ["]; + in_single_quotes = ['] (escaped_char|[^'\x00])* [']; + in_parens = [(] (escaped_char|[^)\x00])* [)]; + + scheme = 'coap'|'doi'|'javascript'|'aaa'|'aaas'|'about'|'acap'|'cap'|'cid'|'crid'|'data'|'dav'|'dict'|'dns'|'file'|'ftp'|'geo'|'go'|'gopher'|'h323'|'http'|'https'|'iax'|'icap'|'im'|'imap'|'info'|'ipp'|'iris'|'iris.beep'|'iris.xpc'|'iris.xpcs'|'iris.lwz'|'ldap'|'mailto'|'mid'|'msrp'|'msrps'|'mtqp'|'mupdate'|'news'|'nfs'|'ni'|'nih'|'nntp'|'opaquelocktoken'|'pop'|'pres'|'rtsp'|'service'|'session'|'shttp'|'sieve'|'sip'|'sips'|'sms'|'snmp'|'soap.beep'|'soap.beeps'|'tag'|'tel'|'telnet'|'tftp'|'thismessage'|'tn3270'|'tip'|'tv'|'urn'|'vemmi'|'ws'|'wss'|'xcon'|'xcon-userid'|'xmlrpc.beep'|'xmlrpc.beeps'|'xmpp'|'z39.50r'|'z39.50s'|'adiumxtra'|'afp'|'afs'|'aim'|'apt'|'attachment'|'aw'|'beshare'|'bitcoin'|'bolo'|'callto'|'chrome'|'chrome-extension'|'com-eventbrite-attendee'|'content'|'cvs'|'dlna-playsingle'|'dlna-playcontainer'|'dtn'|'dvb'|'ed2k'|'facetime'|'feed'|'finger'|'fish'|'gg'|'git'|'gizmoproject'|'gtalk'|'hcp'|'icon'|'ipn'|'irc'|'irc6'|'ircs'|'itms'|'jar'|'jms'|'keyparc'|'lastfm'|'ldaps'|'magnet'|'maps'|'market'|'message'|'mms'|'ms-help'|'msnim'|'mumble'|'mvn'|'notes'|'oid'|'palm'|'paparazzi'|'platform'|'proxy'|'psyc'|'query'|'res'|'resource'|'rmi'|'rsync'|'rtmp'|'secondlife'|'sftp'|'sgn'|'skype'|'smb'|'soldat'|'spotify'|'ssh'|'steam'|'svn'|'teamspeak'|'things'|'udp'|'unreal'|'ut2004'|'ventrilo'|'view-source'|'webcal'|'wtai'|'wyciwyg'|'xfire'|'xri'|'ymsgr'; +*/ + +// Try to match URI autolink after first <, returning number of chars matched. 
// s is scanned from byte offset pos.  The rule ends in [>], so a nonzero
// result counts the closing '>' as well; 0 means no match.
extern int scan_autolink_uri(bstring s, int pos)
{
  unsigned char * marker = NULL;
  unsigned char * p = &(s->data[pos]);
  unsigned char * start = p;
/*!re2c
  scheme [:]([^\x00-\x20<>\\]|escaped_char)*[>]  { return (p - start); }
  .? { return 0; }
*/
}

// Try to match email autolink after first <, returning num of chars matched.
// As with the URI scanner, the count includes the closing '>'; 0 means
// no match.
extern int scan_autolink_email(bstring s, int pos)
{
  unsigned char * marker = NULL;
  unsigned char * p = &(s->data[pos]);
  unsigned char * start = p;
/*!re2c
  [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
    [@]
    [a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
    ([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*
    [>] { return (p - start); }
  .? { return 0; }
*/
}

// Try to match an HTML tag after first <, returning num of chars matched.
// Returns 0 when nothing matches.
extern int scan_html_tag(bstring s, int pos)
{
  unsigned char * marker = NULL;
  unsigned char * p = &(s->data[pos]);
  unsigned char * start = p;
/*!re2c
  htmltag { return (p - start); }
  .? { return 0; }
*/
}

// Try to match an HTML block tag including first <,
// returning num of chars matched.
// Three forms match: a closing block tag, an opening block tag, and a
// bare "<!" or "<?".  Returns 0 when nothing matches.
extern int scan_html_block_tag(bstring s, int pos)
{
  unsigned char * marker = NULL;
  unsigned char * p = &(s->data[pos]);
  unsigned char * start = p;
/*!re2c
  [<] [/] blocktagname (spacechar | [>])  { return (p - start); }
  [<] blocktagname (spacechar | [/>]) { return (p - start); }
  [<] [!?] { return (p - start); }
  .? { return 0; }
*/
}

// Try to match a URL in a link or reference, return number of chars matched.
// This may optionally be contained in <..>; otherwise
// whitespace and unbalanced right parentheses aren't allowed.
// Newlines aren't ever allowed.
// Leading spaces and newlines before the URL are part of the match.
// The second rule can match the empty string, in which case the result
// is 0.
extern int scan_link_url(bstring s, int pos)
{
  unsigned char * marker = NULL;
  unsigned char * p = &(s->data[pos]);
  unsigned char * start = p;
/*!re2c
  [ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
  [ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }
  .? { return 0; }
*/
}

// Try to match a link title (in single quotes, in double quotes, or
// in parentheses), returning number of chars matched.  Allow one
// level of internal nesting (quotes within quotes).
// The count includes both delimiters; 0 means no match.
extern int scan_link_title(bstring s, int pos)
{
  unsigned char * marker = NULL;
  unsigned char * p = &(s->data[pos]);
  unsigned char * start = p;
/*!re2c
  ["] (escaped_char|[^"\x00])* ["]   { return (p - start); }
  ['] (escaped_char|[^'\x00])* ['] { return (p - start); }
  [(] (escaped_char|[^)\x00])* [)]  { return (p - start); }
  .? { return 0; }
*/
}

// Match space characters, including newlines.
// Returns the number of spaces, tabs and newlines found at pos, which
// may be 0.
extern int scan_spacechars(bstring s, int pos)
{
  unsigned char * p = &(s->data[pos]);
  unsigned char * start = p;
/*!re2c
  [ \t\n]* { return (p - start); }
  . { return 0; }
*/
}

// Match ATX header start.
// Returns the length of the run of '#' plus the spaces (or the single
// newline) after it, or 0 for no match.
extern int scan_atx_header_start(bstring s, int pos)
{
  unsigned char * marker = NULL;
  unsigned char * p = &(s->data[pos]);
  unsigned char * start = p;
/*!re2c
  [#]{1,6} ([ ]+|[\n])  { return (p - start); }
  .? { return 0; }
*/
}

// Match setext header line.  Return 1 for level-1 header,
// 2 for level-2, 0 for no match.
extern int scan_setext_header_line(bstring s, int pos)
{
  unsigned char * marker = NULL;
  unsigned char * p = &(s->data[pos]);
/*!re2c
  [=]+ [ ]* [\n] { return 1; }
  [-]+ [ ]* [\n] { return 2; }
  .? { return 0; }
*/
}

// Scan a horizontal rule line: "...three or more hyphens, asterisks,
// or underscores on a line by themselves. If you wish, you may use
// spaces between the hyphens or asterisks."
+extern int scan_hrule(bstring s, int pos) +{ + unsigned char * marker = NULL; + unsigned char * p = &(s->data[pos]); + unsigned char * start = p; +/*!re2c + ([*][ ]*){3,} [ \t]* [\n] { return (p - start); } + ([_][ ]*){3,} [ \t]* [\n] { return (p - start); } + ([-][ ]*){3,} [ \t]* [\n] { return (p - start); } + .? { return 0; } +*/ +} + +// Scan an opening code fence. +extern int scan_open_code_fence(bstring s, int pos) +{ + unsigned char * marker = NULL; + unsigned char * p = &(s->data[pos]); + unsigned char * start = p; +/*!re2c + [`]{3,} / [^`\n\x00]*[\n] { return (p - start); } + [~]{3,} / [^~\n\x00]*[\n] { return (p - start); } + .? { return 0; } +*/ +} + +// Scan a closing code fence with length at least len. +extern int scan_close_code_fence(bstring s, int pos, int len) +{ + unsigned char * marker = NULL; + unsigned char * p = &(s->data[pos]); + unsigned char * start = p; +/*!re2c + ([`]{3,} | [~]{3,}) / spacechar* [\n] + { if (p - start > len) { + return (p - start); + } else { + return 0; + } } + .? { return 0; } +*/ +} + +// Scans an entity. +// Returns number of chars matched. +extern int scan_entity(bstring s, int pos) +{ + unsigned char * marker = NULL; + unsigned char * p = &(s->data[pos]); + unsigned char * start = p; +/*!re2c + [&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;] + { return (p - start); } + .? 
{ return 0; } +*/ +} diff --git a/src/stmd.h b/src/stmd.h new file mode 100644 index 0000000..5e34399 --- /dev/null +++ b/src/stmd.h @@ -0,0 +1,121 @@ +#include +#include "bstrlib.h" +#include "uthash.h" + +#define VERSION "0.1" +#define CODE_INDENT 4 + +typedef struct Inline { + enum { str, softbreak, linebreak, code, raw_html, entity, + emph, strong, link, image } tag; + union { + bstring literal; + struct Inline* inlines; + struct { struct Inline* label; + bstring url; + bstring title; + } linkable; + } content; + struct Inline* next; +} inl; + +typedef struct Reference { + bstring label; + bstring url; + bstring title; + UT_hash_handle hh; // used by uthash +} reference; + +typedef struct Subject { + bstring buffer; + int pos; + reference** reference_map; + int label_nestlevel; +} subject; + +// Types for blocks + +struct ListData { + enum { bullet, + ordered } list_type; + int marker_offset; + int padding; + int start; + enum { period, + parens } delimiter; + unsigned char bullet_char; + bool tight; +}; + +struct FencedCodeData { + int fence_length; + int fence_offset; + char fence_char; + bstring info; +}; + +typedef struct Block { + enum { document, + block_quote, + list, + list_item, + fenced_code, + indented_code, + html_block, + paragraph, + atx_header, + setext_header, + hrule, + reference_def + } tag; + int start_line; + int start_column; + int end_line; + bool open; + bool last_line_blank; + struct Block* children; + struct Block* last_child; + struct Block* parent; + struct Block* top; + bstring string_content; + inl* inline_content; + union { + struct ListData list_data; + struct FencedCodeData fenced_code_data; + int header_level; + reference** refmap; + } attributes; + struct Block * next; + struct Block * prev; +} block; + +int parse_inline(subject* subj, inl ** last); +inl* parse_inlines(bstring input, reference** refmap); +inl* parse_inlines_while(subject* subj, int (*f)(subject*)); +void free_inlines(inl* e); +int parse_reference(bstring input, 
reference** refmap); +void free_reference(reference *ref); +void free_reference_map(reference **refmap); +reference* make_reference(bstring label, bstring url, bstring title); +reference* lookup_reference(reference** refmap, bstring label); +void add_reference(reference** refmap, reference* ref); +int unescape(bstring s); + +extern block* make_document(); +extern block* add_child(block* parent, + int block_type, int start_line, int start_column); +void free_blocks(block* e); + +// FOR NOW: +int process_inlines(block* cur, reference** refmap); +int incorporate_line(bstring ln, int line_number, block** curptr); +int finalize(block* b, int line_number); + +void print_inlines(inl* ils, int indent); +void print_blocks(block* blk, int indent); + +int blocks_to_html(block* b, bstring* result, bool tight); +int inlines_to_html(inl* b, bstring* result); + +int bdetab(bstring s, int utf8); + diff --git a/src/utf8.c b/src/utf8.c new file mode 100644 index 0000000..4bb3b35 --- /dev/null +++ b/src/utf8.c @@ -0,0 +1,106 @@ +#include +#include "bstrlib.h" +#include "debug.h" + +#define advance(s) \ + s++; \ + check(*s >> 6 == 0x02, "UTF-8 decode error on byte %x", *s); + +// Reads a unicode code point from a UTF8-encoded string, and +// puts it in the pointer n. If something illegal +// is encountered, 0xFFFD is emitted. +// Returns a pointer to next position in string, or NULL if no +// more characters remain. 
+extern unsigned char * from_utf8(unsigned char * s, unsigned int *n) +{ + int x = 0; + + if (*s == 0) { + return NULL; + } else if (*s < 0x80) { + x = *s; + } else if (*s >> 5 == 0x06) { + x = *s & 0x1F; + advance(s); + x = (x << 6) + (*s & 0x3F); + } else if (*s >> 4 == 0x0E) { + x = *s & 0x0F; + advance(s); + x = (x << 6) + (*s & 0x3F); + advance(s); + x = (x << 6) + (*s & 0x3F); + } else if (*s >> 3 == 0x1E) { + x = *s & 0x07; + advance(s); + x = (x << 6) + (*s & 0x3F); + advance(s); + x = (x << 6) + (*s & 0x3F); + advance(s); + x = (x << 6) + (*s & 0x3F); + } else if (*s >> 2 == 0x3E) { + x = *s & 0x03; + advance(s); + x = (x << 6) + (*s & 0x3F); + advance(s); + x = (x << 6) + (*s & 0x3F); + advance(s); + x = (x << 6) + (*s & 0x3F); + advance(s); + x = (x << 6) + (*s & 0x3F); + } else { + log_err("UTF-8 decode error on byte %x", *s); + goto error; + } + *n = x; + s++; + return s; + error: + *n = 0xFFFD; + return s; +} + +// Converts the unicode code point c to UTF-8, +// putting the result in dest. Returns 0 on success, -1 on error. +extern int to_utf8(unsigned int c, bstring dest) +{ + if (c < 0x80) { + bconchar(dest, c); + } else if (c < 0x800) { + bconchar(dest, 192 + c/64); + bconchar(dest, 128 + c%64); + } else if (c - 0xd800u < 0x800) { + goto error; + } else if (c < 0x10000) { + bconchar(dest, 224 + c / 4096); + bconchar(dest, 128 + c /64%64); + bconchar(dest, 128 + c%64); + } else if (c < 0x110000) { + bconchar(dest, 240 + c/262144); + bconchar(dest, 128 + c/4096%64); + bconchar(dest, 128 + c/64%64); + bconchar(dest, 128 + c%64); + } else { + goto error; + } + return 0; +error: + return -1; +} + +#define bufpush(x) \ + check(to_utf8(x, buf) == 0, "UTF-8 encode error on code point %04x", x) + +// Returns the case-folded version of the source string, or NULL on error. 
+extern bstring case_fold(bstring source) +{ + unsigned char * s = source->data; + unsigned int c = 0; + bstring buf = bfromcstr(""); + while ((s = from_utf8(s, &c))) { +#include "case_fold_switch.c" + } + return buf; +error: + return NULL; +} + diff --git a/src/utf8.h b/src/utf8.h new file mode 100644 index 0000000..fe59a90 --- /dev/null +++ b/src/utf8.h @@ -0,0 +1,6 @@ +#include +#include "bstrlib.h" + +extern unsigned char * from_utf8(unsigned char * s, unsigned int *n); +extern int to_utf8(unsigned int c, bstring dest); +extern bstring case_fold(bstring source); diff --git a/src/uthash.h b/src/uthash.h new file mode 100644 index 0000000..b9bc7e9 --- /dev/null +++ b/src/uthash.h @@ -0,0 +1,948 @@ +/* +Copyright (c) 2003-2013, Troy D. Hanson http://troydhanson.github.com/uthash/ +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef UTHASH_H +#define UTHASH_H + +#include /* memcmp,strlen */ +#include /* ptrdiff_t */ +#include /* exit() */ + +/* These macros use decltype or the earlier __typeof GNU extension. + As decltype is only available in newer compilers (VS2010 or gcc 4.3+ + when compiling c++ source) this code uses whatever method is needed + or, for VS2008 where neither is available, uses casting workarounds. */ +#ifdef _MSC_VER /* MS compiler */ +#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ +#define DECLTYPE(x) (decltype(x)) +#else /* VS2008 or older (or VS2010 in C mode) */ +#define NO_DECLTYPE +#define DECLTYPE(x) +#endif +#else /* GNU, Sun and other compilers */ +#define DECLTYPE(x) (__typeof(x)) +#endif + +#ifdef NO_DECLTYPE +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + char **_da_dst = (char**)(&(dst)); \ + *_da_dst = (char*)(src); \ +} while(0) +#else +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + (dst) = DECLTYPE(dst)(src); \ +} while(0) +#endif + +/* a number of the hash function use uint32_t which isn't defined on win32 */ +#ifdef _MSC_VER +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; +#else +#include /* uint32_t */ +#endif + +#define UTHASH_VERSION 1.9.8 + +#ifndef uthash_fatal +#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ +#endif +#ifndef uthash_malloc +#define uthash_malloc(sz) malloc(sz) /* malloc fcn */ +#endif +#ifndef uthash_free +#define uthash_free(ptr,sz) free(ptr) /* free fcn */ +#endif + +#ifndef uthash_noexpand_fyi +#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ +#endif +#ifndef uthash_expand_fyi +#define uthash_expand_fyi(tbl) /* can be defined to log expands */ +#endif + +/* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */ +#define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */ + +/* calculate the 
element whose hash handle address is hhe */ +#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) + +#define HASH_FIND(hh,head,keyptr,keylen,out) \ +do { \ + unsigned _hf_bkt,_hf_hashv; \ + out=NULL; \ + if (head) { \ + HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \ + if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \ + HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \ + keyptr,keylen,out); \ + } \ + } \ +} while (0) + +#ifdef HASH_BLOOM +#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM) +#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0) +#define HASH_BLOOM_MAKE(tbl) \ +do { \ + (tbl)->bloom_nbits = HASH_BLOOM; \ + (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ + if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ + memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ + (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ +} while (0) + +#define HASH_BLOOM_FREE(tbl) \ +do { \ + uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ +} while (0) + +#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8))) +#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8))) + +#define HASH_BLOOM_ADD(tbl,hashv) \ + HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) + +#define HASH_BLOOM_TEST(tbl,hashv) \ + HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) + +#else +#define HASH_BLOOM_MAKE(tbl) +#define HASH_BLOOM_FREE(tbl) +#define HASH_BLOOM_ADD(tbl,hashv) +#define HASH_BLOOM_TEST(tbl,hashv) (1) +#define HASH_BLOOM_BYTELEN 0 +#endif + +#define HASH_MAKE_TABLE(hh,head) \ +do { \ + (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ + sizeof(UT_hash_table)); \ + if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ + (head)->hh.tbl->tail = &((head)->hh); \ + (head)->hh.tbl->num_buckets = 
HASH_INITIAL_NUM_BUCKETS; \ + (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ + (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ + (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl->buckets, 0, \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_MAKE((head)->hh.tbl); \ + (head)->hh.tbl->signature = HASH_SIGNATURE; \ +} while(0) + +#define HASH_ADD(hh,head,fieldname,keylen_in,add) \ + HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add) + +#define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \ +do { \ + replaced=NULL; \ + HASH_FIND(hh,head,&((add)->fieldname),keylen_in,replaced); \ + if (replaced!=NULL) { \ + HASH_DELETE(hh,head,replaced); \ + }; \ + HASH_ADD(hh,head,fieldname,keylen_in,add); \ +} while(0) + +#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ +do { \ + unsigned _ha_bkt; \ + (add)->hh.next = NULL; \ + (add)->hh.key = (char*)(keyptr); \ + (add)->hh.keylen = (unsigned)(keylen_in); \ + if (!(head)) { \ + head = (add); \ + (head)->hh.prev = NULL; \ + HASH_MAKE_TABLE(hh,head); \ + } else { \ + (head)->hh.tbl->tail->next = (add); \ + (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ + (head)->hh.tbl->tail = &((add)->hh); \ + } \ + (head)->hh.tbl->num_items++; \ + (add)->hh.tbl = (head)->hh.tbl; \ + HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \ + (add)->hh.hashv, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \ + HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \ + HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \ + HASH_FSCK(hh,head); \ +} while(0) + +#define HASH_TO_BKT( hashv, num_bkts, bkt ) \ +do { \ + bkt = ((hashv) & ((num_bkts) - 1)); \ +} while(0) + +/* delete "delptr" from the hash table. 
+ * "the usual" patch-up process for the app-order doubly-linked-list. + * The use of _hd_hh_del below deserves special explanation. + * These used to be expressed using (delptr) but that led to a bug + * if someone used the same symbol for the head and deletee, like + * HASH_DELETE(hh,users,users); + * We want that to work, but by changing the head (users) below + * we were forfeiting our ability to further refer to the deletee (users) + * in the patch-up process. Solution: use scratch space to + * copy the deletee pointer, then the latter references are via that + * scratch pointer rather than through the repointed (users) symbol. + */ +#define HASH_DELETE(hh,head,delptr) \ +do { \ + unsigned _hd_bkt; \ + struct UT_hash_handle *_hd_hh_del; \ + if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + head = NULL; \ + } else { \ + _hd_hh_del = &((delptr)->hh); \ + if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ + (head)->hh.tbl->tail = \ + (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ + (head)->hh.tbl->hho); \ + } \ + if ((delptr)->hh.prev) { \ + ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ + (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ + } else { \ + DECLTYPE_ASSIGN(head,(delptr)->hh.next); \ + } \ + if (_hd_hh_del->next) { \ + ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \ + (head)->hh.tbl->hho))->prev = \ + _hd_hh_del->prev; \ + } \ + HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ + HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ + (head)->hh.tbl->num_items--; \ + } \ + HASH_FSCK(hh,head); \ +} while (0) + + +/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ +#define HASH_FIND_STR(head,findstr,out) \ + HASH_FIND(hh,head,findstr,strlen(findstr),out) 
+#define HASH_ADD_STR(head,strfield,add) \ + HASH_ADD(hh,head,strfield,strlen(add->strfield),add) +#define HASH_REPLACE_STR(head,strfield,add,replaced) \ + HASH_REPLACE(hh,head,strfield,strlen(add->strfield),add,replaced) +#define HASH_FIND_INT(head,findint,out) \ + HASH_FIND(hh,head,findint,sizeof(int),out) +#define HASH_ADD_INT(head,intfield,add) \ + HASH_ADD(hh,head,intfield,sizeof(int),add) +#define HASH_REPLACE_INT(head,intfield,add,replaced) \ + HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced) +#define HASH_FIND_PTR(head,findptr,out) \ + HASH_FIND(hh,head,findptr,sizeof(void *),out) +#define HASH_ADD_PTR(head,ptrfield,add) \ + HASH_ADD(hh,head,ptrfield,sizeof(void *),add) +#define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \ + HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced) +#define HASH_DEL(head,delptr) \ + HASH_DELETE(hh,head,delptr) + +/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. + * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. + */ +#ifdef HASH_DEBUG +#define HASH_OOPS(...) 
do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) +#define HASH_FSCK(hh,head) \ +do { \ + unsigned _bkt_i; \ + unsigned _count, _bkt_count; \ + char *_prev; \ + struct UT_hash_handle *_thh; \ + if (head) { \ + _count = 0; \ + for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ + _bkt_count = 0; \ + _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ + _prev = NULL; \ + while (_thh) { \ + if (_prev != (char*)(_thh->hh_prev)) { \ + HASH_OOPS("invalid hh_prev %p, actual %p\n", \ + _thh->hh_prev, _prev ); \ + } \ + _bkt_count++; \ + _prev = (char*)(_thh); \ + _thh = _thh->hh_next; \ + } \ + _count += _bkt_count; \ + if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ + HASH_OOPS("invalid bucket count %d, actual %d\n", \ + (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ + } \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("invalid hh item count %d, actual %d\n", \ + (head)->hh.tbl->num_items, _count ); \ + } \ + /* traverse hh in app order; check next/prev integrity, count */ \ + _count = 0; \ + _prev = NULL; \ + _thh = &(head)->hh; \ + while (_thh) { \ + _count++; \ + if (_prev !=(char*)(_thh->prev)) { \ + HASH_OOPS("invalid prev %p, actual %p\n", \ + _thh->prev, _prev ); \ + } \ + _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ + _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \ + (head)->hh.tbl->hho) : NULL ); \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("invalid app item count %d, actual %d\n", \ + (head)->hh.tbl->num_items, _count ); \ + } \ + } \ +} while (0) +#else +#define HASH_FSCK(hh,head) +#endif + +/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to + * the descriptor to which this macro is defined for tuning the hash function. + * The app can #include to get the prototype for write(2). 
*/ +#ifdef HASH_EMIT_KEYS +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ +do { \ + unsigned _klen = fieldlen; \ + write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ + write(HASH_EMIT_KEYS, keyptr, fieldlen); \ +} while (0) +#else +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) +#endif + +/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ +#ifdef HASH_FUNCTION +#define HASH_FCN HASH_FUNCTION +#else +#define HASH_FCN HASH_JEN +#endif + +/* The Bernstein hash function, used in Perl prior to v5.6 */ +#define HASH_BER(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _hb_keylen=keylen; \ + char *_hb_key=(char*)(key); \ + (hashv) = 0; \ + while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \ + bkt = (hashv) & (num_bkts-1); \ +} while (0) + + +/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at + * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ +#define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _sx_i; \ + char *_hs_key=(char*)(key); \ + hashv = 0; \ + for(_sx_i=0; _sx_i < keylen; _sx_i++) \ + hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ + bkt = hashv & (num_bkts-1); \ +} while (0) + +#define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _fn_i; \ + char *_hf_key=(char*)(key); \ + hashv = 2166136261UL; \ + for(_fn_i=0; _fn_i < keylen; _fn_i++) \ + hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \ + bkt = hashv & (num_bkts-1); \ +} while(0) + +#define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _ho_i; \ + char *_ho_key=(char*)(key); \ + hashv = 0; \ + for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ + hashv += _ho_key[_ho_i]; \ + hashv += (hashv << 10); \ + hashv ^= (hashv >> 6); \ + } \ + hashv += (hashv << 3); \ + hashv ^= (hashv >> 11); \ + hashv += (hashv << 15); \ + bkt = hashv & (num_bkts-1); \ +} while(0) + +#define HASH_JEN_MIX(a,b,c) \ +do { \ + a -= b; a -= c; a ^= ( c >> 13 ); \ + b -= c; b -= a; b ^= ( a << 8 ); \ + c -= a; c 
-= b; c ^= ( b >> 13 ); \ + a -= b; a -= c; a ^= ( c >> 12 ); \ + b -= c; b -= a; b ^= ( a << 16 ); \ + c -= a; c -= b; c ^= ( b >> 5 ); \ + a -= b; a -= c; a ^= ( c >> 3 ); \ + b -= c; b -= a; b ^= ( a << 10 ); \ + c -= a; c -= b; c ^= ( b >> 15 ); \ +} while (0) + +#define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned _hj_i,_hj_j,_hj_k; \ + unsigned char *_hj_key=(unsigned char*)(key); \ + hashv = 0xfeedbeef; \ + _hj_i = _hj_j = 0x9e3779b9; \ + _hj_k = (unsigned)(keylen); \ + while (_hj_k >= 12) { \ + _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ + + ( (unsigned)_hj_key[2] << 16 ) \ + + ( (unsigned)_hj_key[3] << 24 ) ); \ + _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ + + ( (unsigned)_hj_key[6] << 16 ) \ + + ( (unsigned)_hj_key[7] << 24 ) ); \ + hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ + + ( (unsigned)_hj_key[10] << 16 ) \ + + ( (unsigned)_hj_key[11] << 24 ) ); \ + \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + \ + _hj_key += 12; \ + _hj_k -= 12; \ + } \ + hashv += keylen; \ + switch ( _hj_k ) { \ + case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \ + case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \ + case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \ + case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \ + case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \ + case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \ + case 5: _hj_j += _hj_key[4]; \ + case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \ + case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \ + case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \ + case 1: _hj_i += _hj_key[0]; \ + } \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + bkt = hashv & (num_bkts-1); \ +} while(0) + +/* The Paul Hsieh hash function */ +#undef get16bits +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ + || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) +#define get16bits(d) (*((const uint16_t *) (d))) +#endif + +#if !defined (get16bits) +#define get16bits(d) 
((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ + +(uint32_t)(((const uint8_t *)(d))[0]) ) +#endif +#define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned char *_sfh_key=(unsigned char*)(key); \ + uint32_t _sfh_tmp, _sfh_len = keylen; \ + \ + int _sfh_rem = _sfh_len & 3; \ + _sfh_len >>= 2; \ + hashv = 0xcafebabe; \ + \ + /* Main loop */ \ + for (;_sfh_len > 0; _sfh_len--) { \ + hashv += get16bits (_sfh_key); \ + _sfh_tmp = (uint32_t)(get16bits (_sfh_key+2)) << 11 ^ hashv; \ + hashv = (hashv << 16) ^ _sfh_tmp; \ + _sfh_key += 2*sizeof (uint16_t); \ + hashv += hashv >> 11; \ + } \ + \ + /* Handle end cases */ \ + switch (_sfh_rem) { \ + case 3: hashv += get16bits (_sfh_key); \ + hashv ^= hashv << 16; \ + hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)] << 18); \ + hashv += hashv >> 11; \ + break; \ + case 2: hashv += get16bits (_sfh_key); \ + hashv ^= hashv << 11; \ + hashv += hashv >> 17; \ + break; \ + case 1: hashv += *_sfh_key; \ + hashv ^= hashv << 10; \ + hashv += hashv >> 1; \ + } \ + \ + /* Force "avalanching" of final 127 bits */ \ + hashv ^= hashv << 3; \ + hashv += hashv >> 5; \ + hashv ^= hashv << 4; \ + hashv += hashv >> 17; \ + hashv ^= hashv << 25; \ + hashv += hashv >> 6; \ + bkt = hashv & (num_bkts-1); \ +} while(0) + +#ifdef HASH_USING_NO_STRICT_ALIASING +/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads. + * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. + * MurmurHash uses the faster approach only on CPU's where we know it's safe. 
+ * + * Note the preprocessor built-in defines can be emitted using: + * + * gcc -m64 -dM -E - < /dev/null (on gcc) + * cc -## a.c (where a.c is a simple test file) (Sun Studio) + */ +#if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)) +#define MUR_GETBLOCK(p,i) p[i] +#else /* non intel */ +#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0) +#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 0x3) == 1) +#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 0x3) == 2) +#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 0x3) == 3) +#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) +#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__)) +#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24)) +#define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16)) +#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8)) +#else /* assume little endian non-intel */ +#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24)) +#define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16)) +#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8)) +#endif +#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \ + (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \ + (MUR_PLUS2_ALIGNED(p) ? 
MUR_TWO_TWO(p) : \ + MUR_ONE_THREE(p)))) +#endif +#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) +#define MUR_FMIX(_h) \ +do { \ + _h ^= _h >> 16; \ + _h *= 0x85ebca6b; \ + _h ^= _h >> 13; \ + _h *= 0xc2b2ae35l; \ + _h ^= _h >> 16; \ +} while(0) + +#define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \ +do { \ + const uint8_t *_mur_data = (const uint8_t*)(key); \ + const int _mur_nblocks = (keylen) / 4; \ + uint32_t _mur_h1 = 0xf88D5353; \ + uint32_t _mur_c1 = 0xcc9e2d51; \ + uint32_t _mur_c2 = 0x1b873593; \ + uint32_t _mur_k1 = 0; \ + const uint8_t *_mur_tail; \ + const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4); \ + int _mur_i; \ + for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \ + _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \ + _mur_k1 *= _mur_c1; \ + _mur_k1 = MUR_ROTL32(_mur_k1,15); \ + _mur_k1 *= _mur_c2; \ + \ + _mur_h1 ^= _mur_k1; \ + _mur_h1 = MUR_ROTL32(_mur_h1,13); \ + _mur_h1 = _mur_h1*5+0xe6546b64; \ + } \ + _mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4); \ + _mur_k1=0; \ + switch((keylen) & 3) { \ + case 3: _mur_k1 ^= _mur_tail[2] << 16; \ + case 2: _mur_k1 ^= _mur_tail[1] << 8; \ + case 1: _mur_k1 ^= _mur_tail[0]; \ + _mur_k1 *= _mur_c1; \ + _mur_k1 = MUR_ROTL32(_mur_k1,15); \ + _mur_k1 *= _mur_c2; \ + _mur_h1 ^= _mur_k1; \ + } \ + _mur_h1 ^= (keylen); \ + MUR_FMIX(_mur_h1); \ + hashv = _mur_h1; \ + bkt = hashv & (num_bkts-1); \ +} while(0) +#endif /* HASH_USING_NO_STRICT_ALIASING */ + +/* key comparison function; return 0 if keys equal */ +#define HASH_KEYCMP(a,b,len) memcmp(a,b,len) + +/* iterate over items in a known bucket to find desired item */ +#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \ +do { \ + if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \ + else out=NULL; \ + while (out) { \ + if ((out)->hh.keylen == keylen_in) { \ + if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) break; \ + } \ + if ((out)->hh.hh_next) 
DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); \ + else out = NULL; \ + } \ +} while(0) + +/* add an item to a bucket */ +#define HASH_ADD_TO_BKT(head,addhh) \ +do { \ + head.count++; \ + (addhh)->hh_next = head.hh_head; \ + (addhh)->hh_prev = NULL; \ + if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \ + (head).hh_head=addhh; \ + if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \ + && (addhh)->tbl->noexpand != 1) { \ + HASH_EXPAND_BUCKETS((addhh)->tbl); \ + } \ +} while(0) + +/* remove an item from a given bucket */ +#define HASH_DEL_IN_BKT(hh,head,hh_del) \ + (head).count--; \ + if ((head).hh_head == hh_del) { \ + (head).hh_head = hh_del->hh_next; \ + } \ + if (hh_del->hh_prev) { \ + hh_del->hh_prev->hh_next = hh_del->hh_next; \ + } \ + if (hh_del->hh_next) { \ + hh_del->hh_next->hh_prev = hh_del->hh_prev; \ + } + +/* Bucket expansion has the effect of doubling the number of buckets + * and redistributing the items into the new buckets. Ideally the + * items will distribute more or less evenly into the new buckets + * (the extent to which this is true is a measure of the quality of + * the hash function as it applies to the key domain). + * + * With the items distributed into more buckets, the chain length + * (item count) in each bucket is reduced. Thus by expanding buckets + * the hash keeps a bound on the chain length. This bounded chain + * length is the essence of how a hash provides constant time lookup. + * + * The calculation of tbl->ideal_chain_maxlen below deserves some + * explanation. First, keep in mind that we're calculating the ideal + * maximum chain length based on the *new* (doubled) bucket count. + * In fractions this is just n/b (n=number of items,b=new num buckets). + * Since the ideal chain length is an integer, we want to calculate + * ceil(n/b). 
We don't depend on floating point arithmetic in this + * hash, so to calculate ceil(n/b) with integers we could write + * + * ceil(n/b) = (n/b) + ((n%b)?1:0) + * + * and in fact a previous version of this hash did just that. + * But now we have improved things a bit by recognizing that b is + * always a power of two. We keep its base 2 log handy (call it lb), + * so now we can write this with a bit shift and logical AND: + * + * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) + * + */ +#define HASH_EXPAND_BUCKETS(tbl) \ +do { \ + unsigned _he_bkt; \ + unsigned _he_bkt_i; \ + struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ + UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ + _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ + 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \ + memset(_he_new_buckets, 0, \ + 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + tbl->ideal_chain_maxlen = \ + (tbl->num_items >> (tbl->log2_num_buckets+1)) + \ + ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 
1 : 0); \ + tbl->nonideal_items = 0; \ + for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \ + { \ + _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \ + while (_he_thh) { \ + _he_hh_nxt = _he_thh->hh_next; \ + HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \ + _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \ + if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ + tbl->nonideal_items++; \ + _he_newbkt->expand_mult = _he_newbkt->count / \ + tbl->ideal_chain_maxlen; \ + } \ + _he_thh->hh_prev = NULL; \ + _he_thh->hh_next = _he_newbkt->hh_head; \ + if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \ + _he_thh; \ + _he_newbkt->hh_head = _he_thh; \ + _he_thh = _he_hh_nxt; \ + } \ + } \ + uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ + tbl->num_buckets *= 2; \ + tbl->log2_num_buckets++; \ + tbl->buckets = _he_new_buckets; \ + tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \ + (tbl->ineff_expands+1) : 0; \ + if (tbl->ineff_expands > 1) { \ + tbl->noexpand=1; \ + uthash_noexpand_fyi(tbl); \ + } \ + uthash_expand_fyi(tbl); \ +} while(0) + + +/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ +/* Note that HASH_SORT assumes the hash handle name to be hh. + * HASH_SRT was added to allow the hash handle name to be passed in. */ +#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) +#define HASH_SRT(hh,head,cmpfcn) \ +do { \ + unsigned _hs_i; \ + unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ + struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ + if (head) { \ + _hs_insize = 1; \ + _hs_looping = 1; \ + _hs_list = &((head)->hh); \ + while (_hs_looping) { \ + _hs_p = _hs_list; \ + _hs_list = NULL; \ + _hs_tail = NULL; \ + _hs_nmerges = 0; \ + while (_hs_p) { \ + _hs_nmerges++; \ + _hs_q = _hs_p; \ + _hs_psize = 0; \ + for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \ + _hs_psize++; \ + _hs_q = (UT_hash_handle*)((_hs_q->next) ? 
\ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + if (! (_hs_q) ) break; \ + } \ + _hs_qsize = _hs_insize; \ + while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \ + if (_hs_psize == 0) { \ + _hs_e = _hs_q; \ + _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_qsize--; \ + } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \ + _hs_e = _hs_p; \ + if (_hs_p){ \ + _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ + ((void*)((char*)(_hs_p->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + } \ + _hs_psize--; \ + } else if (( \ + cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \ + DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \ + ) <= 0) { \ + _hs_e = _hs_p; \ + if (_hs_p){ \ + _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ + ((void*)((char*)(_hs_p->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + } \ + _hs_psize--; \ + } else { \ + _hs_e = _hs_q; \ + _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_qsize--; \ + } \ + if ( _hs_tail ) { \ + _hs_tail->next = ((_hs_e) ? \ + ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \ + } else { \ + _hs_list = _hs_e; \ + } \ + if (_hs_e) { \ + _hs_e->prev = ((_hs_tail) ? \ + ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \ + } \ + _hs_tail = _hs_e; \ + } \ + _hs_p = _hs_q; \ + } \ + if (_hs_tail){ \ + _hs_tail->next = NULL; \ + } \ + if ( _hs_nmerges <= 1 ) { \ + _hs_looping=0; \ + (head)->hh.tbl->tail = _hs_tail; \ + DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ + } \ + _hs_insize *= 2; \ + } \ + HASH_FSCK(hh,head); \ + } \ +} while (0) + +/* This function selects items from one hash into another hash. + * The end result is that the selected items have dual presence + * in both hashes. There is no copy of the items made; rather + * they are added into the new hash through a secondary hash + * hash handle that must be present in the structure. 
*/ +#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ +do { \ + unsigned _src_bkt, _dst_bkt; \ + void *_last_elt=NULL, *_elt; \ + UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ + ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ + if (src) { \ + for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ + for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ + _src_hh; \ + _src_hh = _src_hh->hh_next) { \ + _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ + if (cond(_elt)) { \ + _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ + _dst_hh->key = _src_hh->key; \ + _dst_hh->keylen = _src_hh->keylen; \ + _dst_hh->hashv = _src_hh->hashv; \ + _dst_hh->prev = _last_elt; \ + _dst_hh->next = NULL; \ + if (_last_elt_hh) { _last_elt_hh->next = _elt; } \ + if (!dst) { \ + DECLTYPE_ASSIGN(dst,_elt); \ + HASH_MAKE_TABLE(hh_dst,dst); \ + } else { \ + _dst_hh->tbl = (dst)->hh_dst.tbl; \ + } \ + HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ + HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \ + (dst)->hh_dst.tbl->num_items++; \ + _last_elt = _elt; \ + _last_elt_hh = _dst_hh; \ + } \ + } \ + } \ + } \ + HASH_FSCK(hh_dst,dst); \ +} while (0) + +#define HASH_CLEAR(hh,head) \ +do { \ + if (head) { \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head)=NULL; \ + } \ +} while(0) + +#define HASH_OVERHEAD(hh,head) \ + (size_t)((((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \ + ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \ + (sizeof(UT_hash_table)) + \ + (HASH_BLOOM_BYTELEN))) + +#ifdef NO_DECLTYPE +#define HASH_ITER(hh,head,el,tmp) \ +for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \ + el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL)) +#else +#define HASH_ITER(hh,head,el,tmp) \ 
+for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \ + el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL)) +#endif + +/* obtain a count of items in the hash */ +#define HASH_COUNT(head) HASH_CNT(hh,head) +#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0) + +typedef struct UT_hash_bucket { + struct UT_hash_handle *hh_head; + unsigned count; + + /* expand_mult is normally set to 0. In this situation, the max chain length + * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If + * the bucket's chain exceeds this length, bucket expansion is triggered). + * However, setting expand_mult to a non-zero value delays bucket expansion + * (that would be triggered by additions to this particular bucket) + * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. + * (The multiplier is simply expand_mult+1). The whole idea of this + * multiplier is to reduce bucket expansions, since they are expensive, in + * situations where we know that a particular bucket tends to be overused. + * It is better to let its chain length grow to a longer yet-still-bounded + * value, than to do an O(n) bucket expansion too often. + */ + unsigned expand_mult; + +} UT_hash_bucket; + +/* random signature used only to find hash tables in external analysis */ +#define HASH_SIGNATURE 0xa0111fe1 +#define HASH_BLOOM_SIGNATURE 0xb12220f2 + +typedef struct UT_hash_table { + UT_hash_bucket *buckets; + unsigned num_buckets, log2_num_buckets; + unsigned num_items; + struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ + ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ + + /* in an ideal situation (all buckets used equally), no bucket would have + * more than ceil(#items/#buckets) items. that's the ideal chain length. */ + unsigned ideal_chain_maxlen; + + /* nonideal_items is the number of items in the hash whose chain position + * exceeds the ideal chain maxlen. 
these items pay the penalty for an uneven + * hash distribution; reaching them in a chain traversal takes >ideal steps */ + unsigned nonideal_items; + + /* ineffective expands occur when a bucket doubling was performed, but + * afterward, more than half the items in the hash had nonideal chain + * positions. If this happens on two consecutive expansions we inhibit any + * further expansion, as it's not helping; this happens when the hash + * function isn't a good fit for the key domain. When expansion is inhibited + * the hash will still work, albeit no longer in constant time. */ + unsigned ineff_expands, noexpand; + + uint32_t signature; /* used only to find hash tables in external analysis */ +#ifdef HASH_BLOOM + uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ + uint8_t *bloom_bv; + char bloom_nbits; +#endif + +} UT_hash_table; + +typedef struct UT_hash_handle { + struct UT_hash_table *tbl; + void *prev; /* prev element in app order */ + void *next; /* next element in app order */ + struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ + struct UT_hash_handle *hh_next; /* next hh in bucket order */ + void *key; /* ptr to enclosing struct's key */ + unsigned keylen; /* enclosing struct's key len */ + unsigned hashv; /* result of hash-fcn(key) */ +} UT_hash_handle; + +#endif /* UTHASH_H */ -- cgit v1.2.3