summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2014-07-21 22:29:16 -0700
committer John MacFarlane <jgm@berkeley.edu> 2014-08-13 22:56:32 -0700
commit 870e63be7360b5a0097a27656048e853bc720464 (patch)
tree e8f19ee2d62e529115cb71dcda5f3298cca7d389 /src
parent 650ad87f35f4405a2ca8270d2b2835daa442e5f1 (diff)
Initial commit
Diffstat (limited to 'src')
-rw-r--r-- src/blocks.c 747
-rw-r--r-- src/bstrlib.c 2979
-rw-r--r-- src/bstrlib.h 304
-rw-r--r-- src/case_fold_switch.c 2637
-rw-r--r-- src/casefold.c 2699
-rw-r--r-- src/debug.h 36
-rw-r--r-- src/detab.c 48
-rw-r--r-- src/getopt.c 199
-rw-r--r-- src/html.c 276
-rw-r--r-- src/inlines.c 998
-rw-r--r-- src/main.c 102
-rw-r--r-- src/print.c 168
-rw-r--r-- src/scanners.h 15
-rw-r--r-- src/scanners.re 238
-rw-r--r-- src/stmd.h 121
-rw-r--r-- src/utf8.c 106
-rw-r--r-- src/utf8.h 6
-rw-r--r-- src/uthash.h 948
18 files changed, 12627 insertions, 0 deletions
diff --git a/src/blocks.c b/src/blocks.c
new file mode 100644
index 0000000..2776231
--- /dev/null
+++ b/src/blocks.c
@@ -0,0 +1,747 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <ctype.h>
+#include "bstrlib.h"
+#include "stmd.h"
+#include "uthash.h"
+#include "debug.h"
+#include "scanners.h"
+
+// Allocate and initialise a new, open block of type `tag`.
+// start_line/start_column record where the block begins; end_line starts
+// out equal to start_line. Every link (children, last_child, parent, top,
+// next, prev) is NULL, inline_content is NULL and string_content is an
+// empty bstring.
+// Returns NULL if either allocation fails.
+static block* make_block(int tag, int start_line, int start_column)
+{
+  block* e = malloc(sizeof(*e));
+  if (e == NULL) {
+    return NULL;
+  }
+  e->tag = tag;
+  e->open = true;
+  e->last_line_blank = false;
+  e->start_line = start_line;
+  e->start_column = start_column;
+  e->end_line = start_line;
+  e->children = NULL;
+  e->last_child = NULL;
+  e->parent = NULL;
+  e->top = NULL;
+  e->attributes.refmap = NULL;
+  e->inline_content = NULL;
+  e->next = NULL;
+  e->prev = NULL;
+  e->string_content = bfromcstr("");
+  if (e->string_content == NULL) {
+    // don't hand back a block whose string_content cannot be appended to
+    free(e);
+    return NULL;
+  }
+  return e;
+}
+
+// Create a root document block.
+// The document starts at line 1, column 1, is its own `top`, and owns a
+// heap-allocated reference-map head pointer that initially points to an
+// empty (NULL) map.
+// Returns NULL if memory cannot be allocated.
+extern block* make_document()
+{
+  block * e = make_block(document, 1, 1);
+  reference ** refmap;
+
+  if (e == NULL) {
+    return NULL;
+  }
+  refmap = malloc(sizeof(*refmap));
+  if (refmap == NULL) {
+    // release the partially built document instead of leaking it
+    bdestroy(e->string_content);
+    free(e);
+    return NULL;
+  }
+  *refmap = NULL;
+  e->attributes.refmap = refmap;
+  e->top = e;
+  return e;
+}
+
+// Returns true if, starting at `offset`, the line holds nothing but space
+// characters up to the first newline or the end of the string; returns
+// false as soon as any other character is seen.
+bool is_blank(bstring s, int offset)
+{
+  for (;; offset++) {
+    char ch = bchar(s, offset);
+    if (ch == '\0' || ch == '\n') {
+      return true;
+    }
+    if (ch != ' ') {
+      return false;
+    }
+  }
+}
+
+// Whether a block of type parent_type may hold a child of type child_type.
+// Documents, block quotes and list items accept any child; a list accepts
+// only list items; every other block type accepts no children.
+static inline bool can_contain(int parent_type, int child_type)
+{
+  switch (parent_type) {
+  case document:
+  case block_quote:
+  case list_item:
+    return true;
+  case list:
+    return child_type == list_item;
+  default:
+    return false;
+  }
+}
+
+// Whether a block of this type stores raw text lines: true for paragraphs,
+// ATX headers, indented code and fenced code, false for everything else.
+static inline bool accepts_lines(int block_type)
+{
+  switch (block_type) {
+  case paragraph:
+  case atx_header:
+  case indented_code:
+  case fenced_code:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// Append the part of `ln` that starts at `offset` to block->string_content.
+// The block must still be open.
+// Returns 0 on success and -1 on failure; the temporary substring is
+// released on every path.
+static int add_line(block* block, bstring ln, int offset)
+{
+  bstring s = bmidstr(ln, offset, blength(ln) - offset);
+  check(s != NULL, "could not extract remainder of line");
+  check(block->open, "attempted to add line (%s) to closed container (%d)",
+        ln->data, block->tag);
+  check(bformata(block->string_content, "%s", s->data) == 0,
+        "could not append line to string_content");
+  bdestroy(s);
+  return 0;
+ error:
+  // bdestroy rejects NULL with BSTR_ERR, so this is safe when bmidstr failed
+  bdestroy(s);
+  return -1;
+}
+
+// Strip trailing blank lines from `ln` in place.
+// If `ln` holds only whitespace (space, tab, CR, LF) it becomes empty.
+// Otherwise everything from the first newline at or after the last
+// non-whitespace character is deleted, newline included.
+// Returns 0 on success, -1 on failure.
+static int remove_trailing_blank_lines(bstring ln)
+{
+  bstring tofind = bfromcstr(" \t\r\n");
+  int pos;
+
+  // without the character set, bninchrr would fail and the content would
+  // be mistaken for "all spaces" and wiped
+  check(tofind != NULL, "could not allocate whitespace set");
+
+  // find last nonspace:
+  pos = bninchrr(ln, blength(ln) - 1, tofind);
+  if (pos == BSTR_ERR) { // all spaces
+    bassigncstr(ln, "");
+  } else {
+    // find next newline after it
+    pos = bstrchrp(ln, '\n', pos);
+    if (pos != BSTR_ERR) {
+      check(bdelete(ln, pos, blength(ln) - pos) != BSTR_ERR,
+            "failed to delete trailing blank lines");
+    }
+  }
+  bdestroy(tofind);
+  return 0;
+ error:
+  // release the character set on the failure path too
+  bdestroy(tofind);
+  return -1;
+}
+
+// Report whether a block ends with a blank line. For lists and list items
+// whose own flag is not set, the answer is taken from their last child,
+// and so on down the chain of last children.
+static bool ends_with_blank_line(block* block)
+{
+  for (;;) {
+    if (block->last_line_blank) {
+      return true;
+    }
+    bool is_list_like = (block->tag == list || block->tag == list_item);
+    if (!is_list_like || block->last_child == NULL) {
+      return false;
+    }
+    block = block->last_child;
+  }
+}
+
+// Break out of all containing lists.
+// Finds the outermost list that contains *bptr (or is *bptr itself).
+// Every block from *bptr up to and including that list is finalized, and
+// *bptr is moved to the list's parent. If *bptr is not inside any list,
+// nothing is changed.
+//
+// The search walks the parent chain of *bptr. Searching downwards from the
+// document root via last_child could select a list that does not contain
+// *bptr; the finalize loop would then run off the top of the tree and
+// close every ancestor, the document included.
+// Always returns 0.
+static int break_out_of_lists(block ** bptr, int line_number)
+{
+  block * container = *bptr;
+  block * outermost = NULL;
+  block * p;
+
+  // find the outermost containing list:
+  for (p = container; p != NULL; p = p->parent) {
+    if (p->tag == list) {
+      outermost = p;
+    }
+  }
+
+  if (outermost != NULL) {
+    // outermost is an ancestor-or-self of container, so this terminates
+    while (container != outermost) {
+      finalize(container, line_number);
+      container = container->parent;
+    }
+    finalize(outermost, line_number);
+    *bptr = outermost->parent;
+  }
+  return 0;
+}
+
+
+// Close block `b` and perform the end-of-block processing for its type.
+// An already closed block is left untouched. end_line becomes
+// line_number - 1 when line_number is past the block's start line,
+// otherwise line_number.
+//   paragraph:     leading reference definitions are parsed into the
+//                  document's refmap and removed; if only blank content
+//                  remains the block becomes a reference_def.
+//   indented_code: trailing blank lines are dropped and one "\n" appended.
+//   fenced_code:   the first line of the content is moved into the info
+//                  string, which is then trimmed and unescaped.
+//   list:          the tight/loose flag is computed.
+// Returns 0, or -1 if `b` is NULL.
+extern int finalize(block* b, int line_number)
+{
+  check(b != NULL, "finalize called on null block");
+  if (!b->open) {
+    return 0; // nothing to do for a block that is already closed
+  }
+  b->open = false;
+  b->end_line = (line_number > b->start_line) ? line_number - 1 : line_number;
+
+  if (b->tag == paragraph) {
+
+    // peel off reference definitions for as long as one parses
+    while (bchar(b->string_content, 0) == '[') {
+      int consumed = parse_reference(b->string_content,
+                                     b->top->attributes.refmap);
+      if (!consumed) {
+        break;
+      }
+      bdelete(b->string_content, 0, consumed);
+    }
+    if (is_blank(b->string_content, 0)) {
+      b->tag = reference_def;
+    }
+
+  } else if (b->tag == indented_code) {
+
+    remove_trailing_blank_lines(b->string_content);
+    bformata(b->string_content, "\n");
+
+  } else if (b->tag == fenced_code) {
+
+    // first line of contents becomes info
+    int eol = bstrchr(b->string_content, '\n');
+    bstring info = bmidstr(b->string_content, 0, eol);
+    b->attributes.fenced_code_data.info = info;
+    bdelete(b->string_content, 0, eol + 1); // +1 for \n
+    btrimws(info);
+    unescape(info);
+
+  } else if (b->tag == list) {
+
+    // tight unless a blank line separates items or blocks inside an item
+    bool loose = false;
+    for (block* item = b->children; item != NULL && !loose; item = item->next) {
+      // non-final list item ending with blank line:
+      if (item->last_line_blank && item->next) {
+        loose = true;
+        break;
+      }
+      // look for blank lines between the children of this item:
+      for (block* sub = item->children; sub != NULL; sub = sub->next) {
+        if (ends_with_blank_line(sub) && (item->next || sub->next)) {
+          loose = true;
+          break;
+        }
+      }
+    }
+    b->attributes.list_data.tight = !loose;
+
+  }
+
+  return 0;
+ error:
+  return -1;
+}
+
+// Add a block as child of another. Return pointer to child.
+// If `parent` cannot hold a block of type block_type, parent is finalized
+// and the search moves to its own parent, repeating until a block that can
+// hold the child is found. The child is appended after the existing
+// children and inherits the parent's `top`.
+// Returns NULL if no ancestor accepts the child or if allocation fails.
+extern block* add_child(block* parent,
+                        int block_type, int start_line, int start_column)
+{
+  block* child;
+
+  // if 'parent' isn't the kind of block that can accept this child,
+  // then back up til we hit a block that can. The NULL test comes first
+  // so that walking off the root is reported instead of dereferenced.
+  while (parent != NULL && !can_contain(parent->tag, block_type)) {
+    finalize(parent, start_line);
+    parent = parent->parent;
+  }
+
+  check(parent != NULL, "parent container cannot accept children");
+
+  child = make_block(block_type, start_line, start_column);
+  check(child != NULL, "could not allocate child block");
+  child->parent = parent;
+  child->top = parent->top;
+
+  if (parent->last_child) {
+    parent->last_child->next = child;
+    child->prev = parent->last_child;
+  } else {
+    parent->children = child;
+    child->prev = NULL;
+  }
+  parent->last_child = child;
+  return child;
+ error:
+  return NULL;
+}
+
+// Release a chain of sibling blocks starting at `e`, together with all of
+// their descendants, inline content and string content. A fenced code
+// block also releases its info string and a document its reference map.
+extern void free_blocks(block* e)
+{
+  while (e != NULL) {
+    block * following = e->next; // saved before e itself is freed
+
+    free_inlines(e->inline_content);
+    bdestroy(e->string_content);
+
+    switch (e->tag) {
+    case fenced_code:
+      bdestroy(e->attributes.fenced_code_data.info);
+      break;
+    case document:
+      free_reference_map(e->attributes.refmap);
+      break;
+    default:
+      break;
+    }
+
+    free_blocks(e->children);
+    free(e);
+    e = following;
+  }
+}
+
+// Walk through block and all children, recursively, parsing
+// string content into inline content where appropriate.
+// For paragraphs and both header types, string_content is parsed with
+// parse_inlines into inline_content, then destroyed and set to NULL.
+// Returns 0 on success. Returns -1 if `cur` has NULL string_content where
+// text is required (its children are then not visited), or if any child
+// failed; after a failing child the remaining siblings are still visited.
+int process_inlines(block* cur, reference** refmap)
+{
+  int status = 0;
+  block * child;
+
+  switch (cur->tag) {
+
+  case paragraph:
+  case atx_header:
+  case setext_header:
+    check(cur->string_content != NULL, "string_content is NULL");
+    cur->inline_content = parse_inlines(cur->string_content, refmap);
+    bdestroy(cur->string_content);
+    cur->string_content = NULL;
+    break;
+
+  default:
+    break;
+  }
+
+  for (child = cur->children; child != NULL; child = child->next) {
+    // remember a failure, but keep going so siblings are still converted
+    if (process_inlines(child, refmap) != 0) {
+      status = -1;
+    }
+  }
+
+  return status;
+ error:
+  return -1;
+}
+
+// Attempts to parse a list item marker (bullet or enumerated) at `pos`.
+// On success, returns length of the marker, and stores a freshly
+// allocated ListData in *dataptr; the caller owns it and must free it.
+// marker_offset is set to 0 and padding is left for the caller to fill in.
+// On failure, returns 0 and leaves *dataptr untouched.
+//
+// A bullet marker is '*', '-' or '+' followed by whitespace, provided the
+// line is not a horizontal rule. An ordered marker is 1 to 9 digits
+// followed by '.' or ')' and whitespace. The 9-digit cap keeps the start
+// number within the range of a 32-bit int; longer runs are not list
+// markers.
+static int parse_list_marker(bstring ln, int pos,
+                             struct ListData ** dataptr)
+{
+  char c;
+  int startpos;
+  int start = 1;
+  int ndigits = 0;
+  struct ListData * data;
+
+  startpos = pos;
+  c = bchar(ln, pos);
+
+  if ((c == '*' || c == '-' || c == '+') && !scan_hrule(ln, pos)) {
+    pos++;
+    if (!isspace((unsigned char) bchar(ln, pos))) {
+      return 0;
+    }
+    data = malloc(sizeof(*data));
+    if (data == NULL) {
+      log_err("could not allocate list data");
+      return 0;
+    }
+    data->marker_offset = 0; // will be adjusted later
+    data->list_type = bullet;
+    data->bullet_char = c;
+    data->start = 1;
+    data->delimiter = period;
+    data->tight = false;
+
+  // <ctype.h> functions need an unsigned char value; plain char may be
+  // negative for bytes >= 0x80
+  } else if (isdigit((unsigned char) c)) {
+
+    // accumulate the start number by hand so overlong input is rejected
+    // rather than overflowing
+    start = 0;
+    while (isdigit((unsigned char) bchar(ln, pos))) {
+      if (++ndigits > 9) {
+        return 0;
+      }
+      start = start * 10 + (bchar(ln, pos) - '0');
+      pos++;
+    }
+
+    c = bchar(ln, pos);
+    if (c != '.' && c != ')') {
+      return 0;
+    }
+    pos++;
+    if (!isspace((unsigned char) bchar(ln, pos))) {
+      return 0;
+    }
+    data = malloc(sizeof(*data));
+    if (data == NULL) {
+      log_err("could not allocate list data");
+      return 0;
+    }
+    data->marker_offset = 0; // will be adjusted later
+    data->list_type = ordered;
+    data->bullet_char = 0;
+    data->start = start;
+    data->delimiter = (c == '.' ? period : parens);
+    data->tight = false;
+
+  } else {
+    return 0;
+  }
+
+  *dataptr = data;
+  return (pos - startpos);
+}
+
+// Return 1 if a list item described by item_data belongs in the list
+// described by list_data, else 0. The two must agree on list type,
+// delimiter and bullet character; marker_offset is not compared.
+static int lists_match(struct ListData list_data,
+                       struct ListData item_data)
+{
+  if (list_data.list_type != item_data.list_type) {
+    return 0;
+  }
+  if (list_data.delimiter != item_data.delimiter) {
+    return 0;
+  }
+  return list_data.bullet_char == item_data.bullet_char;
+}
+
+// Process one line at a time, modifying a block.
+// Returns 0 if successful. curptr is changed to point to
+// the currently open block.
+//
+// The line is handled in four phases:
+//  1. Starting at the document root, descend through the chain of open
+//     last children and test whether the line continues each of them,
+//     advancing `offset` past the matched prefix.
+//  2. If this is a second consecutive blank line, break out of lists.
+//  3. Unless the matched container is a code or HTML block, repeatedly try
+//     to open new containers at the current offset.
+//  4. Add the remaining text to the appropriate block, either as a lazy
+//     paragraph continuation or after finalizing the unmatched blocks.
+// Note that `ln` itself is modified: it is detabbed and, for ATX headers,
+// trimmed of trailing whitespace and closing #s.
+// Returns -1 when one of the check() conditions fails.
+extern int incorporate_line(bstring ln, int line_number, block** curptr)
+{
+ block* last_matched_container;
+ int offset = 0;
+ int matched = 0;
+ int lev = 0;
+ int i;
+ struct ListData * data = NULL;
+ bool all_matched = true;
+ block* container;
+ block* cur = *curptr;
+ bool blank = false;
+ int first_nonspace;
+ int indent;
+
+ // detab input line
+ check(bdetab(ln, 1) != BSTR_ERR,
+ "invalid UTF-8 sequence in line %d\n", line_number);
+
+ // container starts at the document root.
+ container = cur->top;
+
+ // for each containing block, try to parse the associated line start.
+ // bail out on failure: container will point to the last matching block.
+
+ while (container->last_child && container->last_child->open) {
+ container = container->last_child;
+
+ first_nonspace = offset;
+ while (bchar(ln, first_nonspace) == ' ') {
+ first_nonspace++;
+ }
+
+ indent = first_nonspace - offset;
+ blank = bchar(ln, first_nonspace) == '\n';
+
+ if (container->tag == block_quote) {
+
+ matched = indent <= 3 && bchar(ln, first_nonspace) == '>';
+ if (matched) {
+ offset = first_nonspace + 1;
+ if (bchar(ln, offset) == ' ') {
+ offset++;
+ }
+ } else {
+ all_matched = false;
+ }
+
+ } else if (container->tag == list_item) {
+
+ if (indent >= container->attributes.list_data.marker_offset +
+ container->attributes.list_data.padding) {
+ offset += container->attributes.list_data.marker_offset +
+ container->attributes.list_data.padding;
+ } else if (blank) {
+ offset = first_nonspace;
+ } else {
+ all_matched = false;
+ }
+
+ } else if (container->tag == indented_code) {
+
+ if (indent >= CODE_INDENT) {
+ offset += CODE_INDENT;
+ } else if (blank) {
+ offset = first_nonspace;
+ } else {
+ all_matched = false;
+ }
+
+ } else if (container->tag == atx_header ||
+ container->tag == setext_header) {
+
+ // a header can never contain more than one line
+ all_matched = false;
+
+ } else if (container->tag == fenced_code) {
+
+ // skip optional spaces of fence offset
+ i = container->attributes.fenced_code_data.fence_offset;
+ while (i > 0 && bchar(ln, offset) == ' ') {
+ offset++;
+ i--;
+ }
+
+ } else if (container->tag == html_block) {
+
+ if (blank) {
+ all_matched = false;
+ }
+
+ } else if (container->tag == paragraph) {
+
+ if (blank) {
+ container->last_line_blank =true;
+ all_matched = false;
+ }
+
+ }
+
+ if (!all_matched) {
+ container = container->parent; // back up to last matching block
+ break;
+ }
+ }
+
+ last_matched_container = container;
+
+ // check to see if we've hit 2nd blank line, break out of list:
+ if (blank && container->last_line_blank) {
+ break_out_of_lists(&container, line_number);
+ }
+
+ // unless last matched container is code block, try new container starts:
+ while (container->tag != fenced_code && container->tag != indented_code &&
+ container->tag != html_block) {
+
+ first_nonspace = offset;
+ while (bchar(ln, first_nonspace) == ' ') {
+ first_nonspace++;
+ }
+
+ indent = first_nonspace - offset;
+ blank = bchar(ln, first_nonspace) == '\n';
+
+ if (indent >= CODE_INDENT) {
+
+ if (cur->tag != paragraph && !blank) {
+ offset += CODE_INDENT;
+ container = add_child(container, indented_code, line_number, offset + 1);
+ } else { // indent > 4 in lazy line
+ break;
+ }
+
+ } else if (bchar(ln, first_nonspace) == '>') {
+
+ offset = first_nonspace + 1;
+ // optional following character
+ if (bchar(ln, offset) == ' ') {
+ offset++;
+ }
+ container = add_child(container, block_quote, line_number, offset + 1);
+
+ } else if ((matched = scan_atx_header_start(ln, first_nonspace))) {
+
+ offset = first_nonspace + matched;
+ container = add_child(container, atx_header, line_number, offset + 1);
+ // the header level is the length of the run of #s that opens the line
+ int hashpos = bstrchrp(ln, '#', first_nonspace);
+ check(hashpos != BSTR_ERR, "no # found in atx header start");
+ int level = 0;
+ while (bchar(ln, hashpos) == '#') {
+ level++;
+ hashpos++;
+ }
+ container->attributes.header_level = level;
+
+ } else if ((matched = scan_open_code_fence(ln, first_nonspace))) {
+
+ container = add_child(container, fenced_code, line_number,
+ first_nonspace + 1);
+ container->attributes.fenced_code_data.fence_char = bchar(ln,
+ first_nonspace);
+ container->attributes.fenced_code_data.fence_length = matched;
+ container->attributes.fenced_code_data.fence_offset =
+ first_nonspace - offset;
+ offset = first_nonspace + matched;
+
+ } else if ((matched = scan_html_block_tag(ln, first_nonspace))) {
+
+ container = add_child(container, html_block, line_number,
+ first_nonspace + 1);
+ // note, we don't adjust offset because the tag is part of the text
+
+ } else if (container->tag == paragraph &&
+ (lev = scan_setext_header_line(ln, first_nonspace)) &&
+ // check that there is only one line in the paragraph:
+ bstrrchrp(container->string_content, '\n',
+ blength(container->string_content) - 2) == BSTR_ERR) {
+
+ container->tag = setext_header;
+ container->attributes.header_level = lev;
+ offset = blength(ln) - 1;
+
+ } else if (!(container->tag == paragraph && !all_matched) &&
+ (matched = scan_hrule(ln, first_nonspace))) {
+
+ // it's only now that we know the line is not part of a setext header:
+ container = add_child(container, hrule, line_number, first_nonspace + 1);
+ finalize(container, line_number);
+ container = container->parent;
+ offset = blength(ln) - 1;
+
+ } else if ((matched = parse_list_marker(ln, first_nonspace, &data))) {
+
+ // compute padding:
+ offset = first_nonspace + matched;
+ i = 0;
+ while (i <= 5 && bchar(ln, offset + i) == ' ') {
+ i++;
+ }
+ // i = number of spaces after marker, up to 5
+ if (i >= 5 || i < 1 || bchar(ln, offset) == '\n') {
+ data->padding = matched + 1;
+ if (i > 0) {
+ offset += 1;
+ }
+ } else {
+ data->padding = matched + i;
+ offset += i;
+ }
+
+ // check container; if it's a list, see if this list item
+ // can continue the list; otherwise, create a list container.
+
+ data->marker_offset = indent;
+
+ if (container->tag != list ||
+ !lists_match(container->attributes.list_data, *data)) {
+ container = add_child(container, list, line_number,
+ first_nonspace + 1);
+ container->attributes.list_data = *data;
+ }
+
+ // add the list item
+ container = add_child(container, list_item, line_number,
+ first_nonspace + 1);
+ container->attributes.list_data = *data;
+ // data was copied by value into the block(s) above
+ free(data);
+
+ } else {
+ break;
+ }
+
+ if (accepts_lines(container->tag)) {
+ // if it's a line container, it can't contain other containers
+ break;
+ }
+ }
+
+ // what remains at offset is a text line. add the text to the
+ // appropriate container.
+
+ first_nonspace = offset;
+ while (bchar(ln, first_nonspace) == ' ') {
+ first_nonspace++;
+ }
+
+ indent = first_nonspace - offset;
+ blank = bchar(ln, first_nonspace) == '\n';
+
+ // block quote lines are never blank as they start with >
+ // and we don't count blanks in fenced code for purposes of tight/loose
+ // lists or breaking out of lists. we also don't set last_line_blank
+ // on an empty list item.
+ container->last_line_blank = (blank &&
+ container->tag != block_quote &&
+ container->tag != fenced_code &&
+ !(container->tag == list_item &&
+ container->children == NULL &&
+ container->start_line == line_number));
+
+ // clear the flag on every ancestor of the container
+ block *cont = container;
+ while (cont->parent) {
+ cont->parent->last_line_blank = false;
+ cont = cont->parent;
+ }
+
+ // lazy continuation: no new container was opened, the line is not blank,
+ // and the previously open block is a non-empty paragraph that was not
+ // itself matched; the text is appended to that paragraph.
+ if (cur != last_matched_container &&
+ container == last_matched_container &&
+ !blank &&
+ cur->tag == paragraph &&
+ blength(cur->string_content) > 0) {
+
+ check(add_line(cur, ln, offset) == 0, "could not add line");
+
+ } else { // not a lazy continuation
+
+ // finalize any blocks that were not matched and set cur to container:
+ while (cur != last_matched_container) {
+
+ finalize(cur, line_number);
+ cur = cur->parent;
+ check(cur != NULL, "cur is NULL, last_matched_container->tag = %d",
+ last_matched_container->tag);
+
+ }
+
+ if (container->tag == indented_code) {
+
+ check(add_line(container, ln, offset) == 0, "could not add line");
+
+ } else if (container->tag == fenced_code) {
+
+ matched = (indent <= 3
+ && bchar(ln, first_nonspace) == container->attributes.fenced_code_data.fence_char)
+ && scan_close_code_fence(ln, first_nonspace,
+ container->attributes.fenced_code_data.fence_length);
+ if (matched) {
+ // if closing fence, don't add line to container; instead, close it:
+ finalize(container, line_number);
+ container = container->parent; // back up to parent
+ } else {
+ check(add_line(container, ln, offset) == 0, "could not add line");
+ }
+
+ } else if (container->tag == html_block) {
+
+ check(add_line(container, ln, offset) == 0, "could not add line");
+
+ } else if (blank) {
+
+ // ??? do nothing
+
+ } else if (container->tag == atx_header) {
+
+ // chop off trailing ###s...use a scanner?
+ brtrimws(ln);
+ int p = blength(ln) - 1;
+ int numhashes = 0;
+ // if string ends in #s, remove these:
+ while (bchar(ln, p) == '#') {
+ p--;
+ numhashes++;
+ }
+ if (bchar(ln, p) == '\\') {
+ // the last # was escaped, so we include it.
+ p++;
+ numhashes--;
+ }
+ check(bdelete(ln, p + 1, numhashes) != BSTR_ERR,
+ "could not delete final hashes");
+ check(add_line(container, ln, first_nonspace) == 0, "could not add line");
+ finalize(container, line_number);
+ container = container->parent;
+
+ } else if (accepts_lines(container->tag)) {
+
+ check(add_line(container, ln, first_nonspace) == 0, "could not add line");
+
+ } else if (container->tag != hrule && container->tag != setext_header) {
+
+ // create paragraph container for line
+ container = add_child(container, paragraph, line_number, first_nonspace + 1);
+ check(add_line(container, ln, first_nonspace) == 0, "could not add line");
+
+ } else {
+
+ log_warn("Line %d with container type %d did not match any condition:\n\"%s\"",
+ line_number, container->tag, ln->data);
+
+ }
+ *curptr = container;
+ }
+
+ return 0;
+ error:
+ return -1;
+}
+
diff --git a/src/bstrlib.c b/src/bstrlib.c
new file mode 100644
index 0000000..1b19dbe
--- /dev/null
+++ b/src/bstrlib.c
@@ -0,0 +1,2979 @@
+/*
+ * This source file is part of the bstring string library. This code was
+ * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
+ * BSD open source license or GPL v2.0. Refer to the accompanying documentation
+ * for details on usage and license.
+ */
+
+/*
+ * bstrlib.c
+ *
+ * This file is the core module for implementing the bstring functions.
+ */
+
+#if defined (_MSC_VER)
+/* These warnings from MSVC++ are totally pointless. */
+# define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#include <stdio.h>
+#include <stddef.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "bstrlib.h"
+
+/* Optionally include a mechanism for debugging memory */
+
+#if defined(MEMORY_DEBUG) || defined(BSTRLIB_MEMORY_DEBUG)
+#include "memdbg.h"
+#endif
+
+/* Memory primitives used by this file. Each macro is only defined here if
+   it has not been defined already, in which case it maps straight onto
+   the corresponding C standard library function. */
+
+#ifndef bstr__alloc
+#define bstr__alloc(x) malloc (x)
+#endif
+
+#ifndef bstr__free
+#define bstr__free(p) free (p)
+#endif
+
+#ifndef bstr__realloc
+#define bstr__realloc(p,x) realloc ((p), (x))
+#endif
+
+#ifndef bstr__memcpy
+#define bstr__memcpy(d,s,l) memcpy ((d), (s), (l))
+#endif
+
+#ifndef bstr__memmove
+#define bstr__memmove(d,s,l) memmove ((d), (s), (l))
+#endif
+
+#ifndef bstr__memset
+#define bstr__memset(d,c,l) memset ((d), (c), (l))
+#endif
+
+#ifndef bstr__memcmp
+#define bstr__memcmp(d,c,l) memcmp ((d), (c), (l))
+#endif
+
+#ifndef bstr__memchr
+#define bstr__memchr(s,c,l) memchr ((s), (c), (l))
+#endif
+
+/* Just a length safe wrapper for memmove: nothing is copied unless the
+   length is greater than zero. Expands to a braced block and evaluates L
+   twice, so L should be free of side effects. */
+
+#define bBlockCopy(D,S,L) { if ((L) > 0) bstr__memmove ((D),(S),(L)); }
+
+/* Compute the snapped size for a given requested size. Requests below 8
+   yield 8. Otherwise the result is the least power of two strictly
+   greater than i, or i itself when that power of two does not fit in an
+   int. Growing in powers of two keeps the number of reallocations low. */
+static int snapUpSize (int i) {
+    unsigned int bits;
+
+    if (i < 8) return 8;
+
+    /* Smear the highest set bit into every lower position... */
+    bits = (unsigned int) i;
+    bits |= (bits >> 1);
+    bits |= (bits >> 2);
+    bits |= (bits >> 4);
+    bits |= (bits >> 8); /* Ok, since int >= 16 bits */
+#if (UINT_MAX != 0xffff)
+    bits |= (bits >> 16); /* For 32 bit int systems */
+#if (UINT_MAX > 0xffffffffUL)
+    bits |= (bits >> 32); /* For 64 bit int systems */
+#endif
+#endif
+    /* ...so that adding one gives the next power of two. */
+    bits += 1;
+
+    return ((int) bits >= i) ? (int) bits : i;
+}
+
+/* int balloc (bstring b, int olen)
+ *
+ * Increase the size of the memory backing the bstring b to at least olen.
+ * Nothing is done when olen is already smaller than b->mlen. Returns
+ * BSTR_OK on success and BSTR_ERR if b is not a valid writable bstring,
+ * olen is not positive, or memory cannot be obtained. The string content
+ * and b->slen are preserved.
+ */
+int balloc (bstring b, int olen) {
+ int len;
+ if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen <= 0 ||
+ b->mlen < b->slen || olen <= 0) {
+ return BSTR_ERR;
+ }
+
+ if (olen >= b->mlen) {
+ unsigned char * x;
+
+ /* Snapped size did not grow past the current buffer: nothing to do */
+ if ((len = snapUpSize (olen)) <= b->mlen) return BSTR_OK;
+
+ /* Assume probability of a non-moving realloc is 0.125 */
+ if (7 * b->mlen < 8 * b->slen) {
+
+ /* If slen is close to mlen in size then use realloc to reduce
+ the memory fragmentation */
+
+ reallocStrategy:;
+
+ x = (unsigned char *) bstr__realloc (b->data, (size_t) len);
+ if (x == NULL) {
+
+ /* Since we failed, try allocating the tightest possible
+ allocation */
+
+ if (NULL == (x = (unsigned char *) bstr__realloc (b->data, (size_t) (len = olen)))) {
+ return BSTR_ERR;
+ }
+ }
+ } else {
+
+ /* If slen is not close to mlen then avoid the penalty of copying
+ the extra bytes that are allocated, but not considered part of
+ the string */
+
+ if (NULL == (x = (unsigned char *) bstr__alloc ((size_t) len))) {
+
+ /* Perhaps there is no available memory for the two
+ allocations to be in memory at once */
+
+ goto reallocStrategy;
+
+ } else {
+ /* Copy only the live bytes, then release the old buffer */
+ if (b->slen) bstr__memcpy ((char *) x, (char *) b->data, (size_t) b->slen);
+ bstr__free (b->data);
+ }
+ }
+ b->data = x;
+ b->mlen = len;
+ b->data[b->slen] = (unsigned char) '\0';
+ }
+
+ return BSTR_OK;
+}
+
+/* int ballocmin (bstring b, int len)
+ *
+ * Resize the memory backing the bstring b to exactly len bytes, or to
+ * b->slen+1 bytes if len is smaller than that. Returns BSTR_OK on success
+ * and BSTR_ERR for an invalid bstring, a non-positive len or a failed
+ * reallocation. Note that repeated use of this function can degrade
+ * performance.
+ */
+int ballocmin (bstring b, int len) {
+    unsigned char * resized;
+
+    if (b == NULL || b->data == NULL || (b->slen+1) < 0 || b->mlen <= 0 ||
+        b->mlen < b->slen || len <= 0) {
+        return BSTR_ERR;
+    }
+
+    /* Never shrink below what the content plus its terminator needs */
+    if (len < b->slen + 1) len = b->slen + 1;
+
+    if (len == b->mlen) return BSTR_OK;
+
+    resized = (unsigned char *) bstr__realloc (b->data, (size_t) len);
+    if (resized == NULL) return BSTR_ERR;
+
+    resized[b->slen] = (unsigned char) '\0';
+    b->data = resized;
+    b->mlen = len;
+    return BSTR_OK;
+}
+
+/* bstring bfromcstr (const char * str)
+ *
+ * Create a new bstring holding a copy of the '\0' terminated char *
+ * buffer str. Returns NULL if str is NULL, if the snapped buffer size
+ * does not exceed the string length, or if memory cannot be allocated.
+ */
+bstring bfromcstr (const char * str) {
+    bstring result;
+    size_t n;
+    int cap;
+
+    if (str == NULL) return NULL;
+
+    n = (strlen) (str);
+    cap = snapUpSize ((int) (n + (2 - (n != 0))));
+    if (cap <= (int) n) return NULL;
+
+    result = (bstring) bstr__alloc (sizeof (struct tagbstring));
+    if (result == NULL) return NULL;
+
+    result->slen = (int) n;
+    result->mlen = cap;
+    result->data = (unsigned char *) bstr__alloc (cap);
+    if (result->data == NULL) {
+        bstr__free (result);
+        return NULL;
+    }
+
+    /* n+1 so that the terminating '\0' is copied as well */
+    bstr__memcpy (result->data, str, n+1);
+    return result;
+}
+
+/* bstring bfromcstralloc (int mlen, const char * str)
+ *
+ * Create a new bstring holding a copy of the '\0' terminated char *
+ * buffer str. The memory buffer backing the string is at least mlen
+ * bytes in length. Returns NULL if str is NULL, if the snapped buffer
+ * size does not exceed the string length, or if memory cannot be
+ * allocated.
+ */
+bstring bfromcstralloc (int mlen, const char * str) {
+    bstring result;
+    size_t n;
+    int cap;
+
+    if (str == NULL) return NULL;
+
+    n = (strlen) (str);
+    cap = snapUpSize ((int) (n + (2 - (n != 0))));
+    if (cap <= (int) n) return NULL;
+
+    result = (bstring) bstr__alloc (sizeof (struct tagbstring));
+    if (result == NULL) return NULL;
+
+    result->slen = (int) n;
+    /* Honour the caller's minimum when it exceeds the snapped size */
+    if (mlen > cap) cap = mlen;
+    result->mlen = cap;
+
+    result->data = (unsigned char *) bstr__alloc (cap);
+    if (result->data == NULL) {
+        bstr__free (result);
+        return NULL;
+    }
+
+    /* n+1 so that the terminating '\0' is copied as well */
+    bstr__memcpy (result->data, str, n+1);
+    return result;
+}
+
+/* bstring blk2bstr (const void * blk, int len)
+ *
+ * Create a new bstring holding a copy of the first len bytes of the block
+ * blk, followed by a '\0' terminator. Returns NULL if blk is NULL, len is
+ * negative, or memory cannot be allocated.
+ */
+bstring blk2bstr (const void * blk, int len) {
+    bstring result;
+
+    if (blk == NULL || len < 0) return NULL;
+
+    result = (bstring) bstr__alloc (sizeof (struct tagbstring));
+    if (result == NULL) return NULL;
+
+    result->slen = len;
+    result->mlen = snapUpSize (len + (2 - (len != 0)));
+
+    result->data = (unsigned char *) bstr__alloc ((size_t) result->mlen);
+    if (result->data == NULL) {
+        bstr__free (result);
+        return NULL;
+    }
+
+    if (len > 0) {
+        bstr__memcpy (result->data, blk, (size_t) len);
+    }
+    result->data[len] = (unsigned char) '\0';
+
+    return result;
+}
+
+/* char * bstr2cstr (const_bstring s, char z)
+ *
+ * Create a newly allocated '\0' terminated char * buffer with the
+ * contents of the bstring s, in which every embedded '\0' character is
+ * replaced by z. Returns NULL for an invalid bstring or when allocation
+ * fails. The returned buffer should be freed with a bcstrfree () call, by
+ * the calling application.
+ */
+char * bstr2cstr (const_bstring b, char z) {
+    char * out;
+    int n, k;
+
+    if (b == NULL || b->slen < 0 || b->data == NULL) return NULL;
+
+    n = b->slen;
+    out = (char *) bstr__alloc ((size_t) (n + 1));
+    if (out == NULL) return out;
+
+    for (k = 0; k < n; k++) {
+        if (b->data[k] == '\0') {
+            out[k] = z;
+        } else {
+            out[k] = (char) (b->data[k]);
+        }
+    }
+    out[n] = (unsigned char) '\0';
+
+    return out;
+}
+
+/* int bcstrfree (char * s)
+ *
+ * Frees a C-string generated by bstr2cstr (). Returns BSTR_OK when s was
+ * released and BSTR_ERR when s is NULL. This is normally just a wrapper
+ * around bstr__free (); it exists so that, when bstr__alloc () and
+ * bstr__free () have been redefined as macros within the bstrlib module
+ * (via defining them in memdbg.h after defining BSTRLIB_MEMORY_DEBUG),
+ * higher level code can release the memory correctly without knowing
+ * about those redefinitions.
+ */
+int bcstrfree (char * s) {
+    if (s == NULL) return BSTR_ERR;
+    bstr__free (s);
+    return BSTR_OK;
+}
+
+/* int bconcat (bstring b0, const_bstring b1)
+ *
+ * Concatenate the bstring b1 to the bstring b0. Returns BSTR_OK on
+ * success and BSTR_ERR if either argument is invalid, the combined length
+ * overflows, or memory cannot be obtained.
+ */
+int bconcat (bstring b0, const_bstring b1) {
+int len, d;
+bstring aux = (bstring) b1;
+
+ if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL) return BSTR_ERR;
+
+ d = b0->slen;
+ len = b1->slen;
+ /* A negative value in any of these sets the sign bit of the OR */
+ if ((d | (b0->mlen - d) | len | (d + len)) < 0) return BSTR_ERR;
+
+ if (b0->mlen <= d + len + 1) {
+ /* b0 has to grow. If b1's data lies inside b0's buffer, growing b0
+ may move or free it, so work from a private copy of b1 */
+ ptrdiff_t pd = b1->data - b0->data;
+ if (0 <= pd && pd < b0->mlen) {
+ if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR;
+ }
+ if (balloc (b0, d + len + 1) != BSTR_OK) {
+ if (aux != b1) bdestroy (aux);
+ return BSTR_ERR;
+ }
+ }
+
+ bBlockCopy (&b0->data[d], &aux->data[0], (size_t) len);
+ b0->data[d + len] = (unsigned char) '\0';
+ b0->slen = d + len;
+ /* Release the private copy, if one was made */
+ if (aux != b1) bdestroy (aux);
+ return BSTR_OK;
+}
+
+/* int bconchar (bstring b, char c)
+ *
+ * Concatenate the single character c to the bstring b. Returns BSTR_OK
+ * on success and BSTR_ERR if b is NULL, its lengths are inconsistent, or
+ * the buffer cannot be grown.
+ */
+int bconchar (bstring b, char c) {
+    int end;
+
+    if (b == NULL) return BSTR_ERR;
+
+    end = b->slen;
+    /* A negative slen, or slen greater than mlen, sets the sign bit */
+    if ((end | (b->mlen - end)) < 0) return BSTR_ERR;
+    /* Room for the new character and the terminator */
+    if (balloc (b, end + 2) != BSTR_OK) return BSTR_ERR;
+
+    b->data[end] = (unsigned char) c;
+    b->data[end + 1] = (unsigned char) '\0';
+    b->slen++;
+    return BSTR_OK;
+}
+
+/* int bcatcstr (bstring b, const char * s)
+ *
+ * Concatenate a char * string to a bstring. Returns BSTR_ERR if b is not
+ * a valid writable bstring or s is NULL; otherwise BSTR_OK, or the result
+ * of bcatblk () when the buffer has to be grown.
+ */
+int bcatcstr (bstring b, const char * s) {
+char * d;
+int i, l;
+
+ if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen
+ || b->mlen <= 0 || s == NULL) return BSTR_ERR;
+
+ /* Optimistically concatenate directly */
+ l = b->mlen - b->slen;
+ d = (char *) &b->data[b->slen];
+ for (i=0; i < l; i++) {
+ /* Copy byte by byte; done as soon as the terminator has been copied */
+ if ((*d++ = *s++) == '\0') {
+ b->slen += i;
+ return BSTR_OK;
+ }
+ }
+ b->slen += i;
+
+ /* Need to explicitly resize and concatenate tail; s now points at the
+ part of the input that did not fit */
+ return bcatblk (b, (const void *) s, (int) strlen (s));
+}
+
+/* int bcatblk (bstring b, const void * s, int len)
+ *
+ * Concatenate the first len bytes of the buffer s to a bstring. Returns
+ * BSTR_OK on success and BSTR_ERR if b is not a valid writable bstring,
+ * s is NULL, len is negative, the new length overflows, or the buffer
+ * cannot be grown.
+ */
+int bcatblk (bstring b, const void * s, int len) {
+    int total;
+
+    if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen
+        || b->mlen <= 0 || s == NULL || len < 0) return BSTR_ERR;
+
+    total = b->slen + len;
+    if (total < 0) return BSTR_ERR; /* Overflow? */
+
+    /* Grow only when content plus terminator no longer fits */
+    if (b->mlen <= total) {
+        if (balloc (b, total + 1) < 0) return BSTR_ERR;
+    }
+
+    bBlockCopy (&b->data[b->slen], s, (size_t) len);
+    b->slen = total;
+    b->data[total] = (unsigned char) '\0';
+    return BSTR_OK;
+}
+
+/* bstring bstrcpy (const_bstring b)
+ *
+ * Create a new bstring that is a copy of the bstring b. Returns NULL if
+ * b is invalid or memory cannot be allocated.
+ */
+bstring bstrcpy (const_bstring b) {
+    bstring copy;
+    int n, cap;
+
+    /* Attempted to copy an invalid string? */
+    if (b == NULL || b->slen < 0 || b->data == NULL) return NULL;
+
+    copy = (bstring) bstr__alloc (sizeof (struct tagbstring));
+    if (copy == NULL) {
+        /* Unable to allocate memory for string header */
+        return NULL;
+    }
+
+    n = b->slen;
+    cap = snapUpSize (n + 1);
+
+    copy->data = (unsigned char *) bstr__alloc (cap);
+    if (copy->data == NULL) {
+        /* Retry with the smallest buffer that can hold the content */
+        cap = n + 1;
+        copy->data = (unsigned char *) bstr__alloc (cap);
+        if (copy->data == NULL) {
+            /* Unable to allocate memory for string data */
+            bstr__free (copy);
+            return NULL;
+        }
+    }
+
+    copy->mlen = cap;
+    copy->slen = n;
+
+    if (n != 0) {
+        bstr__memcpy ((char *) copy->data, (char *) b->data, n);
+    }
+    copy->data[copy->slen] = (unsigned char) '\0';
+
+    return copy;
+}
+
+/* int bassign (bstring a, const_bstring b)
+ *
+ * Overwrite the string a with the contents of string b. Returns BSTR_OK
+ * on success and BSTR_ERR if either string is invalid or a cannot be
+ * grown.
+ */
+int bassign (bstring a, const_bstring b) {
+    if (b == NULL || b->data == NULL || b->slen < 0) {
+        return BSTR_ERR;
+    }
+
+    if (b->slen == 0) {
+        /* Nothing to copy, but a must still be a valid writable bstring */
+        if (a == NULL || a->data == NULL || a->mlen < a->slen ||
+            a->slen < 0 || a->mlen == 0) {
+            return BSTR_ERR;
+        }
+    } else {
+        /* balloc validates a as a side effect of growing it */
+        if (balloc (a, b->slen) != BSTR_OK) return BSTR_ERR;
+        bstr__memmove (a->data, b->data, b->slen);
+    }
+
+    a->data[b->slen] = (unsigned char) '\0';
+    a->slen = b->slen;
+    return BSTR_OK;
+}
+
+/* int bassignmidstr (bstring a, const_bstring b, int left, int len)
+ *
+ * Overwrite the string a with the middle of contents of string b
+ * starting from position left and running for a length len. A negative
+ * left is moved to 0 with len shortened by the same amount, and len is
+ * clamped to the end of b; a resulting length of zero or less leaves a
+ * empty. Returns BSTR_OK on success and BSTR_ERR if either string is
+ * invalid or a cannot be grown.
+ */
+int bassignmidstr (bstring a, const_bstring b, int left, int len) {
+    if (b == NULL || b->data == NULL || b->slen < 0) {
+        return BSTR_ERR;
+    }
+
+    /* Clamp the requested window to the bounds of b */
+    if (left < 0) {
+        len += left;
+        left = 0;
+    }
+    if (len > b->slen - left) {
+        len = b->slen - left;
+    }
+
+    if (a == NULL || a->data == NULL || a->mlen < a->slen ||
+        a->slen < 0 || a->mlen == 0) {
+        return BSTR_ERR;
+    }
+
+    if (len <= 0) {
+        a->slen = 0;
+    } else {
+        if (balloc (a, len) != BSTR_OK) return BSTR_ERR;
+        bstr__memmove (a->data, b->data + left, len);
+        a->slen = len;
+    }
+    a->data[a->slen] = (unsigned char) '\0';
+    return BSTR_OK;
+}
+
+/* int bassigncstr (bstring a, const char * str)
+ *
+ * Replace the contents of a with the '\0' terminated string str.  a must
+ * be a well defined, writable bstring.  On failure BSTR_ERR is returned,
+ * in which case a may already have been partially overwritten.
+ */
+int bassigncstr (bstring a, const char * str) {
+int k;
+size_t rest;
+
+    if (NULL == str || a == NULL || a->data == NULL || a->slen < 0 ||
+        a->mlen == 0 || a->mlen < a->slen)
+        return BSTR_ERR;
+
+    /* First pass: copy straight into the existing buffer for as long as
+       it has room, stopping early if the terminator is reached. */
+    k = 0;
+    while (k < a->mlen) {
+        a->data[k] = str[k];
+        if (a->data[k] == '\0') {
+            a->slen = k;
+            return BSTR_OK;
+        }
+        k++;
+    }
+
+    /* The buffer filled up before str ended: measure what is left, grow
+       the buffer, then copy the remainder including its terminator. */
+    a->slen = k;
+    rest = strlen (str + k);
+    if (rest > INT_MAX || k + rest + 1 > INT_MAX) return BSTR_ERR;
+    if (0 > balloc (a, (int) (k + rest + 1))) return BSTR_ERR;
+    bBlockCopy (a->data + k, str + k, (size_t) rest + 1);
+    a->slen += (int) rest;
+    return BSTR_OK;
+}
+
+/* int bassignblk (bstring a, const void * s, int len)
+ *
+ * Overwrite the string a with the contents of the block (s, len).  Note that
+ * the bstring a must be a well defined and writable bstring.  If an error
+ * occurs BSTR_ERR is returned and a is not overwritten.
+ *
+ * len must lie in the range [0, INT_MAX-1] so that len + 1 (the contents
+ * plus the '\0' terminator) is representable as an int.
+ */
+int bassignblk (bstring a, const void * s, int len) {
+    /* The range check on len is spelled out explicitly; testing
+       "len + 1 < 1" would depend on signed overflow when len == INT_MAX,
+       which is undefined behaviour. */
+    if (a == NULL || a->data == NULL || a->mlen < a->slen ||
+        a->slen < 0 || a->mlen == 0 || NULL == s ||
+        len < 0 || len == INT_MAX)
+        return BSTR_ERR;
+    if (len + 1 > a->mlen && 0 > balloc (a, len + 1)) return BSTR_ERR;
+    bBlockCopy (a->data, s, (size_t) len);
+    a->data[len] = (unsigned char) '\0';
+    a->slen = len;
+    return BSTR_OK;
+}
+
+/* int btrunc (bstring b, int n)
+ *
+ * Shorten b so that it holds no more than n characters.  A string that is
+ * already n characters or shorter is left untouched.  Returns BSTR_ERR if
+ * n is negative or b is not a valid writable bstring, otherwise BSTR_OK.
+ */
+int btrunc (bstring b, int n) {
+    if (b == NULL || n < 0) return BSTR_ERR;
+    if (b->data == NULL || b->slen < 0 ||
+        b->mlen <= 0 || b->mlen < b->slen) return BSTR_ERR;
+
+    if (n < b->slen) {
+        b->data[n] = (unsigned char) '\0';
+        b->slen = n;
+    }
+    return BSTR_OK;
+}
+
+/* Wrappers around the <ctype.h> classifiers.  The argument is converted to
+ * unsigned char before the call, and is parenthesised so that the cast
+ * applies to the whole argument expression rather than to its first operand.
+ */
+#define upcase(c) (toupper ((unsigned char) (c)))
+#define downcase(c) (tolower ((unsigned char) (c)))
+#define wspace(c) (isspace ((unsigned char) (c)))
+
+/* int btoupper (bstring b)
+ *
+ * Replace every character of b, in place, with the result of upcase.
+ * Returns BSTR_ERR if b is not a valid writable bstring, else BSTR_OK.
+ */
+int btoupper (bstring b) {
+unsigned char * p, * end;
+
+    if (b == NULL || b->data == NULL || b->slen < 0 ||
+        b->mlen <= 0 || b->mlen < b->slen) return BSTR_ERR;
+
+    end = b->data + b->slen;
+    for (p = b->data; p < end; p++) {
+        *p = (unsigned char) upcase (*p);
+    }
+    return BSTR_OK;
+}
+
+/* int btolower (bstring b)
+ *
+ * Replace every character of b, in place, with the result of downcase.
+ * Returns BSTR_ERR if b is not a valid writable bstring, else BSTR_OK.
+ */
+int btolower (bstring b) {
+unsigned char * p, * end;
+
+    if (b == NULL || b->data == NULL || b->slen < 0 ||
+        b->mlen <= 0 || b->mlen < b->slen) return BSTR_ERR;
+
+    end = b->data + b->slen;
+    for (p = b->data; p < end; p++) {
+        *p = (unsigned char) downcase (*p);
+    }
+    return BSTR_OK;
+}
+
+/* int bstricmp (const_bstring b0, const_bstring b1)
+ *
+ * Case-insensitive comparison of b0 and b1.  Characters are lower-cased
+ * and compared pairwise over the common length; the first non-zero
+ * difference is returned.  If one string is a prefix of the other, the
+ * result is derived from the first extra character of the longer string
+ * (lower-cased, negated when b1 is the longer one); when that character
+ * is '\0' the magnitude UCHAR_MAX+1 is used instead.  0 means the strings
+ * are equal.  SHRT_MIN is returned if either argument is invalid.
+ */
+int bstricmp (const_bstring b0, const_bstring b1) {
+int k, diff, common;
+
+    if (bdata (b0) == NULL || b0->slen < 0 ||
+        bdata (b1) == NULL || b1->slen < 0) return SHRT_MIN;
+
+    if (b0->slen > b1->slen) {
+        common = b1->slen;
+    } else {
+        common = b0->slen;
+        /* Same length and same buffer: trivially equal */
+        if (b0->slen == b1->slen && b0->data == b1->data) return BSTR_OK;
+    }
+
+    for (k = 0; k < common; k++) {
+        diff = (char) downcase (b0->data[k])
+             - (char) downcase (b1->data[k]);
+        if (diff != 0) return diff;
+    }
+
+    if (b0->slen > common) {
+        diff = (char) downcase (b0->data[common]);
+        return diff ? diff : UCHAR_MAX + 1;
+    }
+    if (b1->slen > common) {
+        diff = - (char) downcase (b1->data[common]);
+        return diff ? diff : - (int) (UCHAR_MAX + 1);
+    }
+    return BSTR_OK;
+}
+
+/* int bstrnicmp (const_bstring b0, const_bstring b1, int n)
+ *
+ * Compare two strings without differentiating between case for at most n
+ * characters.  If the position where the two strings first differ is
+ * before the nth position, the return value is the difference of the values
+ * of the characters after lower case transformation, otherwise 0 is
+ * returned.  If the lengths are different and less than n characters, then
+ * a difference from 0 is given, but if the first extra character is '\0',
+ * then it is taken to be the value UCHAR_MAX+1.  SHRT_MIN is returned if
+ * either string is invalid or n is negative.
+ */
+int bstrnicmp (const_bstring b0, const_bstring b1, int n) {
+int i, v, m;
+
+    if (bdata (b0) == NULL || b0->slen < 0 ||
+        bdata (b1) == NULL || b1->slen < 0 || n < 0) return SHRT_MIN;
+
+    /* m = number of positions that exist in both strings and are within n */
+    m = n;
+    if (m > b0->slen) m = b0->slen;
+    if (m > b1->slen) m = b1->slen;
+
+    if (b0->data != b1->data) {
+        for (i = 0; i < m; i ++) {
+            v  = (char) downcase (b0->data[i]);
+            v -= (char) downcase (b1->data[i]);
+            /* Return the case-folded difference.  The raw byte difference
+               can carry the opposite sign (e.g. 'a' vs 'B'), which would
+               contradict the case-insensitive ordering and bstricmp. */
+            if (v != 0) return v;
+        }
+    }
+
+    if (n == m || b0->slen == b1->slen) return BSTR_OK;
+
+    if (b0->slen > m) {
+        v = (char) downcase (b0->data[m]);
+        if (v) return v;
+        return UCHAR_MAX + 1;
+    }
+
+    v = - (char) downcase (b1->data[m]);
+    if (v) return v;
+    return - (int) (UCHAR_MAX + 1);
+}
+
+/* int biseqcaseless (const_bstring b0, const_bstring b1)
+ *
+ * Test two strings for equality, ignoring case.  Returns 1 when they
+ * match, 0 when they differ by more than case (including any difference
+ * in length, which is detected without scanning), and BSTR_ERR (-1) when
+ * either argument is invalid.  '\0' characters get no special treatment.
+ */
+int biseqcaseless (const_bstring b0, const_bstring b1) {
+int k, len;
+
+    if (bdata (b0) == NULL || b0->slen < 0 ||
+        bdata (b1) == NULL || b1->slen < 0) return BSTR_ERR;
+
+    len = b0->slen;
+    if (len != b1->slen) return BSTR_OK;
+    if (len == 0 || b0->data == b1->data) return 1;
+
+    for (k = 0; k < len; k++) {
+        unsigned char x = b0->data[k];
+        unsigned char y = b1->data[k];
+        if (x == y) continue;   /* identical bytes need no folding */
+        if ((unsigned char) downcase (x) != (unsigned char) downcase (y))
+            return 0;
+    }
+    return 1;
+}
+
+/* int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len)
+ *
+ * Test whether the first len characters of b0 equal the len bytes at blk,
+ * ignoring case.  Returns 1 on a match, 0 if they differ by more than
+ * case or if b0 is shorter than len, and BSTR_ERR (-1) if an argument is
+ * invalid.  '\0' characters get no special treatment.
+ */
+int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len) {
+const unsigned char * mem = (const unsigned char *) blk;
+int k;
+
+    if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0)
+        return BSTR_ERR;
+    if (len > b0->slen) return BSTR_OK;
+    if (len == 0 || b0->data == mem) return 1;
+
+    for (k = 0; k < len; k++) {
+        if (b0->data[k] == mem[k]) continue;
+        if (downcase (b0->data[k]) != downcase (mem[k])) return 0;
+    }
+    return 1;
+}
+
+/*
+ * int bltrimws (bstring b)
+ *
+ * Remove the run of whitespace (as classified by wspace) at the start of
+ * b.  A string that is empty or entirely whitespace becomes empty.
+ * Returns BSTR_ERR if b is not a valid writable bstring; otherwise the
+ * result of bdelete, or BSTR_OK when the string is emptied directly.
+ */
+int bltrimws (bstring b) {
+int lead;
+
+    if (b == NULL || b->data == NULL || b->slen < 0 ||
+        b->mlen <= 0 || b->mlen < b->slen) return BSTR_ERR;
+
+    /* Count the leading whitespace characters */
+    lead = 0;
+    while (lead < b->slen && wspace (b->data[lead])) lead++;
+
+    if (lead < b->slen) return bdelete (b, 0, lead);
+
+    /* Nothing but whitespace (or nothing at all) */
+    b->data[0] = (unsigned char) '\0';
+    b->slen = 0;
+    return BSTR_OK;
+}
+
+/*
+ * int brtrimws (bstring b)
+ *
+ * Remove the run of whitespace (as classified by wspace) at the end of b.
+ * A string that is empty or entirely whitespace becomes empty.  Returns
+ * BSTR_ERR if b is not a valid writable bstring, otherwise BSTR_OK.
+ */
+int brtrimws (bstring b) {
+int last;
+
+    if (b == NULL || b->data == NULL || b->slen < 0 ||
+        b->mlen <= 0 || b->mlen < b->slen) return BSTR_ERR;
+
+    /* Walk back to the last non-whitespace character, if there is one */
+    last = b->slen - 1;
+    while (last >= 0 && wspace (b->data[last])) last--;
+
+    if (last < 0) {
+        b->data[0] = (unsigned char) '\0';
+        b->slen = 0;
+        return BSTR_OK;
+    }
+
+    if (b->mlen > last) b->data[last + 1] = (unsigned char) '\0';
+    b->slen = last + 1;
+    return BSTR_OK;
+}
+
+/*
+ * int btrimws (bstring b)
+ *
+ * Remove whitespace (as classified by wspace) from both ends of b.  The
+ * right end is trimmed first, then the leading run is removed through
+ * bdelete.  A string that is empty or entirely whitespace becomes empty.
+ * Returns BSTR_ERR if b is not a valid writable bstring.
+ */
+int btrimws (bstring b) {
+int head, tail;
+
+    if (b == NULL || b->data == NULL || b->slen < 0 ||
+        b->mlen <= 0 || b->mlen < b->slen) return BSTR_ERR;
+
+    tail = b->slen - 1;
+    while (tail >= 0 && wspace (b->data[tail])) tail--;
+
+    if (tail < 0) {
+        b->data[0] = (unsigned char) '\0';
+        b->slen = 0;
+        return BSTR_OK;
+    }
+
+    if (b->mlen > tail) b->data[tail + 1] = (unsigned char) '\0';
+    b->slen = tail + 1;
+
+    /* data[tail] is not whitespace, so this scan stops at or before it */
+    head = 0;
+    while (wspace (b->data[head])) head++;
+    return bdelete (b, 0, head);
+}
+
+/* int biseq (const_bstring b0, const_bstring b1)
+ *
+ * Test b0 and b1 for exact equality.  Returns 1 if they are the same, 0
+ * if they differ, and BSTR_ERR (-1) if either argument is invalid.
+ * Strings of different length are rejected without looking at their
+ * contents.  '\0' characters get no special treatment.
+ */
+int biseq (const_bstring b0, const_bstring b1) {
+    if (b0 == NULL || b1 == NULL) return BSTR_ERR;
+    if (b0->data == NULL || b1->data == NULL ||
+        b0->slen < 0 || b1->slen < 0) return BSTR_ERR;
+
+    if (b0->slen != b1->slen) return BSTR_OK;
+    if (b0->slen == 0 || b0->data == b1->data) return 1;
+    return bstr__memcmp (b0->data, b1->data, b0->slen) == 0;
+}
+
+/* int bisstemeqblk (const_bstring b0, const void * blk, int len)
+ *
+ * Test whether the first len characters of b0 equal the len bytes at blk.
+ * Returns 1 on a match, 0 if they differ or if b0 is shorter than len,
+ * and BSTR_ERR (-1) if an argument is invalid.  '\0' characters get no
+ * special treatment.
+ */
+int bisstemeqblk (const_bstring b0, const void * blk, int len) {
+const unsigned char * mem = (const unsigned char *) blk;
+int k;
+
+    if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0)
+        return BSTR_ERR;
+    if (len > b0->slen) return BSTR_OK;
+    if (len == 0 || b0->data == mem) return 1;
+
+    /* Advance over the matching prefix; a full match reaches len */
+    k = 0;
+    while (k < len && b0->data[k] == mem[k]) k++;
+    return k == len;
+}
+
+/* int biseqcstr (const_bstring b, const char *s)
+ *
+ * Test the bstring b against the C string s.  They are considered equal
+ * only if s is '\0' terminated at exactly the length of b and every
+ * character before that matches; consequently a b that contains a '\0'
+ * character never compares equal.  This is equivalent to the condition
+ * that their contents will always be equal when compared in the same
+ * format after converting one or the other.  Returns 1 if equal, 0 if
+ * unequal, and BSTR_ERR on a detectable error.
+ */
+int biseqcstr (const_bstring b, const char * s) {
+int k;
+
+    if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR;
+
+    for (k = 0; k < b->slen; k++) {
+        if (s[k] == '\0') return BSTR_OK;   /* s ended early */
+        if (b->data[k] != (unsigned char) s[k]) return BSTR_OK;
+    }
+    /* All of b matched; s must end right here as well */
+    return s[k] == '\0';
+}
+
+/* int biseqcstrcaseless (const_bstring b, const char *s)
+ *
+ * Test the bstring b against the C string s, ignoring case.  They are
+ * considered equal only if s is '\0' terminated at exactly the length of
+ * b and every character before that matches apart from case; consequently
+ * a b that contains a '\0' character never compares equal.  Returns 1 if
+ * equal except for case, 0 if unequal regardless of case, and BSTR_ERR on
+ * a detectable error.
+ */
+int biseqcstrcaseless (const_bstring b, const char * s) {
+int k;
+
+    if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR;
+
+    for (k = 0; k < b->slen; k++) {
+        if (s[k] == '\0') return BSTR_OK;   /* s ended early */
+        if (b->data[k] == (unsigned char) s[k]) continue;
+        if (downcase (b->data[k]) != (unsigned char) downcase (s[k]))
+            return BSTR_OK;
+    }
+    /* All of b matched; s must end right here as well */
+    return s[k] == '\0';
+}
+
+/* int bstrcmp (const_bstring b0, const_bstring b1)
+ *
+ * Lexicographic comparison of b0 and b1.  SHRT_MIN is returned on error.
+ * Otherwise the result is negative or positive according to whether b0
+ * orders before or after b1; the first differing character pair decides,
+ * using the difference of the two characters converted to char.  If the
+ * strings agree over the length of the shorter one, the longer string
+ * orders later (1 or -1 is returned).  As with the standard C strcmp, the
+ * comparison does not proceed past a '\0' character: reaching one in a
+ * matching position ends the comparison with 0.
+ */
+int bstrcmp (const_bstring b0, const_bstring b1) {
+int k, diff, common;
+
+    if (b0 == NULL || b1 == NULL) return SHRT_MIN;
+    if (b0->data == NULL || b1->data == NULL ||
+        b0->slen < 0 || b1->slen < 0) return SHRT_MIN;
+
+    common = (b0->slen > b1->slen) ? b1->slen : b0->slen;
+    if (b0->slen == b1->slen && (b0->slen == 0 || b0->data == b1->data))
+        return BSTR_OK;
+
+    for (k = 0; k < common; k++) {
+        diff = ((char) b0->data[k]) - ((char) b1->data[k]);
+        if (diff != 0) return diff;
+        if (b0->data[k] == (unsigned char) '\0') return BSTR_OK;
+    }
+
+    if (b0->slen > common) return 1;
+    return (b1->slen > common) ? -1 : BSTR_OK;
+}
+
+/* int bstrncmp (const_bstring b0, const_bstring b1, int n)
+ *
+ * Lexicographic comparison of b0 and b1 over at most n characters.
+ * SHRT_MIN is returned on error; otherwise the result is as if b0 and b1
+ * were first truncated to at most n characters and bstrcmp were then
+ * called with those strings as parameters.  As with the standard C
+ * strcmp, the comparison does not proceed past a '\0' character.
+ */
+int bstrncmp (const_bstring b0, const_bstring b1, int n) {
+int k, diff, span;
+
+    if (b0 == NULL || b1 == NULL) return SHRT_MIN;
+    if (b0->data == NULL || b1->data == NULL ||
+        b0->slen < 0 || b1->slen < 0) return SHRT_MIN;
+
+    /* span = positions present in both strings and within the limit n */
+    span = n;
+    if (span > b0->slen) span = b0->slen;
+    if (span > b1->slen) span = b1->slen;
+
+    if (b0->data != b1->data) {
+        for (k = 0; k < span; k++) {
+            diff = ((char) b0->data[k]) - ((char) b1->data[k]);
+            if (diff != 0) return diff;
+            if (b0->data[k] == (unsigned char) '\0') return BSTR_OK;
+        }
+    }
+
+    if (n == span || b0->slen == b1->slen) return BSTR_OK;
+
+    return (b0->slen > span) ? 1 : -1;
+}
+
+/* bstring bmidstr (const_bstring b, int left, int len)
+ *
+ * Build a new bstring from the section of b that begins at index left and
+ * spans len characters.  The section (left, len) is clamped to the bounds
+ * of b; if nothing remains after clamping, the result comes from
+ * bfromcstr ("").  NULL is returned if b is detectably invalid.
+ */
+bstring bmidstr (const_bstring b, int left, int len) {
+int avail;
+
+    if (b == NULL || b->slen < 0 || b->data == NULL) return NULL;
+
+    /* A negative start eats into the requested length */
+    if (left < 0) {
+        len += left;
+        left = 0;
+    }
+
+    avail = b->slen - left;
+    if (len > avail) len = avail;
+
+    return (len <= 0) ? bfromcstr ("") : blk2bstr (b->data + left, len);
+}
+
+/* int bdelete (bstring b, int pos, int len)
+ *
+ * Removes characters from pos to pos+len-1 inclusive and shifts the tail of
+ * the bstring starting from pos+len to pos.  len must be positive for this
+ * call to have any effect.  The section of the string described by (pos,
+ * len) is clamped to boundaries of the bstring b.  Returns BSTR_ERR if len
+ * is negative (before or after clamping a negative pos) or if b is not a
+ * valid writable bstring, otherwise BSTR_OK.
+ */
+int bdelete (bstring b, int pos, int len) {
+    /* A negative length is an error in every case; rejecting it up front
+       also keeps the clamp below from overflowing. */
+    if (len < 0) return BSTR_ERR;
+
+    /* Clamp to left side of bstring */
+    if (pos < 0) {
+        len += pos;
+        pos = 0;
+    }
+
+    if (len < 0 || b == NULL || b->data == NULL || b->slen < 0 ||
+        b->mlen < b->slen || b->mlen <= 0)
+        return BSTR_ERR;
+    if (len > 0 && pos < b->slen) {
+        /* Compare against the room left after pos instead of forming
+           pos + len, which can overflow for large len (for example a
+           caller passing INT_MAX to mean "through the end"). */
+        if (len >= b->slen - pos) {
+            b->slen = pos;
+        } else {
+            bBlockCopy ((char *) (b->data + pos),
+                        (char *) (b->data + pos + len),
+                        b->slen - (pos+len));
+            b->slen -= len;
+        }
+        b->data[b->slen] = (unsigned char) '\0';
+    }
+    return BSTR_OK;
+}
+
+/* int bdestroy (bstring b)
+ *
+ * Release the bstring b: both its character buffer and its header are
+ * freed.  If b is detectably invalid or not writable, nothing is done and
+ * BSTR_ERR is returned.  As with any freed allocation, using b in any way
+ * after it has been destroyed is undefined.
+ */
+int bdestroy (bstring b) {
+    if (b == NULL) return BSTR_ERR;
+    if (b->data == NULL || b->slen < 0 ||
+        b->mlen <= 0 || b->mlen < b->slen)
+        return BSTR_ERR;
+
+    bstr__free (b->data);
+
+    /* Stamp the header with invalid values before letting it go, giving
+       stale users of b one more chance to notice the error. */
+    b->data = NULL;
+    b->mlen = -__LINE__;
+    b->slen = -1;
+
+    bstr__free (b);
+    return BSTR_OK;
+}
+
+/* int binstr (const_bstring b1, int pos, const_bstring b2)
+ *
+ * Search for the bstring b2 in b1 starting from position pos, and searching
+ * forward. If it is found then return with the first position where it is
+ * found, otherwise return BSTR_ERR. Note that this is just a brute force
+ * string searcher that does not attempt clever things like the Boyer-Moore
+ * search algorithm. Because of this there are many degenerate cases where
+ * this can take much longer than it needs to.
+ *
+ * Special cases handled before the search proper:
+ *  - either argument NULL, with a NULL data pointer or a negative slen:
+ *    BSTR_ERR;
+ *  - pos equal to b1->slen: pos if b2 is empty, otherwise BSTR_ERR;
+ *  - pos negative or beyond b1->slen: BSTR_ERR;
+ *  - b2 empty: pos;
+ *  - b2 too long to fit in b1 at or after pos: BSTR_ERR;
+ *  - b1 and b2 sharing the same data pointer with pos == 0: 0.
+ */
+int binstr (const_bstring b1, int pos, const_bstring b2) {
+int j, ii, ll, lf;
+unsigned char * d0;
+unsigned char c0;
+register unsigned char * d1;
+register unsigned char c1;
+register int i;
+
+ if (b1 == NULL || b1->data == NULL || b1->slen < 0 ||
+ b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR;
+ if (b1->slen == pos) return (b2->slen == 0)?pos:BSTR_ERR;
+ if (b1->slen < pos || pos < 0) return BSTR_ERR;
+ if (b2->slen == 0) return pos;
+
+ /* No space to find such a string? */
+ if ((lf = b1->slen - b2->slen + 1) <= pos) return BSTR_ERR; /* lf = one past the last feasible start index */
+
+ /* An obvious alias case */
+ if (b1->data == b2->data && pos == 0) return 0;
+
+ i = pos;
+
+ d0 = b2->data; /* the pattern being searched for */
+ d1 = b1->data; /* the text being searched */
+ ll = b2->slen; /* pattern length */
+
+ /* Peel off the b2->slen == 1 case */
+ c0 = d0[0];
+ if (1 == ll) {
+ for (;i < lf; i++) if (c0 == d1[i]) return i;
+ return BSTR_ERR;
+ }
+
+ c1 = c0; /* c1 is the pattern character currently being looked for */
+ j = 0; /* number of pattern characters matched so far */
+ lf = b1->slen - 1; /* lf is re-used: from here on it is the last index of b1 */
+
+ ii = -1; /* start index of the current candidate match */
+ if (i < lf) do {
+ /* Unrolled current character test */
+ if (c1 != d1[i]) {
+ if (c1 != d1[1+i]) {
+ i += 2;
+ continue;
+ }
+ i++;
+ }
+
+ /* Take note if this is the start of a potential match */
+ if (0 == j) ii = i;
+
+ /* Shift the test character down by one */
+ j++;
+ i++;
+
+ /* If this isn't past the last character continue */
+ if (j < ll) {
+ c1 = d0[j];
+ continue;
+ }
+
+ N0:; /* also reached by the goto in the tail check after the loop */
+
+ /* If no characters mismatched, then we matched */
+ if (i == ii+j) return ii;
+
+ /* Shift back to the beginning */
+ i -= j;
+ j = 0;
+ c1 = c0;
+ } while (i < lf);
+
+ /* Deal with last case if unrolling caused a misalignment */
+ if (i == lf && ll == j+1 && c1 == d1[i]) goto N0;
+
+ return BSTR_ERR;
+}
+
+/* int binstrr (const_bstring b1, int pos, const_bstring b2)
+ *
+ * Search backward through b1 for the bstring b2, beginning with candidate
+ * start position pos and moving toward index 0.  The first position at
+ * which b2 is found is returned, otherwise BSTR_ERR.  If pos leaves too
+ * little room for b2, the search begins at the last position where b2
+ * could fit.  An empty b2 matches at pos.  This is a brute force search,
+ * so there are degenerate inputs on which it is slow.
+ */
+int binstrr (const_bstring b1, int pos, const_bstring b2) {
+int at, matched, room, nlen;
+unsigned char * needle, * hay;
+
+    if (b1 == NULL || b1->data == NULL || b1->slen < 0 ||
+        b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR;
+
+    /* pos must lie in [0, b1->slen]; an empty pattern matches right there */
+    if (pos < 0 || pos > b1->slen) return BSTR_ERR;
+    if (b2->slen == 0) return pos;
+
+    /* Obvious alias case */
+    if (pos == 0 && b1->data == b2->data && b2->slen <= b1->slen) return 0;
+
+    room = b1->slen - b2->slen;
+    if (room < 0) return BSTR_ERR;
+
+    /* Snap back to the last start index at which b2 still fits */
+    at = (pos > room) ? room : pos;
+
+    needle = b2->data;
+    hay = b1->data;
+    nlen = b2->slen;
+
+    matched = 0;
+    while (at >= 0) {
+        if (needle[matched] == hay[at + matched]) {
+            matched++;
+            if (matched >= nlen) return at;
+        } else {
+            /* Mismatch: retry one position further left */
+            at--;
+            matched = 0;
+        }
+    }
+
+    return BSTR_ERR;
+}
+
+/* int binstrcaseless (const_bstring b1, int pos, const_bstring b2)
+ *
+ * Search forward through b1, from position pos, for the bstring b2,
+ * ignoring case.  The first position at which b2 is found is returned,
+ * otherwise BSTR_ERR.  An empty b2 matches at pos.  This is a brute force
+ * search, so there are degenerate inputs on which it is slow.
+ */
+int binstrcaseless (const_bstring b1, int pos, const_bstring b2) {
+int at, matched, limit, nlen;
+unsigned char * needle, * hay;
+
+    if (b1 == NULL || b1->data == NULL || b1->slen < 0 ||
+        b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR;
+    if (b1->slen == pos) return (b2->slen == 0) ? pos : BSTR_ERR;
+    if (pos < 0 || pos > b1->slen) return BSTR_ERR;
+    if (b2->slen == 0) return pos;
+
+    /* limit = one past the last start index at which b2 still fits */
+    limit = b1->slen - b2->slen + 1;
+    if (pos >= limit) return BSTR_ERR;
+
+    /* An obvious alias case */
+    if (pos == 0 && b1->data == b2->data) return BSTR_OK;
+
+    needle = b2->data;
+    hay = b1->data;
+    nlen = b2->slen;
+
+    at = pos;
+    matched = 0;
+    while (at < limit) {
+        if (needle[matched] == hay[at + matched] ||
+            downcase (needle[matched]) == downcase (hay[at + matched])) {
+            matched++;
+            if (matched >= nlen) return at;
+        } else {
+            /* Mismatch: retry one position further right */
+            at++;
+            matched = 0;
+        }
+    }
+
+    return BSTR_ERR;
+}
+
+/* int binstrrcaseless (const_bstring b1, int pos, const_bstring b2)
+ *
+ * Search backward through b1 for the bstring b2, ignoring case, beginning
+ * with candidate start position pos and moving toward index 0.  The first
+ * position at which b2 is found is returned, otherwise BSTR_ERR.  If pos
+ * leaves too little room for b2, the search begins at the last position
+ * where b2 could fit.  An empty b2 matches at pos.  This is a brute force
+ * search, so there are degenerate inputs on which it is slow.
+ */
+int binstrrcaseless (const_bstring b1, int pos, const_bstring b2) {
+int at, matched, room, nlen;
+unsigned char * needle, * hay;
+
+    if (b1 == NULL || b1->data == NULL || b1->slen < 0 ||
+        b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR;
+
+    /* pos must lie in [0, b1->slen]; an empty pattern matches right there */
+    if (pos < 0 || pos > b1->slen) return BSTR_ERR;
+    if (b2->slen == 0) return pos;
+
+    /* Obvious alias case */
+    if (pos == 0 && b1->data == b2->data && b2->slen <= b1->slen) return BSTR_OK;
+
+    room = b1->slen - b2->slen;
+    if (room < 0) return BSTR_ERR;
+
+    /* Snap back to the last start index at which b2 still fits */
+    at = (pos > room) ? room : pos;
+
+    needle = b2->data;
+    hay = b1->data;
+    nlen = b2->slen;
+
+    matched = 0;
+    while (at >= 0) {
+        if (needle[matched] == hay[at + matched] ||
+            downcase (needle[matched]) == downcase (hay[at + matched])) {
+            matched++;
+            if (matched >= nlen) return at;
+        } else {
+            /* Mismatch: retry one position further left */
+            at--;
+            matched = 0;
+        }
+    }
+
+    return BSTR_ERR;
+}
+
+
+/* int bstrchrp (const_bstring b, int c, int pos)
+ *
+ * Find the first occurrence of the character c in b at or after index
+ * pos.  Returns its index, or BSTR_ERR if c does not occur there, if b is
+ * invalid, or if pos is outside [0, b->slen).
+ */
+int bstrchrp (const_bstring b, int c, int pos) {
+unsigned char * hit;
+
+    if (b == NULL || b->data == NULL) return BSTR_ERR;
+    if (pos < 0 || pos >= b->slen) return BSTR_ERR;
+
+    hit = (unsigned char *) bstr__memchr ((b->data + pos), (unsigned char) c, (b->slen - pos));
+    return hit ? (int) (hit - b->data) : BSTR_ERR;
+}
+
+/* int bstrrchrp (const_bstring b, int c, int pos)
+ *
+ * Find the last occurrence of the character c in b at or before index
+ * pos.  Returns its index, or BSTR_ERR if c does not occur there, if b is
+ * invalid, or if pos is outside [0, b->slen).
+ */
+int bstrrchrp (const_bstring b, int c, int pos) {
+    if (b == NULL || b->data == NULL) return BSTR_ERR;
+    if (pos < 0 || pos >= b->slen) return BSTR_ERR;
+
+    while (pos >= 0) {
+        if (b->data[pos] == (unsigned char) c) return pos;
+        pos--;
+    }
+    return BSTR_ERR;
+}
+
+#if !defined (BSTRLIB_AGGRESSIVE_MEMORY_FOR_SPEED_TRADEOFF) /* default: bit-packed character set, one bit per character value */
+#define LONG_LOG_BITS_QTY (3) /* log2 of the number of bits used per array element */
+#define LONG_BITS_QTY (1 << LONG_LOG_BITS_QTY) /* bits used per array element (8) */
+#define LONG_TYPE unsigned char /* element type of the bit array */
+
+#define CFCLEN ((1 << CHAR_BIT) / LONG_BITS_QTY) /* elements needed to hold one bit for every character value */
+struct charField { LONG_TYPE content[CFCLEN]; }; /* membership set: bit (c & 7) of content[c >> 3] stands for character c */
+#define testInCharField(cf,c) ((cf)->content[(c) >> LONG_LOG_BITS_QTY] & (((long)1) << ((c) & (LONG_BITS_QTY-1))))
+#define setInCharField(cf,idx) { \
+ unsigned int c = (unsigned int) (idx); \
+ (cf)->content[c >> LONG_LOG_BITS_QTY] |= (LONG_TYPE) (1ul << (c & (LONG_BITS_QTY-1))); \
+}
+
+#else /* alternative: one whole array element per character value, so a test is a single lookup */
+
+#define CFCLEN (1 << CHAR_BIT) /* one element for every character value */
+struct charField { unsigned char content[CFCLEN]; }; /* membership set: content[c] is non-zero when character c is present */
+#define testInCharField(cf,c) ((cf)->content[(unsigned char) (c)])
+#define setInCharField(cf,idx) (cf)->content[(unsigned int) (idx)] = ~0
+
+#endif
+
+/* Fill *cf so that it contains exactly the characters that occur in b.
+ * Returns BSTR_ERR (leaving *cf untouched) if b is NULL, has no data or is
+ * empty, otherwise BSTR_OK.
+ */
+static int buildCharField (struct charField * cf, const_bstring b) {
+int k;
+
+    if (b == NULL) return BSTR_ERR;
+    if (b->data == NULL || b->slen <= 0) return BSTR_ERR;
+
+    /* Start from the empty set, then add each character of b */
+    memset ((void *) cf->content, 0, sizeof (struct charField));
+    k = 0;
+    while (k < b->slen) {
+        setInCharField (cf, b->data[k]);
+        k++;
+    }
+    return BSTR_OK;
+}
+
+/* Complement the set in place: every element of cf->content is replaced
+ * by its bitwise negation.
+ */
+static void invertCharField (struct charField * cf) {
+int k = CFCLEN;
+
+    while (k-- > 0) {
+        cf->content[k] = ~cf->content[k];
+    }
+}
+
+/* Inner engine for binchr: scan data forward over the indices [pos, len)
+ * and return the first index whose character is a member of cf, or
+ * BSTR_ERR if there is none.
+ */
+static int binchrCF (const unsigned char * data, int len, int pos, const struct charField * cf) {
+    while (pos < len) {
+        unsigned char ch = (unsigned char) data[pos];
+        if (testInCharField (cf, ch)) return pos;
+        pos++;
+    }
+    return BSTR_ERR;
+}
+
+/* int binchr (const_bstring b0, int pos, const_bstring b1);
+ *
+ * Search for the first position in b0 starting from pos or after, in which
+ * one of the characters in b1 is found and return it.  If such a position
+ * does not exist in b0, then BSTR_ERR is returned.  BSTR_ERR is also
+ * returned if b0 or b1 is NULL or has no data, if b1 is empty, or if pos
+ * is outside [0, b0->slen).
+ */
+int binchr (const_bstring b0, int pos, const_bstring b1) {
+struct charField chrs;
+    /* b1 has to be validated here as well: it is dereferenced just below,
+       before buildCharField gets a chance to reject it. */
+    if (pos < 0 || b0 == NULL || b0->data == NULL ||
+        b1 == NULL || b1->data == NULL ||
+        b0->slen <= pos) return BSTR_ERR;
+    /* A single-character set is handled by the plain character search */
+    if (1 == b1->slen) return bstrchrp (b0, b1->data[0], pos);
+    if (0 > buildCharField (&chrs, b1)) return BSTR_ERR;
+    return binchrCF (b0->data, b0->slen, pos, &chrs);
+}
+
+/* Inner engine for binchrr: scan data backward from index pos down to 0
+ * and return the first index met whose character is a member of cf, or
+ * BSTR_ERR if there is none (including when pos is negative).
+ */
+static int binchrrCF (const unsigned char * data, int pos, const struct charField * cf) {
+    while (pos >= 0) {
+        unsigned int ch = (unsigned int) data[pos];
+        if (testInCharField (cf, ch)) return pos;
+        pos--;
+    }
+    return BSTR_ERR;
+}
+
+/* int binchrr (const_bstring b0, int pos, const_bstring b1);
+ *
+ * Search for the last position in b0 no greater than pos, in which one of
+ * the characters in b1 is found and return it.  If such a position does not
+ * exist in b0, then BSTR_ERR is returned.  BSTR_ERR is also returned if b0
+ * or b1 is NULL or has no data, if b1 is empty, or if pos is outside
+ * [0, b0->slen].  A pos equal to b0->slen starts the search at the last
+ * character.
+ */
+int binchrr (const_bstring b0, int pos, const_bstring b1) {
+struct charField chrs;
+    /* b1->data is checked here because b1->data[0] is read below when
+       b1 holds a single character. */
+    if (pos < 0 || b0 == NULL || b0->data == NULL || b1 == NULL ||
+        b1->data == NULL || b0->slen < pos) return BSTR_ERR;
+    if (pos == b0->slen) pos--;
+    /* A single-character set is handled by the plain character search */
+    if (1 == b1->slen) return bstrrchrp (b0, b1->data[0], pos);
+    if (0 > buildCharField (&chrs, b1)) return BSTR_ERR;
+    return binchrrCF (b0->data, pos, &chrs);
+}
+
+/* int bninchr (const_bstring b0, int pos, const_bstring b1);
+ *
+ * Find the first position in b0, at or after pos, whose character is NOT
+ * one of the characters in b1.  Returns that position, or BSTR_ERR if
+ * there is none, if b0 is invalid, if pos is outside [0, b0->slen), or if
+ * buildCharField rejects b1.
+ */
+int bninchr (const_bstring b0, int pos, const_bstring b1) {
+struct charField excluded;
+
+    if (b0 == NULL || b0->data == NULL) return BSTR_ERR;
+    if (pos < 0 || pos >= b0->slen) return BSTR_ERR;
+
+    /* Build the set of b1's characters, then flip it to its complement */
+    if (buildCharField (&excluded, b1) < 0) return BSTR_ERR;
+    invertCharField (&excluded);
+
+    return binchrCF (b0->data, b0->slen, pos, &excluded);
+}
+
+/* int bninchrr (const_bstring b0, int pos, const_bstring b1);
+ *
+ * Find the last position in b0, no greater than pos, whose character is
+ * NOT one of the characters in b1.  Returns that position, or BSTR_ERR if
+ * there is none, if b0 is invalid, if pos is outside [0, b0->slen], or if
+ * buildCharField rejects b1.  A pos equal to b0->slen starts the search at
+ * the last character.
+ */
+int bninchrr (const_bstring b0, int pos, const_bstring b1) {
+struct charField excluded;
+
+    if (b0 == NULL || b0->data == NULL) return BSTR_ERR;
+    if (pos < 0 || pos > b0->slen) return BSTR_ERR;
+    if (pos == b0->slen) pos--;
+
+    /* Build the set of b1's characters, then flip it to its complement */
+    if (buildCharField (&excluded, b1) < 0) return BSTR_ERR;
+    invertCharField (&excluded);
+
+    return binchrrCF (b0->data, pos, &excluded);
+}
+
+/* int bsetstr (bstring b0, int pos, bstring b1, unsigned char fill)
+ *
+ * Write the string b1 over b0 beginning at index pos, extending b0 if the
+ * written section reaches past its end.  When pos lies beyond the end of
+ * b0, the gap between the old end and pos is filled with the character
+ * fill.  A NULL b1 is treated as a 0-length string.  Returns BSTR_OK or
+ * BSTR_ERR.
+ */
+int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill) {
+int end, total;
+ptrdiff_t off;
+bstring src = (bstring) b1;
+
+    if (b0 == NULL || pos < 0) return BSTR_ERR;
+    if (b0->slen < 0 || NULL == b0->data ||
+        b0->mlen <= 0 || b0->mlen < b0->slen) return BSTR_ERR;
+    if (b1 != NULL && (b1->data == NULL || b1->slen < 0)) return BSTR_ERR;
+
+    /* end = index just past the section that will be written */
+    end = pos;
+
+    if (src != NULL) {
+        /* If b1's data starts inside b0's buffer, work from a private
+           copy so that modifying b0 cannot disturb the source. */
+        off = (ptrdiff_t) (b1->data - b0->data);
+        if (off >= 0 && off < (ptrdiff_t) b0->mlen) {
+            src = bstrcpy (b1);
+            if (src == NULL) return BSTR_ERR;
+        }
+        end += src->slen;
+    }
+
+    /* Make room for the result plus its terminator */
+    if (balloc (b0, end + 1) != BSTR_OK) {
+        if (src != b1) bdestroy (src);
+        return BSTR_ERR;
+    }
+
+    total = b0->slen;
+
+    /* Pad the gap between the current end of b0 and pos */
+    if (pos > total) {
+        bstr__memset (b0->data + total, (int) fill, (size_t) (pos - total));
+        total = pos;
+    }
+
+    /* Lay the source down at pos, then drop the private copy if any */
+    if (src != NULL) {
+        bBlockCopy ((char *) (b0->data + pos), (char *) src->data, src->slen);
+        if (src != b1) bdestroy (src);
+    }
+
+    /* The string only ever grows here */
+    if (end > total) total = end;
+
+    b0->slen = total;
+    b0->data[total] = (unsigned char) '\0';
+
+    return BSTR_OK;
+}
+
+/* int binsert (bstring b1, int pos, bstring b2, unsigned char fill)
+ *
+ * Inserts the string b2 into b1 at position pos.  If the position pos is
+ * past the end of b1, then the character "fill" is appended as necessary to
+ * make up the gap between the end of b1 and pos.  Unlike bsetstr, binsert
+ * does not allow b2 to be NULL.  Returns BSTR_OK on success.  BSTR_ERR is
+ * returned if an argument is invalid, if the resulting length would not
+ * fit in an int, or if memory cannot be obtained; any temporary copy of
+ * b2 is released on every one of those paths.
+ */
+int binsert (bstring b1, int pos, const_bstring b2, unsigned char fill) {
+int d, l;
+ptrdiff_t pd;
+bstring aux = (bstring) b2;
+
+    /* Both data pointers are validated: they are subtracted from each
+       other and copied from below. */
+    if (pos < 0 || b1 == NULL || b2 == NULL ||
+        b1->data == NULL || b2->data == NULL || b1->slen < 0 ||
+        b2->slen < 0 || b1->mlen < b1->slen || b1->mlen <= 0) return BSTR_ERR;
+
+    /* Aliasing case */
+    if ((pd = (ptrdiff_t) (b2->data - b1->data)) >= 0 && pd < (ptrdiff_t) b1->mlen) {
+        if (NULL == (aux = bstrcpy (b2))) return BSTR_ERR;
+    }
+
+    /* Reject sizes that cannot be represented before doing the additions
+       (room is also needed for the terminator, hence >=).  The aliasing
+       copy, if one was made, must not be leaked on this path. */
+    if (aux->slen >= INT_MAX - b1->slen || aux->slen >= INT_MAX - pos) {
+        if (aux != b2) bdestroy (aux);
+        return BSTR_ERR;
+    }
+
+    /* Compute the two possible end pointers */
+    d = b1->slen + aux->slen;
+    l = pos + aux->slen;
+
+    if (l > d) {
+        /* Inserting past the end of the string */
+        if (balloc (b1, l + 1) != BSTR_OK) {
+            if (aux != b2) bdestroy (aux);
+            return BSTR_ERR;
+        }
+        bstr__memset (b1->data + b1->slen, (int) fill, (size_t) (pos - b1->slen));
+        b1->slen = l;
+    } else {
+        /* Inserting in the middle of the string */
+        if (balloc (b1, d + 1) != BSTR_OK) {
+            if (aux != b2) bdestroy (aux);
+            return BSTR_ERR;
+        }
+        /* Shift the tail right to open a gap of aux->slen characters */
+        bBlockCopy (b1->data + l, b1->data + pos, d - l);
+        b1->slen = d;
+    }
+    bBlockCopy (b1->data + pos, aux->data, aux->slen);
+    b1->data[b1->slen] = (unsigned char) '\0';
+    if (aux != b2) bdestroy (aux);
+    return BSTR_OK;
+}
+
+/* int breplace (bstring b1, int pos, int len, bstring b2,
+ *               unsigned char fill)
+ *
+ * Replace the section of b1 that starts at pos and spans len characters
+ * with the string b2.  fill is used if pos > b1->slen.  Returns BSTR_OK on
+ * success (or the result of bsetstr when the section reaches the end of
+ * b1) and BSTR_ERR on invalid arguments or allocation failure.
+ */
+int breplace (bstring b1, int pos, int len, const_bstring b2,
+              unsigned char fill) {
+int tail, rc;
+ptrdiff_t off;
+bstring src = (bstring) b2;
+
+    if (pos < 0 || len < 0) return BSTR_ERR;
+    tail = pos + len;   /* index just past the section being replaced */
+    if (tail < 0 || b1 == NULL || b2 == NULL) return BSTR_ERR;
+    if (b1->data == NULL || b2->data == NULL ||
+        b1->slen < 0 || b2->slen < 0 ||
+        b1->mlen < b1->slen || b1->mlen <= 0) return BSTR_ERR;
+
+    /* The section reaches (or passes) the end of b1: overwrite from pos
+       with bsetstr, then cut off whatever of the old tail is left over. */
+    if (tail >= b1->slen) {
+        rc = bsetstr (b1, pos, b2, fill);
+        if (rc < 0) return rc;
+        if (pos + b2->slen < b1->slen) {
+            b1->slen = pos + b2->slen;
+            b1->data[b1->slen] = (unsigned char) '\0';
+        }
+        return rc;
+    }
+
+    /* If b2's data starts inside b1's contents, work from a private copy */
+    off = (ptrdiff_t) (b2->data - b1->data);
+    if (off >= 0 && off < (ptrdiff_t) b1->slen) {
+        src = bstrcpy (b2);
+        if (src == NULL) return BSTR_ERR;
+    }
+
+    /* Only a replacement longer than the section needs more room */
+    if (src->slen > len &&
+        balloc (b1, b1->slen + src->slen - len) != BSTR_OK) {
+        if (src != b2) bdestroy (src);
+        return BSTR_ERR;
+    }
+
+    /* Move the part of b1 after the section to its new place, then drop
+       the replacement into the gap. */
+    if (src->slen != len) bstr__memmove (b1->data + pos + src->slen, b1->data + tail, b1->slen - tail);
+    bstr__memcpy (b1->data + pos, src->data, src->slen);
+    b1->slen += src->slen - len;
+    b1->data[b1->slen] = (unsigned char) '\0';
+    if (src != b2) bdestroy (src);
+    return BSTR_OK;
+}
+
+/*
+ *  findreplaceengine is used to implement bfindreplace and
+ *  bfindreplacecaseless.  It works by breaking the three cases of
+ *  expansion, reduction and replacement, and solving each of these
+ *  in the most efficient way possible.
+ *
+ *  b     - string that is modified in place
+ *  find  - pattern to look for; must not be empty
+ *  repl  - replacement text; may be empty
+ *  pos   - index in b where searching starts
+ *  instr - search routine; it is called as instr (b, pos, find) and a
+ *          non-negative result is used as the index of the next match,
+ *          a negative result ends the search
+ *
+ *  Returns BSTR_ERR on invalid arguments, on allocation failure or when a
+ *  length computation overflows; otherwise BSTR_OK.
+ */
+
+typedef int (*instr_fnptr) (const_bstring s1, int pos, const_bstring s2);
+
+#define INITIAL_STATIC_FIND_INDEX_COUNT 32
+
+static int findreplaceengine (bstring b, const_bstring find, const_bstring repl, int pos, instr_fnptr instr) {
+int i, ret, slen, mlen, delta, acc;
+int * d;
+int static_d[INITIAL_STATIC_FIND_INDEX_COUNT+1]; /* This +1 is unnecessary, but it shuts up LINT. */
+ptrdiff_t pd;
+bstring auxf = (bstring) find;
+bstring auxr = (bstring) repl;
+
+    if (b == NULL || b->data == NULL || find == NULL ||
+        find->data == NULL || repl == NULL || repl->data == NULL ||
+        pos < 0 || find->slen <= 0 || b->mlen < 0 || b->slen > b->mlen ||
+        b->mlen <= 0 || b->slen < 0 || repl->slen < 0) return BSTR_ERR;
+    /* No room left for even one match: nothing to do. */
+    if (pos > b->slen - find->slen) return BSTR_OK;
+
+    /* Alias with find string */
+    pd = (ptrdiff_t) (find->data - b->data);
+    if ((ptrdiff_t) (pos - find->slen) < pd && pd < (ptrdiff_t) b->slen) {
+        if (NULL == (auxf = bstrcpy (find))) return BSTR_ERR;
+    }
+
+    /* Alias with repl string */
+    pd = (ptrdiff_t) (repl->data - b->data);
+    if ((ptrdiff_t) (pos - repl->slen) < pd && pd < (ptrdiff_t) b->slen) {
+        if (NULL == (auxr = bstrcpy (repl))) {
+            if (auxf != find) bdestroy (auxf);
+            return BSTR_ERR;
+        }
+    }
+
+    /* Positive delta: each replacement shrinks b; negative: it grows b. */
+    delta = auxf->slen - auxr->slen;
+
+    /* in-place replacement since find and replace strings are of equal
+       length */
+    if (delta == 0) {
+        while ((pos = instr (b, pos, auxf)) >= 0) {
+            bstr__memcpy (b->data + pos, auxr->data, auxr->slen);
+            pos += auxf->slen;
+        }
+        if (auxf != find) bdestroy (auxf);
+        if (auxr != repl) bdestroy (auxr);
+        return BSTR_OK;
+    }
+
+    /* shrinking replacement since auxf->slen > auxr->slen */
+    if (delta > 0) {
+        /* acc is the number of bytes removed so far, i.e. how far the
+           text that follows has to slide towards the front. */
+        acc = 0;
+
+        while ((i = instr (b, pos, auxf)) >= 0) {
+            if (acc && i > pos)
+                bstr__memmove (b->data + pos - acc, b->data + pos, i - pos);
+            if (auxr->slen)
+                bstr__memcpy (b->data + i - acc, auxr->data, auxr->slen);
+            acc += delta;
+            pos = i + auxf->slen;
+        }
+
+        if (acc) {
+            /* Slide the text after the last match and fix up the length. */
+            i = b->slen;
+            if (i > pos)
+                bstr__memmove (b->data + pos - acc, b->data + pos, i - pos);
+            b->slen -= acc;
+            b->data[b->slen] = (unsigned char) '\0';
+        }
+
+        if (auxf != find) bdestroy (auxf);
+        if (auxr != repl) bdestroy (auxr);
+        return BSTR_OK;
+    }
+
+    /* expanding replacement since find->slen < repl->slen.  Its a lot
+       more complicated.  This works by first finding all the matches and
+       storing them to a growable array, then doing at most one resize of
+       the destination bstring and then performing the direct memory transfers
+       of the string segment pieces to form the final result.  The growable
+       array of matches uses a deferred doubling reallocing strategy.  What
+       this means is that it starts as a reasonably fixed sized auto array in
+       the hopes that many if not most cases will never need to grow this
+       array.  But it switches as soon as the bounds of the array will be
+       exceeded.  An extra find result is always appended to this array that
+       corresponds to the end of the destination string, so slen is checked
+       against mlen - 1 rather than mlen before resizing.
+    */
+
+    mlen = INITIAL_STATIC_FIND_INDEX_COUNT;
+    d = (int *) static_d; /* Avoid malloc for trivial/initial cases */
+    acc = slen = 0;
+
+    while ((pos = instr (b, pos, auxf)) >= 0) {
+        if (slen >= mlen - 1) {
+            int sl, *t;
+
+            mlen += mlen;
+            /* The array holds int match offsets, so size it by int. */
+            sl = sizeof (int) * mlen;
+            if (static_d == d) d = NULL; /* static_d cannot be realloced */
+            if (mlen <= 0 || sl < mlen || NULL == (t = (int *) bstr__realloc (d, sl))) {
+                ret = BSTR_ERR;
+                goto done;
+            }
+            /* First move to the heap: carry over the entries gathered in
+               the automatic array. */
+            if (NULL == d) bstr__memcpy (t, static_d, sizeof (static_d));
+            d = t;
+        }
+        d[slen] = pos;
+        slen++;
+        acc -= delta;            /* delta < 0 here, so acc grows */
+        pos += auxf->slen;
+        if (pos < 0 || acc < 0) {
+            /* Arithmetic wrapped around. */
+            ret = BSTR_ERR;
+            goto done;
+        }
+    }
+
+    /* slen <= INITIAL_STATIC_INDEX_COUNT-1 or mlen-1 here. */
+    d[slen] = b->slen;
+
+    if (BSTR_OK == (ret = balloc (b, b->slen + acc + 1))) {
+        b->slen += acc;
+        /* Walk the matches from last to first so every segment is moved
+           to its final place before anything overwrites it. */
+        for (i = slen-1; i >= 0; i--) {
+            int s, l;
+            s = d[i] + auxf->slen;
+            l = d[i+1] - s; /* d[slen] may be accessed here. */
+            if (l) {
+                bstr__memmove (b->data + s + acc, b->data + s, l);
+            }
+            if (auxr->slen) {
+                bstr__memmove (b->data + s + acc - auxr->slen,
+                               auxr->data, auxr->slen);
+            }
+            acc += delta;
+        }
+        b->data[b->slen] = (unsigned char) '\0';
+    }
+
+    done:;
+    if (static_d == d) d = NULL;
+    bstr__free (d);
+    if (auxf != find) bdestroy (auxf);
+    if (auxr != repl) bdestroy (auxr);
+    return ret;
+}
+
+/*  int bfindreplace (bstring b, const_bstring find, const_bstring repl,
+ *                    int pos)
+ *
+ *  Replace every occurrence of find in b, searching from index pos onwards,
+ *  with repl.  Matching is done by binstr.  The result is whatever
+ *  findreplaceengine reports.
+ */
+int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos) {
+instr_fnptr search = binstr;
+
+    return findreplaceengine (b, find, repl, pos, search);
+}
+
+/*  int bfindreplacecaseless (bstring b, const_bstring find,
+ *                            const_bstring repl, int pos)
+ *
+ *  Replace every occurrence of find in b, searching from index pos onwards,
+ *  with repl.  Matching is done by binstrcaseless.  The result is whatever
+ *  findreplaceengine reports.
+ */
+int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos) {
+instr_fnptr search = binstrcaseless;
+
+    return findreplaceengine (b, find, repl, pos, search);
+}
+
+/*  int binsertch (bstring b, int pos, int len, unsigned char fill)
+ *
+ *  Insert len copies of the character fill into b at index pos.  When pos
+ *  lies beyond the end of b, fill is also used for the gap, so that b ends
+ *  up pos + len characters long.
+ *
+ *  Returns BSTR_OK on success and BSTR_ERR on invalid arguments, length
+ *  overflow or allocation failure.
+ */
+int binsertch (bstring b, int pos, int len, unsigned char fill) {
+int grown, stop, k;
+
+    if (b == NULL || pos < 0 || len < 0) return BSTR_ERR;
+    if (b->slen < 0 || b->mlen <= 0 || b->mlen < b->slen) return BSTR_ERR;
+
+    grown = b->slen + len;      /* length when inserting inside b */
+    stop = pos + len;           /* index just past the inserted run */
+    if ((grown | stop) < 0) return BSTR_ERR;
+
+    if (stop <= grown) {
+        /* pos is inside b (or at its end): shift the tail up by len,
+           copying from the back so nothing is overwritten early. */
+        if (balloc (b, grown + 1) != BSTR_OK) return BSTR_ERR;
+        k = grown;
+        while (k > stop) {
+            k--;
+            b->data[k] = b->data[k - len];
+        }
+        b->slen = grown;
+    } else {
+        /* pos is past the end: the fill run starts at the old end. */
+        if (balloc (b, stop + 1) != BSTR_OK) return BSTR_ERR;
+        pos = b->slen;
+        b->slen = stop;
+    }
+
+    k = pos;
+    while (k < stop) {
+        b->data[k] = fill;
+        k++;
+    }
+    b->data[b->slen] = (unsigned char) '\0';
+    return BSTR_OK;
+}
+
+/*  int bpattern (bstring b, int len)
+ *
+ *  Repeat the current contents of b end to end, in place, and cut the
+ *  result to exactly len characters.
+ *
+ *  Returns BSTR_ERR if b is NULL or empty, if len is negative or if the
+ *  buffer cannot be grown; otherwise BSTR_OK (or the result of bsetstr for
+ *  a one character pattern).
+ */
+int bpattern (bstring b, int len) {
+int period, at;
+
+    period = blength (b);
+    if (period <= 0 || len < 0) return BSTR_ERR;
+    if (balloc (b, len + 1) != BSTR_OK) return BSTR_ERR;
+
+    /* A single character pattern is just padding with that character. */
+    if (len > 0 && period == 1) return bsetstr (b, len, NULL, b->data[0]);
+
+    /* Each new byte repeats the byte one period earlier. */
+    at = period;
+    while (at < len) {
+        b->data[at] = b->data[at - period];
+        at++;
+    }
+
+    b->slen = len;
+    b->data[len] = (unsigned char) '\0';
+    return BSTR_OK;
+}
+
+#define BS_BUFF_SZ (1024)
+
+/*  int breada (bstring b, bNread readPtr, void * parm)
+ *
+ *  Append to b everything that the fread-like function readPtr delivers
+ *  for the source handle parm.  Data is read directly into b; the amount
+ *  requested per call starts small and grows by at most BS_BUFF_SZ each
+ *  round.  Reading stops at the first call that returns less than was
+ *  asked for.
+ *
+ *  Returns BSTR_OK on success, BSTR_ERR on invalid arguments or when b
+ *  cannot be grown.
+ */
+int breada (bstring b, bNread readPtr, void * parm) {
+int used, goal, got;
+
+    if (b == NULL || readPtr == NULL) return BSTR_ERR;
+    if (b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen) return BSTR_ERR;
+
+    used = b->slen;
+    goal = used + 16;
+    for (;;) {
+        if (balloc (b, goal + 1) != BSTR_OK) return BSTR_ERR;
+        got = (int) readPtr ((void *) (b->data + used), 1, goal - used, parm);
+        used += got;
+        b->slen = used;
+        if (used < goal) break;         /* short read: source exhausted */
+        goal += (goal < BS_BUFF_SZ) ? goal : BS_BUFF_SZ;
+    }
+
+    b->data[used] = (unsigned char) '\0';
+    return BSTR_OK;
+}
+
+/*  bstring bread (bNread readPtr, void * parm)
+ *
+ *  Create a new bstring holding everything that the fread-like function
+ *  readPtr delivers for the source handle parm (see breada).  Returns NULL
+ *  when breada reports an error.
+ */
+bstring bread (bNread readPtr, void * parm) {
+bstring result = bfromcstr ("");
+
+    if (breada (result, readPtr, parm) >= 0) return result;
+
+    bdestroy (result);
+    return NULL;
+}
+
+/*  int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator)
+ *
+ *  Use an fgetc-like single character reading function (getcPtr) to fill b,
+ *  replacing its previous contents.  Reading stops when getcPtr returns a
+ *  negative number or right after the terminator character has been
+ *  stored.
+ *
+ *  Returns BSTR_ERR on invalid arguments or allocation failure, 1 when
+ *  nothing was stored because getcPtr immediately returned a negative
+ *  number, and 0 otherwise.
+ */
+int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator) {
+int ch, count, limit;
+
+    if (b == NULL || getcPtr == NULL) return BSTR_ERR;
+    if (b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen) return BSTR_ERR;
+
+    count = 0;
+    limit = b->mlen - 2;        /* last index that still leaves room for '\0' */
+
+    for (;;) {
+        ch = getcPtr (parm);
+        if (ch < 0) break;
+        if (count > limit) {
+            /* Out of room: publish the current length, then grow. */
+            b->slen = count;
+            if (balloc (b, count + 2) != BSTR_OK) return BSTR_ERR;
+            limit = b->mlen - 2;
+        }
+        b->data[count] = (unsigned char) ch;
+        count++;
+        if (ch == terminator) break;
+    }
+
+    b->data[count] = (unsigned char) '\0';
+    b->slen = count;
+
+    return (count == 0 && ch < 0) ? 1 : 0;
+}
+
+/*  int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator)
+ *
+ *  Use an fgetc-like single character reading function (getcPtr) to append
+ *  characters to the end of b.  Reading stops when getcPtr returns a
+ *  negative number or right after the terminator character has been
+ *  appended.
+ *
+ *  Returns BSTR_ERR on invalid arguments or allocation failure, 1 when b is
+ *  empty afterwards and getcPtr ended with a negative number, and 0
+ *  otherwise.
+ */
+int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator) {
+int ch, count, limit;
+
+    if (b == NULL || getcPtr == NULL) return BSTR_ERR;
+    if (b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen) return BSTR_ERR;
+
+    count = b->slen;            /* continue after the existing text */
+    limit = b->mlen - 2;        /* last index that still leaves room for '\0' */
+
+    for (;;) {
+        ch = getcPtr (parm);
+        if (ch < 0) break;
+        if (count > limit) {
+            /* Out of room: publish the current length, then grow. */
+            b->slen = count;
+            if (balloc (b, count + 2) != BSTR_OK) return BSTR_ERR;
+            limit = b->mlen - 2;
+        }
+        b->data[count] = (unsigned char) ch;
+        count++;
+        if (ch == terminator) break;
+    }
+
+    b->data[count] = (unsigned char) '\0';
+    b->slen = count;
+
+    return (count == 0 && ch < 0) ? 1 : 0;
+}
+
+/*  bstring bgets (bNgetc getcPtr, void * parm, char terminator)
+ *
+ *  Use an fgetc-like single character reading function (getcPtr) to build a
+ *  new bstring.  Reading stops when getcPtr returns a negative number or
+ *  right after the terminator character (which is kept) has been read.
+ *
+ *  Returns the new bstring, or NULL when bgetsa reports an error or no
+ *  character at all was read.
+ */
+bstring bgets (bNgetc getcPtr, void * parm, char terminator) {
+bstring line = bfromcstr ("");
+
+    if (bgetsa (line, getcPtr, parm, terminator) >= 0 && line->slen > 0) {
+        return line;
+    }
+
+    bdestroy (line);
+    return NULL;
+}
+
+struct bStream {
+ bstring buff; /* Buffer for over-reads */
+ void * parm; /* The stream handle for core stream */
+ bNread readFnPtr; /* fread compatible fnptr for core stream */
+ int isEOF; /* track file's EOF state */
+ int maxBuffSz; /* capacity requested for buff and largest single read request; changed via bsbufflength */
+};
+
+/*  struct bStream * bsopen (bNread readPtr, void * parm)
+ *
+ *  Wrap a given open stream (described by a fread compatible function
+ *  pointer and stream handle) into an open bStream suitable for the bstring
+ *  library streaming functions.
+ *
+ *  Returns NULL if readPtr is NULL or if either the bStream or its
+ *  over-read buffer cannot be allocated.
+ */
+struct bStream * bsopen (bNread readPtr, void * parm) {
+struct bStream * s;
+
+    if (readPtr == NULL) return NULL;
+    s = (struct bStream *) bstr__alloc (sizeof (struct bStream));
+    if (s == NULL) return NULL;
+    s->buff = bfromcstr ("");
+    if (s->buff == NULL) {
+        /* Never hand out a stream without a buffer: the streaming
+           functions all rely on s->buff being present. */
+        bstr__free (s);
+        return NULL;
+    }
+    s->parm = parm;
+    s->readFnPtr = readPtr;
+    s->maxBuffSz = BS_BUFF_SZ;
+    s->isEOF = 0;
+    return s;
+}
+
+/*  int bsbufflength (struct bStream * s, int sz)
+ *
+ *  Change the buffer length used by the bStream to sz.  A sz of zero
+ *  leaves the setting alone, which makes this a pure query.  Returns the
+ *  previous length, or BSTR_ERR if s is NULL or sz is negative.
+ */
+int bsbufflength (struct bStream * s, int sz) {
+int previous;
+
+    if (s == NULL) return BSTR_ERR;
+    if (sz < 0) return BSTR_ERR;
+
+    previous = s->maxBuffSz;
+    if (sz != 0) s->maxBuffSz = sz;
+    return previous;
+}
+
+/*  int bseof (const struct bStream * s)
+ *
+ *  Report whether the bStream is exhausted: the core stream has been marked
+ *  as ended and no buffered characters remain.  Returns 1 or 0, or BSTR_ERR
+ *  if s is NULL, has no read function or has no buffer.
+ */
+int bseof (const struct bStream * s) {
+    /* s->buff is checked like in the other stream functions so that an
+       incomplete stream is reported instead of dereferenced. */
+    if (s == NULL || s->readFnPtr == NULL || s->buff == NULL) return BSTR_ERR;
+    return s->isEOF && (s->buff->slen == 0);
+}
+
+/*  void * bsclose (struct bStream * s)
+ *
+ *  Release the bStream and its buffer, and hand back the core stream
+ *  handle that was given to bsopen.  The core stream itself is not
+ *  touched.  Returns NULL if s is NULL.
+ */
+void * bsclose (struct bStream * s) {
+void * handle;
+
+    if (s == NULL) return NULL;
+
+    handle = s->parm;
+    if (s->buff != NULL) bdestroy (s->buff);
+
+    /* Scrub the fields before the structure is released. */
+    s->readFnPtr = NULL;
+    s->buff = NULL;
+    s->parm = NULL;
+    s->isEOF = 1;
+
+    bstr__free (s);
+    return handle;
+}
+
+/*  int bsreadlna (bstring r, struct bStream * s, char terminator)
+ *
+ *  Read a bstring terminated by the terminator character or the end of the
+ *  stream from the bStream (s) and append it to the parameter r.  The
+ *  terminator, when found, is included.  This function may read additional
+ *  characters from the core stream that are not returned, but will be
+ *  retained for subsequent read operations.
+ *
+ *  Returns BSTR_OK on success.  Returns BSTR_ERR on invalid arguments, on
+ *  allocation failure, or when the stream ended without anything being
+ *  appended to r.
+ */
+int bsreadlna (bstring r, struct bStream * s, char terminator) {
+int i, l, ret, rlo;
+char * b;
+struct tagbstring x;
+
+    if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 ||
+        r->slen < 0 || r->mlen < r->slen) return BSTR_ERR;
+    l = s->buff->slen;
+    if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR;
+    b = (char *) s->buff->data;
+    x.data = (unsigned char *) b;       /* x is a view onto the buffer */
+
+    /* First check if the current buffer holds the terminator */
+    b[l] = terminator; /* Set sentinel */
+    for (i=0; b[i] != terminator; i++) ;
+    if (i < l) {
+        x.slen = i + 1;
+        ret = bconcat (r, &x);
+        s->buff->slen = l;
+        /* Only consume the line from the buffer once it is safely in r,
+           and report a failed append instead of claiming success. */
+        if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1);
+        return ret;
+    }
+
+    rlo = r->slen;
+
+    /* If not then just concatenate the entire buffer to the output */
+    x.slen = l;
+    if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR;
+
+    /* Perform direct in-place reads into the destination to allow for
+       the minimum of data-copies */
+    for (;;) {
+        if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR;
+        b = (char *) (r->data + r->slen);
+        l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm);
+        if (l <= 0) {
+            r->data[r->slen] = (unsigned char) '\0';
+            s->buff->slen = 0;
+            s->isEOF = 1;
+            /* If nothing was read return with an error message */
+            return BSTR_ERR & -(r->slen == rlo);
+        }
+        b[l] = terminator; /* Set sentinel */
+        for (i=0; b[i] != terminator; i++) ;
+        if (i < l) break;
+        r->slen += l;
+    }
+
+    /* Terminator found, push over-read back to buffer */
+    i++;
+    r->slen += i;
+    s->buff->slen = l - i;
+    bstr__memcpy (s->buff->data, b + i, l - i);
+    r->data[r->slen] = (unsigned char) '\0';
+    return BSTR_OK;
+}
+
+/*  int bsreadlnsa (bstring r, struct bStream * s, bstring term)
+ *
+ *  Read a bstring terminated by any character in the term string or the end
+ *  of the stream from the bStream (s) and append it to the parameter r.
+ *  The terminating character, when found, is included.  This function may
+ *  read additional characters from the core stream that are not returned,
+ *  but will be retained for subsequent read operations.
+ *
+ *  A one character term is handed to bsreadlna.  Returns BSTR_OK on
+ *  success.  Returns BSTR_ERR on invalid arguments (including an empty
+ *  term), on allocation failure, or when the stream ended without anything
+ *  being appended to r.
+ */
+int bsreadlnsa (bstring r, struct bStream * s, const_bstring term) {
+int i, l, ret, rlo;
+unsigned char * b;
+struct tagbstring x;
+struct charField cf;
+
+    if (s == NULL || s->buff == NULL || r == NULL || term == NULL ||
+        term->data == NULL || r->mlen <= 0 || r->slen < 0 ||
+        r->mlen < r->slen) return BSTR_ERR;
+    if (term->slen == 1) return bsreadlna (r, s, term->data[0]);
+    if (term->slen < 1 || buildCharField (&cf, term)) return BSTR_ERR;
+
+    l = s->buff->slen;
+    if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR;
+    b = (unsigned char *) s->buff->data;
+    x.data = b;                         /* x is a view onto the buffer */
+
+    /* First check if the current buffer holds the terminator */
+    b[l] = term->data[0]; /* Set sentinel */
+    for (i=0; !testInCharField (&cf, b[i]); i++) ;
+    if (i < l) {
+        x.slen = i + 1;
+        ret = bconcat (r, &x);
+        s->buff->slen = l;
+        /* Only consume the line from the buffer once it is safely in r,
+           and report a failed append instead of claiming success. */
+        if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1);
+        return ret;
+    }
+
+    rlo = r->slen;
+
+    /* If not then just concatenate the entire buffer to the output */
+    x.slen = l;
+    if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR;
+
+    /* Perform direct in-place reads into the destination to allow for
+       the minimum of data-copies */
+    for (;;) {
+        if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR;
+        b = (unsigned char *) (r->data + r->slen);
+        l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm);
+        if (l <= 0) {
+            r->data[r->slen] = (unsigned char) '\0';
+            s->buff->slen = 0;
+            s->isEOF = 1;
+            /* If nothing was read return with an error message */
+            return BSTR_ERR & -(r->slen == rlo);
+        }
+
+        b[l] = term->data[0]; /* Set sentinel */
+        for (i=0; !testInCharField (&cf, b[i]); i++) ;
+        if (i < l) break;
+        r->slen += l;
+    }
+
+    /* Terminator found, push over-read back to buffer */
+    i++;
+    r->slen += i;
+    s->buff->slen = l - i;
+    bstr__memcpy (s->buff->data, b + i, l - i);
+    r->data[r->slen] = (unsigned char) '\0';
+    return BSTR_OK;
+}
+
+/* int bsreada (bstring r, struct bStream * s, int n)
+ *
+ * Read a bstring of length n (or, if it is fewer, as many bytes as is
+ * remaining) from the bStream. This function may read additional
+ * characters from the core stream that are not returned, but will be
+ * retained for subsequent read operations. This function will not read
+ * additional characters from the core stream beyond virtual stream pointer.
+ *
+ * The characters are appended to r. Returns BSTR_ERR on invalid arguments
+ * (including n <= 0), when the stream is already exhausted, or when nothing
+ * could be appended to r; returns 0 when at least one character was
+ * appended.
+ */
+int bsreada (bstring r, struct bStream * s, int n) {
+int l, ret, orslen;
+char * b;
+struct tagbstring x;
+
+ if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0
+ || r->slen < 0 || r->mlen < r->slen || n <= 0) return BSTR_ERR;
+
+ n += r->slen; /* from here on n is the target total length of r */
+ if (n <= 0) return BSTR_ERR; /* the sum wrapped around */
+
+ l = s->buff->slen; /* number of characters currently buffered */
+
+ orslen = r->slen; /* remembered to detect "nothing appended" */
+
+ if (0 == l) {
+ if (s->isEOF) return BSTR_ERR;
+ if (r->mlen > n) { /* r already has room: read straight into r, bypassing the stream buffer */
+ l = (int) s->readFnPtr (r->data + r->slen, 1, n - r->slen, s->parm);
+ if (0 >= l || l > n - r->slen) { /* nothing read, or an impossible count: treat as end of stream */
+ s->isEOF = 1;
+ return BSTR_ERR;
+ }
+ r->slen += l;
+ r->data[r->slen] = (unsigned char) '\0';
+ return 0;
+ }
+ }
+
+ if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR;
+ b = (char *) s->buff->data;
+ x.data = (unsigned char *) b; /* x is a view onto the stream buffer; its length is set per use */
+
+ do {
+ if (l + r->slen >= n) { /* the buffer holds enough to finish the request */
+ x.slen = n - r->slen;
+ ret = bconcat (r, &x);
+ s->buff->slen = l;
+ if (BSTR_OK == ret) bdelete (s->buff, 0, x.slen); /* keep the surplus buffered for later reads */
+ return BSTR_ERR & -(r->slen == orslen); /* BSTR_ERR if r did not grow, otherwise 0 */
+ }
+
+ x.slen = l; /* not enough yet: move the whole buffer into r */
+ if (BSTR_OK != bconcat (r, &x)) break;
+
+ l = n - r->slen; /* still missing, capped to one buffer below */
+ if (l > s->maxBuffSz) l = s->maxBuffSz;
+
+ l = (int) s->readFnPtr (b, 1, l, s->parm);
+
+ } while (l > 0);
+ if (l < 0) l = 0;
+ if (l == 0) s->isEOF = 1;
+ s->buff->slen = l;
+ return BSTR_ERR & -(r->slen == orslen); /* BSTR_ERR if r did not grow, otherwise 0 */
+}
+
+/*  int bsreadln (bstring r, struct bStream * s, char terminator)
+ *
+ *  Read a bstring terminated by the terminator character or the end of the
+ *  stream from the bStream (s) into r, replacing the previous contents of
+ *  r.  Over-read characters stay buffered in the stream.  Apart from the
+ *  argument and buffer checks made here, the result is that of bsreadlna.
+ */
+int bsreadln (bstring r, struct bStream * s, char terminator) {
+    if (s == NULL || s->buff == NULL) return BSTR_ERR;
+    if (r == NULL || r->mlen <= 0) return BSTR_ERR;
+    if (balloc (s->buff, s->maxBuffSz + 1) != BSTR_OK) return BSTR_ERR;
+
+    r->slen = 0;        /* start from an empty result, then append */
+    return bsreadlna (r, s, terminator);
+}
+
+/*  int bsreadlns (bstring r, struct bStream * s, bstring term)
+ *
+ *  Read a bstring terminated by any character in the term string or the end
+ *  of the stream from the bStream (s) into r, replacing the previous
+ *  contents of r.  Over-read characters stay buffered in the stream.  An
+ *  empty term is an error; a one character term is handled by bsreadln.
+ */
+int bsreadlns (bstring r, struct bStream * s, const_bstring term) {
+    if (s == NULL || s->buff == NULL || r == NULL) return BSTR_ERR;
+    if (term == NULL || term->data == NULL || r->mlen <= 0) return BSTR_ERR;
+
+    if (term->slen < 1) return BSTR_ERR;
+    if (term->slen == 1) return bsreadln (r, s, term->data[0]);
+
+    if (balloc (s->buff, s->maxBuffSz + 1) != BSTR_OK) return BSTR_ERR;
+    r->slen = 0;        /* start from an empty result, then append */
+    return bsreadlnsa (r, s, term);
+}
+
+/*  int bsread (bstring r, struct bStream * s, int n)
+ *
+ *  Read up to n characters from the bStream into r, replacing the previous
+ *  contents of r.  Fewer characters are delivered when the stream runs
+ *  out.  Apart from the argument and buffer checks made here, the result is
+ *  that of bsreada.
+ */
+int bsread (bstring r, struct bStream * s, int n) {
+    if (s == NULL || s->buff == NULL) return BSTR_ERR;
+    if (r == NULL || r->mlen <= 0 || n <= 0) return BSTR_ERR;
+    if (balloc (s->buff, s->maxBuffSz + 1) != BSTR_OK) return BSTR_ERR;
+
+    r->slen = 0;        /* start from an empty result, then append */
+    return bsreada (r, s, n);
+}
+
+/*  int bsunread (struct bStream * s, const_bstring b)
+ *
+ *  Put the characters of b back in front of the bStream's buffered data,
+ *  so that they are delivered before anything else.  Returns BSTR_ERR if
+ *  the stream is unusable, otherwise the result of the insertion.
+ */
+int bsunread (struct bStream * s, const_bstring b) {
+int rc;
+
+    if (s == NULL || s->buff == NULL) return BSTR_ERR;
+    rc = binsert (s->buff, 0, b, (unsigned char) '?');
+    return rc;
+}
+
+/*  int bspeek (bstring r, const struct bStream * s)
+ *
+ *  Copy into r the characters currently buffered in the bStream, i.e.
+ *  those that will be delivered before the core stream is read again.  The
+ *  stream is left unchanged.  Returns BSTR_ERR if the stream is unusable,
+ *  otherwise the result of the assignment.
+ */
+int bspeek (bstring r, const struct bStream * s) {
+int rc;
+
+    if (s == NULL || s->buff == NULL) return BSTR_ERR;
+    rc = bassign (r, s->buff);
+    return rc;
+}
+
+/*  bstring bjoin (const struct bstrList * bl, const_bstring sep);
+ *
+ *  Join the entries of a bstrList into one bstring by sequentially
+ *  concatenating them with the sep string in between.  A NULL sep means no
+ *  separator.  An empty list yields an empty bstring.
+ *
+ *  Returns NULL if bl or sep is malformed, if any entry is NULL or
+ *  malformed, if the total length does not fit in an int, or if memory
+ *  cannot be obtained.  The caller owns the returned bstring.
+ */
+bstring bjoin (const struct bstrList * bl, const_bstring sep) {
+bstring b;
+int i, c, v;
+
+    if (bl == NULL || bl->qty < 0) return NULL;
+    if (sep != NULL && (sep->slen < 0 || sep->data == NULL)) return NULL;
+
+    /* Nothing to join.  Handling this up front also keeps the size
+       computation below from going to zero or negative, which it would
+       for an empty list combined with a non-empty separator. */
+    if (bl->qty < 1) return bfromcstr ("");
+    if (bl->entry == NULL) return NULL;
+
+    /* c counts the bytes needed, starting at 1 for the terminating '\0'. */
+    for (i = 0, c = 1; i < bl->qty; i++) {
+        if (bl->entry[i] == NULL) return NULL;  /* Invalid input */
+        v = bl->entry[i]->slen;
+        if (v < 0) return NULL;                 /* Invalid input */
+        if (v > 0 && bl->entry[i]->data == NULL) return NULL;
+        c += v;
+        if (c < 0) return NULL;                 /* Wrap around ?? */
+        if (i > 0 && sep != NULL) {
+            /* One separator in front of every entry but the first. */
+            c += sep->slen;
+            if (c < 0) return NULL;             /* Wrap around ?? */
+        }
+    }
+
+    b = (bstring) bstr__alloc (sizeof (struct tagbstring));
+    if (NULL == b) return NULL; /* Out of memory */
+    b->data = (unsigned char *) bstr__alloc (c);
+    if (b->data == NULL) {
+        bstr__free (b);
+        return NULL;
+    }
+
+    b->mlen = c;
+    b->slen = c-1;
+
+    /* c is reused as the write position. */
+    for (i = 0, c = 0; i < bl->qty; i++) {
+        if (i > 0 && sep != NULL) {
+            bstr__memcpy (b->data + c, sep->data, sep->slen);
+            c += sep->slen;
+        }
+        v = bl->entry[i]->slen;
+        if (v > 0) bstr__memcpy (b->data + c, bl->entry[i]->data, v);
+        c += v;
+    }
+    b->data[c] = (unsigned char) '\0';
+    return b;
+}
+
+#define BSSSC_BUFF_LEN (256)
+
+/* int bssplitscb (struct bStream * s, const_bstring splitStr,
+ * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm)
+ *
+ * Iterate the set of disjoint sequential substrings read from a stream
+ * divided by any of the characters in splitStr. An empty splitStr causes
+ * the whole stream to be iterated once.
+ *
+ * Note: At the point of calling the cb function, the bStream pointer is
+ * pointed exactly at the position right after having read the split
+ * character. The cb function can act on the stream by causing the bStream
+ * pointer to move, and bssplitscb will continue by starting the next split
+ * at the position of the pointer after the return from cb.
+ *
+ * However, if the cb causes the bStream s to be destroyed then the cb must
+ * return with a negative value, otherwise bssplitscb will continue in an
+ * undefined manner.
+ *
+ * Returns BSTR_ERR for invalid arguments or if the work buffer cannot be
+ * created. Otherwise the result is that of the last cb call (a positive
+ * result of the final call is reported as 0), or the negative result of
+ * bsunread if pushing back the over-read fails.
+ */
+int bssplitscb (struct bStream * s, const_bstring splitStr,
+ int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) {
+struct charField chrs;
+bstring buff;
+int i, p, ret;
+
+ if (cb == NULL || s == NULL || s->readFnPtr == NULL
+ || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR;
+
+ if (NULL == (buff = bfromcstr (""))) return BSTR_ERR;
+
+ if (splitStr->slen == 0) {
+ while (bsreada (buff, s, BSSSC_BUFF_LEN) >= 0) ; /* drain the whole stream into buff */
+ if ((ret = cb (parm, 0, buff)) > 0)
+ ret = 0;
+ } else {
+ buildCharField (&chrs, splitStr); /* NOTE(review): the result of buildCharField is not checked here */
+ ret = p = i = 0; /* p: offset handed to cb for the current entry; i: scan index in buff */
+ for (;;) {
+ if (i >= buff->slen) { /* scanned everything read so far: fetch more */
+ bsreada (buff, s, BSSSC_BUFF_LEN);
+ if (i >= buff->slen) { /* nothing new arrived: deliver the final entry */
+ if (0 < (ret = cb (parm, p, buff))) ret = 0;
+ break;
+ }
+ }
+ if (testInCharField (&chrs, buff->data[i])) {
+ struct tagbstring t;
+ unsigned char c;
+
+ blk2tbstr (t, buff->data + i + 1, buff->slen - (i + 1)); /* t views what was read past the separator */
+ if ((ret = bsunread (s, &t)) < 0) break; /* push it back so the stream sits right after the separator */
+ buff->slen = i;
+ c = buff->data[i];
+ buff->data[i] = (unsigned char) '\0'; /* terminate the entry for the duration of cb */
+ if ((ret = cb (parm, p, buff)) < 0) break;
+ buff->data[i] = c;
+ buff->slen = 0; /* start collecting the next entry */
+ p += i + 1;
+ i = -1; /* becomes 0 through the i++ below */
+ }
+ i++;
+ }
+ }
+
+ bdestroy (buff);
+ return ret;
+}
+
+/*  int bssplitstrcb (struct bStream * s, const_bstring splitStr,
+ *      int (* cb) (void * parm, int ofs, const_bstring entry), void * parm)
+ *
+ *  Iterate the set of disjoint sequential substrings read from a stream
+ *  divided by the entire substring splitStr.  With an empty splitStr the
+ *  stream is handed to cb piece by piece, each piece being what one
+ *  bsreada request for up to BSSSC_BUFF_LEN characters delivered, always
+ *  with an offset of 0.  A one character splitStr is handled by
+ *  bssplitscb.
+ *
+ *  Note: At the point of calling the cb function, the bStream pointer is
+ *  pointed exactly at the position right after having read the split
+ *  character.  The cb function can act on the stream by causing the bStream
+ *  pointer to move, and bssplitscb will continue by starting the next split
+ *  at the position of the pointer after the return from cb.
+ *
+ *  However, if the cb causes the bStream s to be destroyed then the cb must
+ *  return with a negative value, otherwise bssplitscb will continue in an
+ *  undefined manner.
+ *
+ *  Returns BSTR_ERR for invalid arguments or if the work buffer cannot be
+ *  created, the negative value of the cb call that stopped the iteration,
+ *  and otherwise BSTR_OK / 0.
+ */
+int bssplitstrcb (struct bStream * s, const_bstring splitStr,
+    int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) {
+bstring buff;
+int i, p, ret;
+
+    if (cb == NULL || s == NULL || s->readFnPtr == NULL
+     || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR;
+
+    if (splitStr->slen == 1) return bssplitscb (s, splitStr, cb, parm);
+
+    if (NULL == (buff = bfromcstr (""))) return BSTR_ERR;
+
+    if (splitStr->slen == 0) {
+        ret = BSTR_OK;
+        while (bsreada (buff, s, BSSSC_BUFF_LEN) >= 0) {
+            int rc = cb (parm, 0, buff);
+            if (rc < 0) {
+                ret = rc;
+                break;
+            }
+            buff->slen = 0;     /* reuse the buffer for the next piece */
+        }
+        /* Falls through to the common exit so that buff is released on
+           the normal end of the stream as well. */
+    } else {
+        ret = p = 0;
+        for (;;) {
+            if ((ret = binstr (buff, 0, splitStr)) >= 0) {
+                /* A separator is present: report the text in front of
+                   it, then drop that text and the separator. */
+                struct tagbstring t;
+                blk2tbstr (t, buff->data, ret);
+                i = ret + splitStr->slen;
+                if ((ret = cb (parm, p, &t)) < 0) break;
+                p += i;
+                bdelete (buff, 0, i);
+            } else {
+                /* No separator yet: pull in more of the stream. */
+                bsreada (buff, s, BSSSC_BUFF_LEN);
+                if (bseof (s)) {
+                    /* Whatever is left is the last entry. */
+                    if ((ret = cb (parm, p, buff)) > 0) ret = 0;
+                    break;
+                }
+            }
+        }
+    }
+
+    bdestroy (buff);
+    return ret;
+}
+
+/*  struct bstrList * bstrListCreate (void)
+ *
+ *  Create an empty bstrList with room for one entry.  Returns NULL if
+ *  either the list or its entry array cannot be allocated.
+ */
+struct bstrList * bstrListCreate (void) {
+struct bstrList * list;
+
+    list = (struct bstrList *) bstr__alloc (sizeof (struct bstrList));
+    if (list == NULL) return NULL;
+
+    list->entry = (bstring *) bstr__alloc (1*sizeof (bstring));
+    if (list->entry == NULL) {
+        bstr__free (list);
+        return NULL;
+    }
+
+    list->qty = 0;
+    list->mlen = 1;
+    return list;
+}
+
+/*  int bstrListDestroy (struct bstrList * sl)
+ *
+ *  Destroy a bstrList that has been created by bsplit, bsplits or
+ *  bstrListCreate, together with every bstring it still holds.  Returns
+ *  BSTR_ERR if sl is NULL or has a negative entry count, else BSTR_OK.
+ */
+int bstrListDestroy (struct bstrList * sl) {
+int idx;
+
+    if (sl == NULL || sl->qty < 0) return BSTR_ERR;
+
+    idx = 0;
+    while (idx < sl->qty) {
+        bstring item = sl->entry[idx];
+        if (item != NULL) {
+            bdestroy (item);
+            sl->entry[idx] = NULL;
+        }
+        idx++;
+    }
+
+    /* Mark the list as dead before the memory is handed back. */
+    sl->qty = -1;
+    sl->mlen = -1;
+    bstr__free (sl->entry);
+    sl->entry = NULL;
+    bstr__free (sl);
+    return BSTR_OK;
+}
+
+/*  int bstrListAlloc (struct bstrList * sl, int msz)
+ *
+ *  Make sure the list has room for at least msz entries.  The capacity is
+ *  first rounded up with snapUpSize; if that much cannot be obtained,
+ *  exactly msz entries are tried.  Returns BSTR_OK on success (including
+ *  when the list is already large enough) and BSTR_ERR on invalid
+ *  arguments, size overflow or allocation failure.
+ */
+int bstrListAlloc (struct bstrList * sl, int msz) {
+bstring * grownEntries;
+int capacity;
+size_t bytes;
+
+    if (sl == NULL || msz <= 0 || sl->entry == NULL) return BSTR_ERR;
+    if (sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR;
+    if (msz <= sl->mlen) return BSTR_OK;
+
+    capacity = snapUpSize (msz);
+    bytes = ((size_t) capacity) * sizeof (bstring);
+    if (bytes < (size_t) capacity) return BSTR_ERR;
+
+    grownEntries = (bstring *) bstr__realloc (sl->entry, bytes);
+    if (grownEntries == NULL) {
+        /* The generous size failed; retry with the bare minimum. */
+        capacity = msz;
+        bytes = ((size_t) capacity) * sizeof (bstring);
+        grownEntries = (bstring *) bstr__realloc (sl->entry, bytes);
+        if (grownEntries == NULL) return BSTR_ERR;
+    }
+
+    sl->entry = grownEntries;
+    sl->mlen = capacity;
+    return BSTR_OK;
+}
+
+/*  int bstrListAllocMin (struct bstrList * sl, int msz)
+ *
+ *  Resize the entry array to hold exactly msz entries, or sl->qty entries
+ *  if that is more.  Returns BSTR_OK on success (including when nothing
+ *  has to change) and BSTR_ERR on invalid arguments, size overflow or
+ *  allocation failure.
+ */
+int bstrListAllocMin (struct bstrList * sl, int msz) {
+bstring * resized;
+size_t bytes;
+int want;
+
+    if (sl == NULL || msz <= 0 || sl->entry == NULL) return BSTR_ERR;
+    if (sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR;
+
+    /* Never shrink below the number of entries in use. */
+    want = (msz < sl->qty) ? sl->qty : msz;
+    if (want == sl->mlen) return BSTR_OK;
+
+    bytes = ((size_t) want) * sizeof (bstring);
+    if (bytes < (size_t) want) return BSTR_ERR;
+
+    resized = (bstring *) bstr__realloc (sl->entry, bytes);
+    if (resized == NULL) return BSTR_ERR;
+
+    sl->entry = resized;
+    sl->mlen = want;
+    return BSTR_OK;
+}
+
+/*  int bsplitcb (const_bstring str, unsigned char splitChar, int pos,
+ *      int (* cb) (void * parm, int ofs, int len), void * parm)
+ *
+ *  Walk over str from index pos and call cb once for every piece delimited
+ *  by splitChar, passing the offset and length of the piece.  Empty pieces
+ *  are reported too.
+ *
+ *  str->slen is looked at again after every cb call, so cb may change str
+ *  in a non-destructive way: scanning resumes one character after the last
+ *  split character found and stops as soon as that position lies beyond
+ *  the end of str.  If cb destroys str it *must* return a negative value,
+ *  otherwise the behaviour is undefined.
+ *
+ *  Returns BSTR_ERR on invalid arguments, the first negative value
+ *  returned by cb, and BSTR_OK otherwise.
+ */
+int bsplitcb (const_bstring str, unsigned char splitChar, int pos,
+    int (* cb) (void * parm, int ofs, int len), void * parm) {
+int start, end, rc;
+
+    if (cb == NULL || str == NULL) return BSTR_ERR;
+    if (pos < 0 || pos > str->slen) return BSTR_ERR;
+
+    start = pos;
+    for (;;) {
+        /* Find the end of the current piece. */
+        end = start;
+        while (end < str->slen && str->data[end] != splitChar) end++;
+
+        rc = cb (parm, start, end - start);
+        if (rc < 0) return rc;
+
+        start = end + 1;
+        if (start > str->slen) break;
+    }
+    return BSTR_OK;
+}
+
+/* int bsplitscb (const_bstring str, const_bstring splitStr, int pos,
+ * int (* cb) (void * parm, int ofs, int len), void * parm)
+ *
+ * Walk str from offset pos and report to cb each run of characters that is
+ * delimited by any one of the characters contained in splitStr.
+ *
+ * Special cases:
+ *  - splitStr of length 0: cb is called exactly once with offset 0 and the
+ *    full length of str; a positive result from cb is reported as 0.
+ *  - splitStr of length 1: the work is handed to bsplitcb.
+ *
+ * Returns BSTR_ERR if cb, str or splitStr is NULL, if pos lies outside
+ * [0, str->slen] or if splitStr->slen is negative. A negative value
+ * returned by cb stops the walk and is passed back to the caller;
+ * otherwise BSTR_OK is returned.
+ *
+ * Note: cb may modify str non-destructively. The length and data of str
+ * are re-read after every call to cb, scanning resumes one character past
+ * the last delimiter found, and the walk ends as soon as that position is
+ * beyond the current length of str. If cb destroys str it *must* return a
+ * negative value, otherwise the behaviour is undefined.
+ */
+int bsplitscb (const_bstring str, const_bstring splitStr, int pos,
+    int (* cb) (void * parm, int ofs, int len), void * parm) {
+    struct charField chrs;
+    int start, end, rc;
+
+    if (NULL == cb || NULL == str) return BSTR_ERR;
+    if (pos < 0 || pos > str->slen) return BSTR_ERR;
+    if (NULL == splitStr || splitStr->slen < 0) return BSTR_ERR;
+
+    switch (splitStr->slen) {
+        case 0:
+            /* No delimiters at all: hand over the whole string once. */
+            rc = cb (parm, 0, str->slen);
+            if (rc > 0) rc = 0;
+            return rc;
+        case 1:
+            return bsplitcb (str, splitStr->data[0], pos, cb, parm);
+        default:
+            break;
+    }
+
+    buildCharField (&chrs, splitStr);
+
+    start = pos;
+    for (;;) {
+        for (end = start; end < str->slen; end++) {
+            if (testInCharField (&chrs, str->data[end])) break;
+        }
+
+        rc = cb (parm, start, end - start);
+        if (rc < 0) return rc;
+
+        start = end + 1;
+        if (start > str->slen) break;
+    }
+    return BSTR_OK;
+}
+
+/* int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos,
+ * int (* cb) (void * parm, int ofs, int len), void * parm)
+ *
+ * Iterate the set of disjoint sequential substrings over str, starting at
+ * pos, divided by the substring splitStr. Two occurrences of splitStr that
+ * directly follow each other yield an empty substring between them.
+ *
+ * Special cases:
+ *  - An empty splitStr causes cb to be called once for every character of
+ *    str from pos onward, each time with a length of 1.
+ *  - A splitStr of length 1 is delegated to bsplitcb.
+ *
+ * Returns BSTR_ERR if cb, str or splitStr is NULL, if pos lies outside
+ * [0, str->slen] or if splitStr->slen is negative. If cb returns a
+ * negative value, iteration stops and that value is returned; otherwise
+ * BSTR_OK is returned.
+ *
+ * Note: Non-destructive modification of str from within the cb function
+ * while performing this split is well defined. bsplitstrcb behaves in
+ * sequential lock step with calls to cb. I.e., after returning from a cb
+ * that returns a non-negative integer, bsplitstrcb continues from the
+ * position immediately after the last detected split substring and it will
+ * stop searching if the length of str falls below this point. However, if
+ * the cb function destroys str, then it *must* return with a negative
+ * value, otherwise bsplitstrcb will continue in an undefined manner.
+ */
+int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos,
+    int (* cb) (void * parm, int ofs, int len), void * parm) {
+int i, p, ret;
+
+    if (cb == NULL || str == NULL || pos < 0 || pos > str->slen
+     || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR;
+
+    if (0 == splitStr->slen) {
+        for (i=pos; i < str->slen; i++) {
+            if ((ret = cb (parm, i, 1)) < 0) return ret;
+        }
+        return BSTR_OK;
+    }
+
+    if (splitStr->slen == 1)
+        return bsplitcb (str, splitStr->data[0], pos, cb, parm);
+
+    for (i=p=pos; i <= str->slen - splitStr->slen; i++) {
+        if (0 == bstr__memcmp (splitStr->data, str->data + i, splitStr->slen)) {
+            if ((ret = cb (parm, p, i - p)) < 0) return ret;
+            /* The next piece starts right behind the separator. i is set
+               one short of p because the loop increment follows; this way
+               a separator that immediately follows this one is examined
+               instead of being skipped. */
+            p = i + splitStr->slen;
+            i = p - 1;
+        }
+    }
+    if ((ret = cb (parm, p, str->slen - p)) < 0) return ret;
+    return BSTR_OK;
+}
+
+/* Context handed to bscb through the split callbacks' parm pointer. */
+struct genBstrList {
+ bstring b; /* string being split; bscb copies pieces out of it */
+ struct bstrList * bl; /* list that receives the copied pieces */
+};
+
+/* static int bscb (void * parm, int ofs, int len)
+ *
+ * Split callback shared by bsplit, bsplits and bsplitstr. parm points to a
+ * struct genBstrList; the substring (ofs, len) of g->b is copied with
+ * bmidstr and appended to g->bl, whose entry array is grown by doubling
+ * when it is full.
+ *
+ * Returns BSTR_OK when the piece was appended. Returns BSTR_ERR when the
+ * list capacity is not positive, when doubling would exceed what an int or
+ * a size_t byte count can hold, when the entry array cannot be
+ * reallocated, or when the copy of the piece cannot be created. On error
+ * g->bl->qty is unchanged, so no NULL entry is ever stored in the list.
+ */
+static int bscb (void * parm, int ofs, int len) {
+struct genBstrList * g = (struct genBstrList *) parm;
+bstring piece;
+
+    if (g->bl->qty >= g->bl->mlen) {
+        int mlen = g->bl->mlen;
+        bstring * tbl;
+
+        /* A non-positive capacity can never be doubled past qty. */
+        if (mlen <= 0) return BSTR_ERR;
+
+        /* Double until there is room, refusing before the addition could
+           overflow an int. */
+        do {
+            if (mlen > INT_MAX / 2) return BSTR_ERR;
+            mlen += mlen;
+        } while (g->bl->qty >= mlen);
+
+        /* Make sure the byte count fits in a size_t. */
+        if ((size_t) mlen > ((size_t) -1) / sizeof (bstring)) return BSTR_ERR;
+
+        tbl = (bstring *) bstr__realloc (g->bl->entry, sizeof (bstring) * mlen);
+        if (tbl == NULL) return BSTR_ERR;
+
+        g->bl->entry = tbl;
+        g->bl->mlen = mlen;
+    }
+
+    /* Only count the piece once the copy is known to exist. */
+    piece = bmidstr (g->b, ofs, len);
+    if (piece == NULL) return BSTR_ERR;
+
+    g->bl->entry[g->bl->qty] = piece;
+    g->bl->qty++;
+    return BSTR_OK;
+}
+
+/* struct bstrList * bsplit (const_bstring str, unsigned char splitChar)
+ *
+ * Build a newly allocated list holding copies of the consecutive pieces of
+ * str that are separated by splitChar.
+ *
+ * Returns NULL if str is NULL, has no data or has a negative length, if
+ * the list cannot be allocated, or if the split itself reports an error
+ * (in which case the partially built list is destroyed first).
+ */
+struct bstrList * bsplit (const_bstring str, unsigned char splitChar) {
+    struct genBstrList ctx;
+    struct bstrList * list;
+
+    if (NULL == str) return NULL;
+    if (NULL == str->data || str->slen < 0) return NULL;
+
+    list = (struct bstrList *) bstr__alloc (sizeof (struct bstrList));
+    if (NULL == list) return NULL;
+
+    /* Start with an empty list that has room for four entries. */
+    list->qty = 0;
+    list->mlen = 4;
+    list->entry = (bstring *) bstr__alloc (list->mlen * sizeof (bstring));
+    if (NULL == list->entry) {
+        bstr__free (list);
+        return NULL;
+    }
+
+    ctx.b = (bstring) str;
+    ctx.bl = list;
+    if (bsplitcb (str, splitChar, 0, bscb, &ctx) < 0) {
+        bstrListDestroy (list);
+        return NULL;
+    }
+    return list;
+}
+
+/* struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr)
+ *
+ * Build a newly allocated list holding copies of the consecutive pieces of
+ * str that are separated by the complete substring splitStr. The actual
+ * splitting is performed by bsplitstrcb.
+ *
+ * Returns NULL if str is NULL, has no data or has a negative length, if
+ * the list cannot be allocated, or if bsplitstrcb reports an error (in
+ * which case the partially built list is destroyed first).
+ */
+struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr) {
+    struct genBstrList ctx;
+    struct bstrList * list;
+
+    if (NULL == str) return NULL;
+    if (NULL == str->data || str->slen < 0) return NULL;
+
+    list = (struct bstrList *) bstr__alloc (sizeof (struct bstrList));
+    if (NULL == list) return NULL;
+
+    /* Start with an empty list that has room for four entries. */
+    list->qty = 0;
+    list->mlen = 4;
+    list->entry = (bstring *) bstr__alloc (list->mlen * sizeof (bstring));
+    if (NULL == list->entry) {
+        bstr__free (list);
+        return NULL;
+    }
+
+    ctx.b = (bstring) str;
+    ctx.bl = list;
+    if (bsplitstrcb (str, splitStr, 0, bscb, &ctx) < 0) {
+        bstrListDestroy (list);
+        return NULL;
+    }
+    return list;
+}
+
+/* struct bstrList * bsplits (const_bstring str, const_bstring splitStr)
+ *
+ * Build a newly allocated list holding copies of the consecutive pieces of
+ * str that are separated by any single character found in splitStr. The
+ * actual splitting is performed by bsplitscb, so an empty splitStr yields
+ * a list with one entry that is a copy of str.
+ *
+ * Returns NULL if str or splitStr is NULL, has no data or has a negative
+ * length, if the list cannot be allocated, or if bsplitscb reports an
+ * error (in which case the partially built list is destroyed first).
+ */
+struct bstrList * bsplits (const_bstring str, const_bstring splitStr) {
+    struct genBstrList ctx;
+    struct bstrList * list;
+
+    if (NULL == str || str->slen < 0 || NULL == str->data) return NULL;
+    if (NULL == splitStr || splitStr->slen < 0 || NULL == splitStr->data)
+        return NULL;
+
+    list = (struct bstrList *) bstr__alloc (sizeof (struct bstrList));
+    if (NULL == list) return NULL;
+
+    /* Start with an empty list that has room for four entries. */
+    list->qty = 0;
+    list->mlen = 4;
+    list->entry = (bstring *) bstr__alloc (list->mlen * sizeof (bstring));
+    if (NULL == list->entry) {
+        bstr__free (list);
+        return NULL;
+    }
+
+    ctx.b = (bstring) str;
+    ctx.bl = list;
+    if (bsplitscb (str, splitStr, 0, bscb, &ctx) < 0) {
+        bstrListDestroy (list);
+        return NULL;
+    }
+    return list;
+}
+
+#if defined (__TURBOC__) && !defined (__BORLANDC__)
+# ifndef BSTRLIB_NOVSNP
+# define BSTRLIB_NOVSNP
+# endif
+#endif
+
+/* exvsnprintf (r, b, n, f, a) is the single formatting primitive used by the
+   format functions below: it formats f with the va_list a into buffer b
+   and stores a result code in r. Which library routine is used depends on
+   the compiler, as selected here. */
+
+/* Give WATCOM C/C++, MSVC some latitude for their non-support of vsnprintf */
+#if defined(__WATCOMC__) || defined(_MSC_VER)
+#define exvsnprintf(r,b,n,f,a) {r = _vsnprintf (b,n,f,a);}
+#else
+#ifdef BSTRLIB_NOVSNP
+/* This is just a hack. If you are using a system without a vsnprintf, it is
+ not recommended that bformat be used at all. */
+/* Note that this variant ignores the size n and always sets r to -1. */
+#define exvsnprintf(r,b,n,f,a) {vsprintf (b,f,a); r = -1;}
+#define START_VSNBUFF (256)
+#else
+
+#if defined(__GNUC__) && !defined(__APPLE__)
+/* Something is making gcc complain about this prototype not being here, so
+ I've just gone ahead and put it in. */
+extern int vsnprintf (char *buf, size_t count, const char *format, va_list arg);
+#endif
+
+/* Default: the standard vsnprintf, whose return value is kept in r. */
+#define exvsnprintf(r,b,n,f,a) {r = vsnprintf (b,n,f,a);}
+#endif
+#endif
+
+#if !defined (BSTRLIB_NOVSNP)
+
+#ifndef START_VSNBUFF
+#define START_VSNBUFF (16)
+#endif
+
+/* On IRIX vsnprintf returns n-1 when the operation would overflow the target
+ buffer, WATCOM and MSVC both return -1, while C99 requires that the
+ returned value be exactly what the length would be if the buffer would be
+ large enough. This leads to the idea that if the return value is larger
+ than n, then changing n to the return value will reduce the number of
+ iterations required. */
+
+/* int bformata (bstring b, const char * fmt, ...)
+ *
+ * After the first parameter, it takes the same parameters as printf (), but
+ * rather than outputting results to stdio, it appends the results to
+ * a bstring which contains what would have been output. Note that if there
+ * is an early generation of a '\0' character, the bstring will be truncated
+ * to this end point.
+ *
+ * Returns BSTR_ERR if b or fmt is NULL, if b fails the sanity checks at the
+ * top of the function, or if the scratch buffer cannot be allocated or
+ * grown. Otherwise the result of bconcat (b, buff) is returned.
+ */
+int bformata (bstring b, const char * fmt, ...) {
+va_list arglist;
+bstring buff;
+int n, r;
+
+ if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0
+ || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR;
+
+ /* Since the length is not determinable beforehand, a search is
+ performed using the truncating "vsnprintf" call (to avoid buffer
+ overflows) on increasing potential sizes for the output result. */
+
+ /* First guess: twice the format length, but at least START_VSNBUFF. */
+ if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF;
+ if (NULL == (buff = bfromcstralloc (n + 2, ""))) {
+ /* The first allocation failed; retry once with the smallest guess. */
+ n = 1;
+ if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR;
+ }
+
+ for (;;) {
+ /* The argument list is restarted for every formatting attempt. */
+ va_start (arglist, fmt);
+ exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist);
+ va_end (arglist);
+
+ /* Force a terminator at index n, then measure what was produced.
+ strlen is parenthesized, presumably so that a function-like
+ macro of that name is not expanded. */
+ buff->data[n] = (unsigned char) '\0';
+ buff->slen = (int) (strlen) ((char *) buff->data);
+
+ /* Fewer than n characters means the guess was large enough. */
+ if (buff->slen < n) break;
+
+ /* Use r as the next guess when it exceeds n, otherwise double n. */
+ if (r > n) n = r; else n += n;
+
+ if (BSTR_OK != balloc (buff, n + 2)) {
+ bdestroy (buff);
+ return BSTR_ERR;
+ }
+ }
+
+ /* Append the formatted text to b and release the scratch buffer. */
+ r = bconcat (b, buff);
+ bdestroy (buff);
+ return r;
+}
+
+/* int bassignformat (bstring b, const char * fmt, ...)
+ *
+ * After the first parameter, it takes the same parameters as printf (), but
+ * rather than outputting results to stdio, it outputs the results to
+ * the bstring parameter b. Note that if there is an early generation of a
+ * '\0' character, the bstring will be truncated to this end point.
+ *
+ * Returns BSTR_ERR if b or fmt is NULL, if b fails the sanity checks at the
+ * top of the function, or if the scratch buffer cannot be allocated or
+ * grown. Otherwise the result of bassign (b, buff) is returned.
+ */
+int bassignformat (bstring b, const char * fmt, ...) {
+va_list arglist;
+bstring buff;
+int n, r;
+
+ if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0
+ || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR;
+
+ /* Since the length is not determinable beforehand, a search is
+ performed using the truncating "vsnprintf" call (to avoid buffer
+ overflows) on increasing potential sizes for the output result. */
+
+ /* First guess: twice the format length, but at least START_VSNBUFF. */
+ if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF;
+ if (NULL == (buff = bfromcstralloc (n + 2, ""))) {
+ /* The first allocation failed; retry once with the smallest guess. */
+ n = 1;
+ if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR;
+ }
+
+ for (;;) {
+ /* The argument list is restarted for every formatting attempt. */
+ va_start (arglist, fmt);
+ exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist);
+ va_end (arglist);
+
+ /* Force a terminator at index n, then measure what was produced. */
+ buff->data[n] = (unsigned char) '\0';
+ buff->slen = (int) (strlen) ((char *) buff->data);
+
+ /* Fewer than n characters means the guess was large enough. */
+ if (buff->slen < n) break;
+
+ /* Use r as the next guess when it exceeds n, otherwise double n. */
+ if (r > n) n = r; else n += n;
+
+ if (BSTR_OK != balloc (buff, n + 2)) {
+ bdestroy (buff);
+ return BSTR_ERR;
+ }
+ }
+
+ /* Replace the contents of b and release the scratch buffer. */
+ r = bassign (b, buff);
+ bdestroy (buff);
+ return r;
+}
+
+/* bstring bformat (const char * fmt, ...)
+ *
+ * Takes the same parameters as printf (), but rather than outputting results
+ * to stdio, it forms a bstring which contains what would have been output.
+ * Note that if there is an early generation of a '\0' character, the
+ * bstring will be truncated to this end point.
+ *
+ * Returns NULL if fmt is NULL or if the result buffer cannot be allocated
+ * or grown. Otherwise the buffer that holds the formatted text is
+ * returned; this function does not destroy it on the success path.
+ */
+bstring bformat (const char * fmt, ...) {
+va_list arglist;
+bstring buff;
+int n, r;
+
+ if (fmt == NULL) return NULL;
+
+ /* Since the length is not determinable beforehand, a search is
+ performed using the truncating "vsnprintf" call (to avoid buffer
+ overflows) on increasing potential sizes for the output result. */
+
+ /* First guess: twice the format length, but at least START_VSNBUFF. */
+ if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF;
+ if (NULL == (buff = bfromcstralloc (n + 2, ""))) {
+ /* The first allocation failed; retry once with the smallest guess. */
+ n = 1;
+ if (NULL == (buff = bfromcstralloc (n + 2, ""))) return NULL;
+ }
+
+ for (;;) {
+ /* The argument list is restarted for every formatting attempt. */
+ va_start (arglist, fmt);
+ exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist);
+ va_end (arglist);
+
+ /* Force a terminator at index n, then measure what was produced. */
+ buff->data[n] = (unsigned char) '\0';
+ buff->slen = (int) (strlen) ((char *) buff->data);
+
+ /* Fewer than n characters means the guess was large enough. */
+ if (buff->slen < n) break;
+
+ /* Use r as the next guess when it exceeds n, otherwise double n. */
+ if (r > n) n = r; else n += n;
+
+ if (BSTR_OK != balloc (buff, n + 2)) {
+ bdestroy (buff);
+ return NULL;
+ }
+ }
+
+ return buff;
+}
+
+/* int bvcformata (bstring b, int count, const char * fmt, va_list arglist)
+ *
+ * The bvcformata function formats data under control of the format control
+ * string fmt and attempts to append the result to b. The fmt parameter is
+ * the same as that of the printf function. The variable argument list is
+ * replaced with arglist, which has been initialized by the va_start macro.
+ * The size of the appended output is upper bounded by count. If the
+ * required output exceeds count, the string b is not augmented with any
+ * contents and a value below BSTR_ERR is returned. If a value below -count
+ * is returned then it is recommended that the negative of this value be
+ * used as an update to the count in a subsequent pass. On other errors,
+ * such as running out of memory, parameter errors or numeric wrap around
+ * BSTR_ERR is returned. BSTR_OK is returned when the output is successfully
+ * generated and appended to b.
+ *
+ * Note: There is no sanity checking of arglist, and this function is
+ * destructive of the contents of b from the b->slen point onward. If there
+ * is an early generation of a '\0' character, the bstring will be truncated
+ * to this end point.
+ */
+int bvcformata (bstring b, int count, const char * fmt, va_list arg) {
+int n, r, l;
+
+ if (b == NULL || fmt == NULL || count <= 0 || b->data == NULL
+ || b->mlen <= 0 || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR;
+
+ /* n is the last index at which the terminator may end up. The comparison
+ looks like a guard against numeric wrap around of b->slen + count. */
+ if (count > (n = b->slen + count) + 2) return BSTR_ERR;
+ if (BSTR_OK != balloc (b, n + 2)) return BSTR_ERR;
+
+ /* Format directly behind the current contents of b. */
+ exvsnprintf (r, (char *) b->data + b->slen, count + 2, fmt, arg);
+
+ /* Did the operation complete successfully within bounds? */
+ for (l = b->slen; l <= n; l++) {
+ if ('\0' == b->data[l]) {
+ /* Terminator found within bounds: it marks the new length. */
+ b->slen = l;
+ return BSTR_OK;
+ }
+ }
+
+ /* Abort, since the buffer was not large enough. The return value
+ tries to help set what the retry length should be. */
+
+ /* Restore the terminator at the old length so b reads as unchanged. */
+ b->data[b->slen] = '\0';
+ if (r > count + 1) { /* Does r specify a particular target length? */
+ n = r;
+ } else {
+ n = count + count; /* If not, just double the size of count */
+ if (count > n) n = INT_MAX;
+ }
+ n = -n;
+
+ /* Keep the result strictly below BSTR_ERR so it cannot be mistaken
+ for the generic error code. */
+ if (n > BSTR_ERR-1) n = BSTR_ERR-1;
+ return n;
+}
+
+#endif
diff --git a/src/bstrlib.h b/src/bstrlib.h
new file mode 100644
index 0000000..c8fa694
--- /dev/null
+++ b/src/bstrlib.h
@@ -0,0 +1,304 @@
+/*
+ * This source file is part of the bstring string library. This code was
+ * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
+ * BSD open source license or GPL v2.0. Refer to the accompanying documentation
+ * for details on usage and license.
+ */
+
+/*
+ * bstrlib.h
+ *
+ * This file is the header file for the core module for implementing the
+ * bstring functions.
+ */
+
+#ifndef BSTRLIB_INCLUDE
+#define BSTRLIB_INCLUDE
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdarg.h>
+#include <string.h>
+#include <limits.h>
+#include <ctype.h>
+
+#if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP)
+# if defined (__TURBOC__) && !defined (__BORLANDC__)
+# define BSTRLIB_NOVSNP
+# endif
+#endif
+
+#define BSTR_ERR (-1)
+#define BSTR_OK (0)
+#define BSTR_BS_BUFF_LENGTH_GET (0)
+
+typedef struct tagbstring * bstring;
+typedef const struct tagbstring * const_bstring;
+
+/* Copy functions */
+#define cstr2bstr bfromcstr
+extern bstring bfromcstr (const char * str);
+extern bstring bfromcstralloc (int mlen, const char * str);
+extern bstring blk2bstr (const void * blk, int len);
+extern char * bstr2cstr (const_bstring s, char z);
+extern int bcstrfree (char * s);
+extern bstring bstrcpy (const_bstring b1);
+extern int bassign (bstring a, const_bstring b);
+extern int bassignmidstr (bstring a, const_bstring b, int left, int len);
+extern int bassigncstr (bstring a, const char * str);
+extern int bassignblk (bstring a, const void * s, int len);
+
+/* Destroy function */
+extern int bdestroy (bstring b);
+
+/* Space allocation hinting functions */
+extern int balloc (bstring s, int len);
+extern int ballocmin (bstring b, int len);
+
+/* Substring extraction */
+extern bstring bmidstr (const_bstring b, int left, int len);
+
+/* Various standard manipulations */
+extern int bconcat (bstring b0, const_bstring b1);
+extern int bconchar (bstring b0, char c);
+extern int bcatcstr (bstring b, const char * s);
+extern int bcatblk (bstring b, const void * s, int len);
+extern int binsert (bstring s1, int pos, const_bstring s2, unsigned char fill);
+extern int binsertch (bstring s1, int pos, int len, unsigned char fill);
+extern int breplace (bstring b1, int pos, int len, const_bstring b2, unsigned char fill);
+extern int bdelete (bstring s1, int pos, int len);
+extern int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill);
+extern int btrunc (bstring b, int n);
+
+/* Scan/search functions */
+extern int bstricmp (const_bstring b0, const_bstring b1);
+extern int bstrnicmp (const_bstring b0, const_bstring b1, int n);
+extern int biseqcaseless (const_bstring b0, const_bstring b1);
+extern int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len);
+extern int biseq (const_bstring b0, const_bstring b1);
+extern int bisstemeqblk (const_bstring b0, const void * blk, int len);
+extern int biseqcstr (const_bstring b, const char * s);
+extern int biseqcstrcaseless (const_bstring b, const char * s);
+extern int bstrcmp (const_bstring b0, const_bstring b1);
+extern int bstrncmp (const_bstring b0, const_bstring b1, int n);
+extern int binstr (const_bstring s1, int pos, const_bstring s2);
+extern int binstrr (const_bstring s1, int pos, const_bstring s2);
+extern int binstrcaseless (const_bstring s1, int pos, const_bstring s2);
+extern int binstrrcaseless (const_bstring s1, int pos, const_bstring s2);
+extern int bstrchrp (const_bstring b, int c, int pos);
+extern int bstrrchrp (const_bstring b, int c, int pos);
+#define bstrchr(b,c) bstrchrp ((b), (c), 0)
+#define bstrrchr(b,c) bstrrchrp ((b), (c), blength(b)-1)
+extern int binchr (const_bstring b0, int pos, const_bstring b1);
+extern int binchrr (const_bstring b0, int pos, const_bstring b1);
+extern int bninchr (const_bstring b0, int pos, const_bstring b1);
+extern int bninchrr (const_bstring b0, int pos, const_bstring b1);
+extern int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos);
+extern int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos);
+
+/* List of string container functions */
+/* A growable array of bstrings. */
+struct bstrList {
+ int qty, mlen; /* qty: entries in use; mlen: entries allocated in entry */
+ bstring * entry; /* array of mlen slots, the first qty of which are filled */
+};
+extern struct bstrList * bstrListCreate (void);
+extern int bstrListDestroy (struct bstrList * sl);
+extern int bstrListAlloc (struct bstrList * sl, int msz);
+extern int bstrListAllocMin (struct bstrList * sl, int msz);
+
+/* String split and join functions */
+extern struct bstrList * bsplit (const_bstring str, unsigned char splitChar);
+extern struct bstrList * bsplits (const_bstring str, const_bstring splitStr);
+extern struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr);
+extern bstring bjoin (const struct bstrList * bl, const_bstring sep);
+extern int bsplitcb (const_bstring str, unsigned char splitChar, int pos,
+ int (* cb) (void * parm, int ofs, int len), void * parm);
+extern int bsplitscb (const_bstring str, const_bstring splitStr, int pos,
+ int (* cb) (void * parm, int ofs, int len), void * parm);
+extern int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos,
+ int (* cb) (void * parm, int ofs, int len), void * parm);
+
+/* Miscellaneous functions */
+extern int bpattern (bstring b, int len);
+extern int btoupper (bstring b);
+extern int btolower (bstring b);
+extern int bltrimws (bstring b);
+extern int brtrimws (bstring b);
+extern int btrimws (bstring b);
+
+/* <*>printf format functions */
+#if !defined (BSTRLIB_NOVSNP)
+extern bstring bformat (const char * fmt, ...);
+extern int bformata (bstring b, const char * fmt, ...);
+extern int bassignformat (bstring b, const char * fmt, ...);
+extern int bvcformata (bstring b, int count, const char * fmt, va_list arglist);
+
+/* bvformata (ret, b, fmt, lastarg)
+ *
+ * Statement macro for use inside a variadic function; lastarg is passed to
+ * va_start. It calls bvcformata on b with a size budget that starts at
+ * 16. Whenever bvcformata returns a negative value whose magnitude exceeds
+ * the current budget, that magnitude becomes the new budget and the call
+ * is repeated with a freshly started argument list. ret is set to BSTR_OK
+ * as soon as a call returns a non-negative value, or to BSTR_ERR when the
+ * negative result does not exceed the current budget.
+ */
+#define bvformata(ret, b, fmt, lastarg) { \
+bstring bstrtmp_b = (b); \
+const char * bstrtmp_fmt = (fmt); \
+int bstrtmp_r = BSTR_ERR, bstrtmp_sz = 16; \
+ for (;;) { \
+ va_list bstrtmp_arglist; \
+ va_start (bstrtmp_arglist, lastarg); \
+ bstrtmp_r = bvcformata (bstrtmp_b, bstrtmp_sz, bstrtmp_fmt, bstrtmp_arglist); \
+ va_end (bstrtmp_arglist); \
+ if (bstrtmp_r >= 0) { /* Everything went ok */ \
+ bstrtmp_r = BSTR_OK; \
+ break; \
+ } else if (-bstrtmp_r <= bstrtmp_sz) { /* A real error? */ \
+ bstrtmp_r = BSTR_ERR; \
+ break; \
+ } \
+ bstrtmp_sz = -bstrtmp_r; /* Doubled or target size */ \
+ } \
+ ret = bstrtmp_r; \
+}
+
+#endif
+
+typedef int (*bNgetc) (void *parm);
+typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, void *parm);
+
+/* Input functions */
+extern bstring bgets (bNgetc getcPtr, void * parm, char terminator);
+extern bstring bread (bNread readPtr, void * parm);
+extern int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator);
+extern int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator);
+extern int breada (bstring b, bNread readPtr, void * parm);
+
+/* Stream functions */
+extern struct bStream * bsopen (bNread readPtr, void * parm);
+extern void * bsclose (struct bStream * s);
+extern int bsbufflength (struct bStream * s, int sz);
+extern int bsreadln (bstring b, struct bStream * s, char terminator);
+extern int bsreadlns (bstring r, struct bStream * s, const_bstring term);
+extern int bsread (bstring b, struct bStream * s, int n);
+extern int bsreadlna (bstring b, struct bStream * s, char terminator);
+extern int bsreadlnsa (bstring r, struct bStream * s, const_bstring term);
+extern int bsreada (bstring b, struct bStream * s, int n);
+extern int bsunread (struct bStream * s, const_bstring b);
+extern int bspeek (bstring r, const struct bStream * s);
+extern int bssplitscb (struct bStream * s, const_bstring splitStr,
+ int (* cb) (void * parm, int ofs, const_bstring entry), void * parm);
+extern int bssplitstrcb (struct bStream * s, const_bstring splitStr,
+ int (* cb) (void * parm, int ofs, const_bstring entry), void * parm);
+extern int bseof (const struct bStream * s);
+
+/* The string header that bstring and const_bstring point to. */
+struct tagbstring {
+ int mlen; /* allocated size of data; values <= 0 mark the string as write protected (see biswriteprotected) */
+ int slen; /* current length of the string; negative values are treated as invalid by the accessors */
+ unsigned char * data; /* character storage */
+};
+
+/* Accessor macros */
+/* blengthe: length of b, or e when b is NULL or its slen is negative. */
+#define blengthe(b, e) (((b) == (void *)0 || (b)->slen < 0) ? (int)(e) : ((b)->slen))
+/* blength: as blengthe, with 0 as the fallback value. */
+#define blength(b) (blengthe ((b), 0))
+/* bdataofse: char pointer to offset o within b's data, or e (cast to
+   char *) when b or its data is NULL. The offset is not range checked. */
+#define bdataofse(b, o, e) (((b) == (void *)0 || (b)->data == (void*)0) ? (char *)(e) : ((char *)(b)->data) + (o))
+/* bdataofs: as bdataofse, with a NULL fallback. */
+#define bdataofs(b, o) (bdataofse ((b), (o), (void *)0))
+/* bdatae: pointer to the start of b's data, or e on failure. */
+#define bdatae(b, e) (bdataofse (b, 0, e))
+/* bdata: pointer to the start of b's data, or NULL on failure. */
+#define bdata(b) (bdataofs (b, 0))
+/* bchare: character at index p of b, or e when p is outside
+   [0, blength(b)); the unsigned comparison also rejects negative p. */
+#define bchare(b, p, e) ((((unsigned)(p)) < (unsigned)blength(b)) ? ((b)->data[(p)]) : (e))
+/* bchar: as bchare, with '\0' as the fallback value. */
+#define bchar(b, p) bchare ((b), (p), '\0')
+
+/* Static constant string initialization macro */
+/* bsStaticMlen expands to a brace initializer for a struct tagbstring whose
+   mlen is m, whose slen is sizeof(q)-1 and whose data points at q. The
+   "" q "" concatenation only compiles when q is a string literal. */
+#define bsStaticMlen(q,m) {(m), (int) sizeof(q)-1, (unsigned char *) ("" q "")}
+#if defined(_MSC_VER)
+/* There are many versions of MSVC which emit __LINE__ as a non-constant. */
+# define bsStatic(q) bsStaticMlen(q,-32)
+#endif
+#ifndef bsStatic
+/* The negative mlen makes biswriteprotected true for the resulting string. */
+# define bsStatic(q) bsStaticMlen(q,-__LINE__)
+#endif
+
+/* Static constant block parameter pair */
+/* Expands to two comma separated arguments: a pointer to the literal q and
+   its length without the terminating '\0'. */
+#define bsStaticBlkParms(q) ((void *)("" q "")), ((int) sizeof(q)-1)
+
+/* Reference building macros */
+/* These macros fill in a struct tagbstring t so that it refers to existing
+   storage; nothing is copied. mlen is set to -1, which biswriteprotected
+   reports as write protected. */
+#define cstr2tbstr btfromcstr
+/* btfromcstr: t refers to the C string s; slen is strlen of s, or 0 when s
+   is NULL. */
+#define btfromcstr(t,s) { \
+ (t).data = (unsigned char *) (s); \
+ (t).slen = ((t).data) ? ((int) (strlen) ((char *)(t).data)) : 0; \
+ (t).mlen = -1; \
+}
+/* blk2tbstr: t refers to the block s of length l; neither is validated. */
+#define blk2tbstr(t,s,l) { \
+ (t).data = (unsigned char *) (s); \
+ (t).slen = l; \
+ (t).mlen = -1; \
+}
+#define btfromblk(t,s,l) blk2tbstr(t,s,l)
+/* bmid2tbstr: make t refer to the part of b that starts at p and is l
+   characters long, without copying. A negative p is clamped to 0 (with l
+   shortened accordingly) and l is clamped to what remains of b. When b is
+   NULL or invalid, or the clamped length is not positive, t refers to an
+   empty string literal instead. mlen is set to a negative value, so t is
+   reported as write protected by biswriteprotected. */
+#define bmid2tbstr(t,b,p,l) { \
+ const_bstring bstrtmp_s = (b); \
+ if (bstrtmp_s && bstrtmp_s->data && bstrtmp_s->slen >= 0) { \
+ int bstrtmp_left = (p); \
+ int bstrtmp_len = (l); \
+ if (bstrtmp_left < 0) { \
+ bstrtmp_len += bstrtmp_left; \
+ bstrtmp_left = 0; \
+ } \
+ if (bstrtmp_len > bstrtmp_s->slen - bstrtmp_left) \
+ bstrtmp_len = bstrtmp_s->slen - bstrtmp_left; \
+ if (bstrtmp_len <= 0) { \
+ (t).data = (unsigned char *)""; \
+ (t).slen = 0; \
+ } else { \
+ (t).data = bstrtmp_s->data + bstrtmp_left; \
+ (t).slen = bstrtmp_len; \
+ } \
+ } else { \
+ (t).data = (unsigned char *)""; \
+ (t).slen = 0; \
+ } \
+ (t).mlen = -__LINE__; \
+}
+/* btfromblkltrimws: make t refer to the block s of length l with leading
+   characters for which isspace is true skipped; nothing is copied. When s
+   is NULL or l is negative no scan takes place and t simply receives s and
+   l. mlen is set to a negative value (write protected). */
+#define btfromblkltrimws(t,s,l) { \
+ int bstrtmp_idx = 0, bstrtmp_len = (l); \
+ unsigned char * bstrtmp_s = (s); \
+ if (bstrtmp_s && bstrtmp_len >= 0) { \
+ for (; bstrtmp_idx < bstrtmp_len; bstrtmp_idx++) { \
+ if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \
+ } \
+ } \
+ (t).data = bstrtmp_s + bstrtmp_idx; \
+ (t).slen = bstrtmp_len - bstrtmp_idx; \
+ (t).mlen = -__LINE__; \
+}
+/* btfromblkrtrimws: make t refer to the block s of length l with trailing
+   characters for which isspace is true dropped; nothing is copied. The
+   local bstrtmp_len holds the index of the last kept character, hence the
+   "- 1" on entry and the "+ 1" when slen is stored. mlen is set to a
+   negative value (write protected). */
+#define btfromblkrtrimws(t,s,l) { \
+ int bstrtmp_len = (l) - 1; \
+ unsigned char * bstrtmp_s = (s); \
+ if (bstrtmp_s && bstrtmp_len >= 0) { \
+ for (; bstrtmp_len >= 0; bstrtmp_len--) { \
+ if (!isspace (bstrtmp_s[bstrtmp_len])) break; \
+ } \
+ } \
+ (t).data = bstrtmp_s; \
+ (t).slen = bstrtmp_len + 1; \
+ (t).mlen = -__LINE__; \
+}
+/* btfromblktrimws: make t refer to the block s of length l with both
+   leading and trailing characters for which isspace is true removed;
+   nothing is copied. bstrtmp_idx ends up as the index of the first kept
+   character and bstrtmp_len as the index of the last one. mlen is set to
+   a negative value (write protected). */
+#define btfromblktrimws(t,s,l) { \
+ int bstrtmp_idx = 0, bstrtmp_len = (l) - 1; \
+ unsigned char * bstrtmp_s = (s); \
+ if (bstrtmp_s && bstrtmp_len >= 0) { \
+ for (; bstrtmp_idx <= bstrtmp_len; bstrtmp_idx++) { \
+ if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \
+ } \
+ for (; bstrtmp_len >= bstrtmp_idx; bstrtmp_len--) { \
+ if (!isspace (bstrtmp_s[bstrtmp_len])) break; \
+ } \
+ } \
+ (t).data = bstrtmp_s + bstrtmp_idx; \
+ (t).slen = bstrtmp_len + 1 - bstrtmp_idx; \
+ (t).mlen = -__LINE__; \
+}
+
+/* Write protection macros */
+/* bwriteprotect: set mlen to -1 unless it is already negative. */
+#define bwriteprotect(t) { if ((t).mlen >= 0) (t).mlen = -1; }
+/* bwriteallow: only undoes an mlen of exactly -1; mlen becomes slen, or 1
+   when slen is 0, so that it is always positive afterwards. */
+#define bwriteallow(t) { if ((t).mlen == -1) (t).mlen = (t).slen + ((t).slen == 0); }
+/* biswriteprotected: true when mlen is zero or negative. */
+#define biswriteprotected(t) ((t).mlen <= 0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/case_fold_switch.c b/src/case_fold_switch.c
new file mode 100644
index 0000000..70fdd75
--- /dev/null
+++ b/src/case_fold_switch.c
@@ -0,0 +1,2637 @@
+ switch (c) {
+ case 0x0041:
+ bufpush(0x0061);
+ break;
+ case 0x0042:
+ bufpush(0x0062);
+ break;
+ case 0x0043:
+ bufpush(0x0063);
+ break;
+ case 0x0044:
+ bufpush(0x0064);
+ break;
+ case 0x0045:
+ bufpush(0x0065);
+ break;
+ case 0x0046:
+ bufpush(0x0066);
+ break;
+ case 0x0047:
+ bufpush(0x0067);
+ break;
+ case 0x0048:
+ bufpush(0x0068);
+ break;
+ case 0x0049:
+ bufpush(0x0069);
+ break;
+ case 0x004A:
+ bufpush(0x006A);
+ break;
+ case 0x004B:
+ bufpush(0x006B);
+ break;
+ case 0x004C:
+ bufpush(0x006C);
+ break;
+ case 0x004D:
+ bufpush(0x006D);
+ break;
+ case 0x004E:
+ bufpush(0x006E);
+ break;
+ case 0x004F:
+ bufpush(0x006F);
+ break;
+ case 0x0050:
+ bufpush(0x0070);
+ break;
+ case 0x0051:
+ bufpush(0x0071);
+ break;
+ case 0x0052:
+ bufpush(0x0072);
+ break;
+ case 0x0053:
+ bufpush(0x0073);
+ break;
+ case 0x0054:
+ bufpush(0x0074);
+ break;
+ case 0x0055:
+ bufpush(0x0075);
+ break;
+ case 0x0056:
+ bufpush(0x0076);
+ break;
+ case 0x0057:
+ bufpush(0x0077);
+ break;
+ case 0x0058:
+ bufpush(0x0078);
+ break;
+ case 0x0059:
+ bufpush(0x0079);
+ break;
+ case 0x005A:
+ bufpush(0x007A);
+ break;
+ case 0x00B5:
+ bufpush(0x03BC);
+ break;
+ case 0x00C0:
+ bufpush(0x00E0);
+ break;
+ case 0x00C1:
+ bufpush(0x00E1);
+ break;
+ case 0x00C2:
+ bufpush(0x00E2);
+ break;
+ case 0x00C3:
+ bufpush(0x00E3);
+ break;
+ case 0x00C4:
+ bufpush(0x00E4);
+ break;
+ case 0x00C5:
+ bufpush(0x00E5);
+ break;
+ case 0x00C6:
+ bufpush(0x00E6);
+ break;
+ case 0x00C7:
+ bufpush(0x00E7);
+ break;
+ case 0x00C8:
+ bufpush(0x00E8);
+ break;
+ case 0x00C9:
+ bufpush(0x00E9);
+ break;
+ case 0x00CA:
+ bufpush(0x00EA);
+ break;
+ case 0x00CB:
+ bufpush(0x00EB);
+ break;
+ case 0x00CC:
+ bufpush(0x00EC);
+ break;
+ case 0x00CD:
+ bufpush(0x00ED);
+ break;
+ case 0x00CE:
+ bufpush(0x00EE);
+ break;
+ case 0x00CF:
+ bufpush(0x00EF);
+ break;
+ case 0x00D0:
+ bufpush(0x00F0);
+ break;
+ case 0x00D1:
+ bufpush(0x00F1);
+ break;
+ case 0x00D2:
+ bufpush(0x00F2);
+ break;
+ case 0x00D3:
+ bufpush(0x00F3);
+ break;
+ case 0x00D4:
+ bufpush(0x00F4);
+ break;
+ case 0x00D5:
+ bufpush(0x00F5);
+ break;
+ case 0x00D6:
+ bufpush(0x00F6);
+ break;
+ case 0x00D8:
+ bufpush(0x00F8);
+ break;
+ case 0x00D9:
+ bufpush(0x00F9);
+ break;
+ case 0x00DA:
+ bufpush(0x00FA);
+ break;
+ case 0x00DB:
+ bufpush(0x00FB);
+ break;
+ case 0x00DC:
+ bufpush(0x00FC);
+ break;
+ case 0x00DD:
+ bufpush(0x00FD);
+ break;
+ case 0x00DE:
+ bufpush(0x00FE);
+ break;
+ case 0x00DF:
+ bufpush(0x0073);
+ bufpush(0x0073);
+ break;
+ case 0x0100:
+ bufpush(0x0101);
+ break;
+ case 0x0102:
+ bufpush(0x0103);
+ break;
+ case 0x0104:
+ bufpush(0x0105);
+ break;
+ case 0x0106:
+ bufpush(0x0107);
+ break;
+ case 0x0108:
+ bufpush(0x0109);
+ break;
+ case 0x010A:
+ bufpush(0x010B);
+ break;
+ case 0x010C:
+ bufpush(0x010D);
+ break;
+ case 0x010E:
+ bufpush(0x010F);
+ break;
+ case 0x0110:
+ bufpush(0x0111);
+ break;
+ case 0x0112:
+ bufpush(0x0113);
+ break;
+ case 0x0114:
+ bufpush(0x0115);
+ break;
+ case 0x0116:
+ bufpush(0x0117);
+ break;
+ case 0x0118:
+ bufpush(0x0119);
+ break;
+ case 0x011A:
+ bufpush(0x011B);
+ break;
+ case 0x011C:
+ bufpush(0x011D);
+ break;
+ case 0x011E:
+ bufpush(0x011F);
+ break;
+ case 0x0120:
+ bufpush(0x0121);
+ break;
+ case 0x0122:
+ bufpush(0x0123);
+ break;
+ case 0x0124:
+ bufpush(0x0125);
+ break;
+ case 0x0126:
+ bufpush(0x0127);
+ break;
+ case 0x0128:
+ bufpush(0x0129);
+ break;
+ case 0x012A:
+ bufpush(0x012B);
+ break;
+ case 0x012C:
+ bufpush(0x012D);
+ break;
+ case 0x012E:
+ bufpush(0x012F);
+ break;
+ case 0x0130:
+ bufpush(0x0069);
+ bufpush(0x0307);
+ break;
+ case 0x0132:
+ bufpush(0x0133);
+ break;
+ case 0x0134:
+ bufpush(0x0135);
+ break;
+ case 0x0136:
+ bufpush(0x0137);
+ break;
+ case 0x0139:
+ bufpush(0x013A);
+ break;
+ case 0x013B:
+ bufpush(0x013C);
+ break;
+ case 0x013D:
+ bufpush(0x013E);
+ break;
+ case 0x013F:
+ bufpush(0x0140);
+ break;
+ case 0x0141:
+ bufpush(0x0142);
+ break;
+ case 0x0143:
+ bufpush(0x0144);
+ break;
+ case 0x0145:
+ bufpush(0x0146);
+ break;
+ case 0x0147:
+ bufpush(0x0148);
+ break;
+ case 0x0149:
+ bufpush(0x02BC);
+ bufpush(0x006E);
+ break;
+ case 0x014A:
+ bufpush(0x014B);
+ break;
+ case 0x014C:
+ bufpush(0x014D);
+ break;
+ case 0x014E:
+ bufpush(0x014F);
+ break;
+ case 0x0150:
+ bufpush(0x0151);
+ break;
+ case 0x0152:
+ bufpush(0x0153);
+ break;
+ case 0x0154:
+ bufpush(0x0155);
+ break;
+ case 0x0156:
+ bufpush(0x0157);
+ break;
+ case 0x0158:
+ bufpush(0x0159);
+ break;
+ case 0x015A:
+ bufpush(0x015B);
+ break;
+ case 0x015C:
+ bufpush(0x015D);
+ break;
+ case 0x015E:
+ bufpush(0x015F);
+ break;
+ case 0x0160:
+ bufpush(0x0161);
+ break;
+ case 0x0162:
+ bufpush(0x0163);
+ break;
+ case 0x0164:
+ bufpush(0x0165);
+ break;
+ case 0x0166:
+ bufpush(0x0167);
+ break;
+ case 0x0168:
+ bufpush(0x0169);
+ break;
+ case 0x016A:
+ bufpush(0x016B);
+ break;
+ case 0x016C:
+ bufpush(0x016D);
+ break;
+ case 0x016E:
+ bufpush(0x016F);
+ break;
+ case 0x0170:
+ bufpush(0x0171);
+ break;
+ case 0x0172:
+ bufpush(0x0173);
+ break;
+ case 0x0174:
+ bufpush(0x0175);
+ break;
+ case 0x0176:
+ bufpush(0x0177);
+ break;
+ case 0x0178:
+ bufpush(0x00FF);
+ break;
+ case 0x0179:
+ bufpush(0x017A);
+ break;
+ case 0x017B:
+ bufpush(0x017C);
+ break;
+ case 0x017D:
+ bufpush(0x017E);
+ break;
+ case 0x017F:
+ bufpush(0x0073);
+ break;
+ case 0x0181:
+ bufpush(0x0253);
+ break;
+ case 0x0182:
+ bufpush(0x0183);
+ break;
+ case 0x0184:
+ bufpush(0x0185);
+ break;
+ case 0x0186:
+ bufpush(0x0254);
+ break;
+ case 0x0187:
+ bufpush(0x0188);
+ break;
+ case 0x0189:
+ bufpush(0x0256);
+ break;
+ case 0x018A:
+ bufpush(0x0257);
+ break;
+ case 0x018B:
+ bufpush(0x018C);
+ break;
+ case 0x018E:
+ bufpush(0x01DD);
+ break;
+ case 0x018F:
+ bufpush(0x0259);
+ break;
+ case 0x0190:
+ bufpush(0x025B);
+ break;
+ case 0x0191:
+ bufpush(0x0192);
+ break;
+ case 0x0193:
+ bufpush(0x0260);
+ break;
+ case 0x0194:
+ bufpush(0x0263);
+ break;
+ case 0x0196:
+ bufpush(0x0269);
+ break;
+ case 0x0197:
+ bufpush(0x0268);
+ break;
+ case 0x0198:
+ bufpush(0x0199);
+ break;
+ case 0x019C:
+ bufpush(0x026F);
+ break;
+ case 0x019D:
+ bufpush(0x0272);
+ break;
+ case 0x019F:
+ bufpush(0x0275);
+ break;
+ case 0x01A0:
+ bufpush(0x01A1);
+ break;
+ case 0x01A2:
+ bufpush(0x01A3);
+ break;
+ case 0x01A4:
+ bufpush(0x01A5);
+ break;
+ case 0x01A6:
+ bufpush(0x0280);
+ break;
+ case 0x01A7:
+ bufpush(0x01A8);
+ break;
+ case 0x01A9:
+ bufpush(0x0283);
+ break;
+ case 0x01AC:
+ bufpush(0x01AD);
+ break;
+ case 0x01AE:
+ bufpush(0x0288);
+ break;
+ case 0x01AF:
+ bufpush(0x01B0);
+ break;
+ case 0x01B1:
+ bufpush(0x028A);
+ break;
+ case 0x01B2:
+ bufpush(0x028B);
+ break;
+ case 0x01B3:
+ bufpush(0x01B4);
+ break;
+ case 0x01B5:
+ bufpush(0x01B6);
+ break;
+ case 0x01B7:
+ bufpush(0x0292);
+ break;
+ case 0x01B8:
+ bufpush(0x01B9);
+ break;
+ case 0x01BC:
+ bufpush(0x01BD);
+ break;
+ case 0x01C4:
+ bufpush(0x01C6);
+ break;
+ case 0x01C5:
+ bufpush(0x01C6);
+ break;
+ case 0x01C7:
+ bufpush(0x01C9);
+ break;
+ case 0x01C8:
+ bufpush(0x01C9);
+ break;
+ case 0x01CA:
+ bufpush(0x01CC);
+ break;
+ case 0x01CB:
+ bufpush(0x01CC);
+ break;
+ case 0x01CD:
+ bufpush(0x01CE);
+ break;
+ case 0x01CF:
+ bufpush(0x01D0);
+ break;
+ case 0x01D1:
+ bufpush(0x01D2);
+ break;
+ case 0x01D3:
+ bufpush(0x01D4);
+ break;
+ case 0x01D5:
+ bufpush(0x01D6);
+ break;
+ case 0x01D7:
+ bufpush(0x01D8);
+ break;
+ case 0x01D9:
+ bufpush(0x01DA);
+ break;
+ case 0x01DB:
+ bufpush(0x01DC);
+ break;
+ case 0x01DE:
+ bufpush(0x01DF);
+ break;
+ case 0x01E0:
+ bufpush(0x01E1);
+ break;
+ case 0x01E2:
+ bufpush(0x01E3);
+ break;
+ case 0x01E4:
+ bufpush(0x01E5);
+ break;
+ case 0x01E6:
+ bufpush(0x01E7);
+ break;
+ case 0x01E8:
+ bufpush(0x01E9);
+ break;
+ case 0x01EA:
+ bufpush(0x01EB);
+ break;
+ case 0x01EC:
+ bufpush(0x01ED);
+ break;
+ case 0x01EE:
+ bufpush(0x01EF);
+ break;
+ case 0x01F0:
+ bufpush(0x006A);
+ bufpush(0x030C);
+ break;
+ case 0x01F1:
+ bufpush(0x01F3);
+ break;
+ case 0x01F2:
+ bufpush(0x01F3);
+ break;
+ case 0x01F4:
+ bufpush(0x01F5);
+ break;
+ case 0x01F6:
+ bufpush(0x0195);
+ break;
+ case 0x01F7:
+ bufpush(0x01BF);
+ break;
+ case 0x01F8:
+ bufpush(0x01F9);
+ break;
+ case 0x01FA:
+ bufpush(0x01FB);
+ break;
+ case 0x01FC:
+ bufpush(0x01FD);
+ break;
+ case 0x01FE:
+ bufpush(0x01FF);
+ break;
+ case 0x0200:
+ bufpush(0x0201);
+ break;
+ case 0x0202:
+ bufpush(0x0203);
+ break;
+ case 0x0204:
+ bufpush(0x0205);
+ break;
+ case 0x0206:
+ bufpush(0x0207);
+ break;
+ case 0x0208:
+ bufpush(0x0209);
+ break;
+ case 0x020A:
+ bufpush(0x020B);
+ break;
+ case 0x020C:
+ bufpush(0x020D);
+ break;
+ case 0x020E:
+ bufpush(0x020F);
+ break;
+ case 0x0210:
+ bufpush(0x0211);
+ break;
+ case 0x0212:
+ bufpush(0x0213);
+ break;
+ case 0x0214:
+ bufpush(0x0215);
+ break;
+ case 0x0216:
+ bufpush(0x0217);
+ break;
+ case 0x0218:
+ bufpush(0x0219);
+ break;
+ case 0x021A:
+ bufpush(0x021B);
+ break;
+ case 0x021C:
+ bufpush(0x021D);
+ break;
+ case 0x021E:
+ bufpush(0x021F);
+ break;
+ case 0x0220:
+ bufpush(0x019E);
+ break;
+ case 0x0222:
+ bufpush(0x0223);
+ break;
+ case 0x0224:
+ bufpush(0x0225);
+ break;
+ case 0x0226:
+ bufpush(0x0227);
+ break;
+ case 0x0228:
+ bufpush(0x0229);
+ break;
+ case 0x022A:
+ bufpush(0x022B);
+ break;
+ case 0x022C:
+ bufpush(0x022D);
+ break;
+ case 0x022E:
+ bufpush(0x022F);
+ break;
+ case 0x0230:
+ bufpush(0x0231);
+ break;
+ case 0x0232:
+ bufpush(0x0233);
+ break;
+ case 0x0345:
+ bufpush(0x03B9);
+ break;
+ case 0x0386:
+ bufpush(0x03AC);
+ break;
+ case 0x0388:
+ bufpush(0x03AD);
+ break;
+ case 0x0389:
+ bufpush(0x03AE);
+ break;
+ case 0x038A:
+ bufpush(0x03AF);
+ break;
+ case 0x038C:
+ bufpush(0x03CC);
+ break;
+ case 0x038E:
+ bufpush(0x03CD);
+ break;
+ case 0x038F:
+ bufpush(0x03CE);
+ break;
+ case 0x0390:
+ bufpush(0x03B9);
+ bufpush(0x0308);
+ bufpush(0x0301);
+ break;
+ case 0x0391:
+ bufpush(0x03B1);
+ break;
+ case 0x0392:
+ bufpush(0x03B2);
+ break;
+ case 0x0393:
+ bufpush(0x03B3);
+ break;
+ case 0x0394:
+ bufpush(0x03B4);
+ break;
+ case 0x0395:
+ bufpush(0x03B5);
+ break;
+ case 0x0396:
+ bufpush(0x03B6);
+ break;
+ case 0x0397:
+ bufpush(0x03B7);
+ break;
+ case 0x0398:
+ bufpush(0x03B8);
+ break;
+ case 0x0399:
+ bufpush(0x03B9);
+ break;
+ case 0x039A:
+ bufpush(0x03BA);
+ break;
+ case 0x039B:
+ bufpush(0x03BB);
+ break;
+ case 0x039C:
+ bufpush(0x03BC);
+ break;
+ case 0x039D:
+ bufpush(0x03BD);
+ break;
+ case 0x039E:
+ bufpush(0x03BE);
+ break;
+ case 0x039F:
+ bufpush(0x03BF);
+ break;
+ case 0x03A0:
+ bufpush(0x03C0);
+ break;
+ case 0x03A1:
+ bufpush(0x03C1);
+ break;
+ case 0x03A3:
+ bufpush(0x03C3);
+ break;
+ case 0x03A4:
+ bufpush(0x03C4);
+ break;
+ case 0x03A5:
+ bufpush(0x03C5);
+ break;
+ case 0x03A6:
+ bufpush(0x03C6);
+ break;
+ case 0x03A7:
+ bufpush(0x03C7);
+ break;
+ case 0x03A8:
+ bufpush(0x03C8);
+ break;
+ case 0x03A9:
+ bufpush(0x03C9);
+ break;
+ case 0x03AA:
+ bufpush(0x03CA);
+ break;
+ case 0x03AB:
+ bufpush(0x03CB);
+ break;
+ case 0x03B0:
+ bufpush(0x03C5);
+ bufpush(0x0308);
+ bufpush(0x0301);
+ break;
+ case 0x03C2:
+ bufpush(0x03C3);
+ break;
+ case 0x03D0:
+ bufpush(0x03B2);
+ break;
+ case 0x03D1:
+ bufpush(0x03B8);
+ break;
+ case 0x03D5:
+ bufpush(0x03C6);
+ break;
+ case 0x03D6:
+ bufpush(0x03C0);
+ break;
+ case 0x03D8:
+ bufpush(0x03D9);
+ break;
+ case 0x03DA:
+ bufpush(0x03DB);
+ break;
+ case 0x03DC:
+ bufpush(0x03DD);
+ break;
+ case 0x03DE:
+ bufpush(0x03DF);
+ break;
+ case 0x03E0:
+ bufpush(0x03E1);
+ break;
+ case 0x03E2:
+ bufpush(0x03E3);
+ break;
+ case 0x03E4:
+ bufpush(0x03E5);
+ break;
+ case 0x03E6:
+ bufpush(0x03E7);
+ break;
+ case 0x03E8:
+ bufpush(0x03E9);
+ break;
+ case 0x03EA:
+ bufpush(0x03EB);
+ break;
+ case 0x03EC:
+ bufpush(0x03ED);
+ break;
+ case 0x03EE:
+ bufpush(0x03EF);
+ break;
+ case 0x03F0:
+ bufpush(0x03BA);
+ break;
+ case 0x03F1:
+ bufpush(0x03C1);
+ break;
+ case 0x03F2:
+ bufpush(0x03C3);
+ break;
+ case 0x03F4:
+ bufpush(0x03B8);
+ break;
+ case 0x03F5:
+ bufpush(0x03B5);
+ break;
+ case 0x0400:
+ bufpush(0x0450);
+ break;
+ case 0x0401:
+ bufpush(0x0451);
+ break;
+ case 0x0402:
+ bufpush(0x0452);
+ break;
+ case 0x0403:
+ bufpush(0x0453);
+ break;
+ case 0x0404:
+ bufpush(0x0454);
+ break;
+ case 0x0405:
+ bufpush(0x0455);
+ break;
+ case 0x0406:
+ bufpush(0x0456);
+ break;
+ case 0x0407:
+ bufpush(0x0457);
+ break;
+ case 0x0408:
+ bufpush(0x0458);
+ break;
+ case 0x0409:
+ bufpush(0x0459);
+ break;
+ case 0x040A:
+ bufpush(0x045A);
+ break;
+ case 0x040B:
+ bufpush(0x045B);
+ break;
+ case 0x040C:
+ bufpush(0x045C);
+ break;
+ case 0x040D:
+ bufpush(0x045D);
+ break;
+ case 0x040E:
+ bufpush(0x045E);
+ break;
+ case 0x040F:
+ bufpush(0x045F);
+ break;
+ case 0x0410:
+ bufpush(0x0430);
+ break;
+ case 0x0411:
+ bufpush(0x0431);
+ break;
+ case 0x0412:
+ bufpush(0x0432);
+ break;
+ case 0x0413:
+ bufpush(0x0433);
+ break;
+ case 0x0414:
+ bufpush(0x0434);
+ break;
+ case 0x0415:
+ bufpush(0x0435);
+ break;
+ case 0x0416:
+ bufpush(0x0436);
+ break;
+ case 0x0417:
+ bufpush(0x0437);
+ break;
+ case 0x0418:
+ bufpush(0x0438);
+ break;
+ case 0x0419:
+ bufpush(0x0439);
+ break;
+ case 0x041A:
+ bufpush(0x043A);
+ break;
+ case 0x041B:
+ bufpush(0x043B);
+ break;
+ case 0x041C:
+ bufpush(0x043C);
+ break;
+ case 0x041D:
+ bufpush(0x043D);
+ break;
+ case 0x041E:
+ bufpush(0x043E);
+ break;
+ case 0x041F:
+ bufpush(0x043F);
+ break;
+ case 0x0420:
+ bufpush(0x0440);
+ break;
+ case 0x0421:
+ bufpush(0x0441);
+ break;
+ case 0x0422:
+ bufpush(0x0442);
+ break;
+ case 0x0423:
+ bufpush(0x0443);
+ break;
+ case 0x0424:
+ bufpush(0x0444);
+ break;
+ case 0x0425:
+ bufpush(0x0445);
+ break;
+ case 0x0426:
+ bufpush(0x0446);
+ break;
+ case 0x0427:
+ bufpush(0x0447);
+ break;
+ case 0x0428:
+ bufpush(0x0448);
+ break;
+ case 0x0429:
+ bufpush(0x0449);
+ break;
+ case 0x042A:
+ bufpush(0x044A);
+ break;
+ case 0x042B:
+ bufpush(0x044B);
+ break;
+ case 0x042C:
+ bufpush(0x044C);
+ break;
+ case 0x042D:
+ bufpush(0x044D);
+ break;
+ case 0x042E:
+ bufpush(0x044E);
+ break;
+ case 0x042F:
+ bufpush(0x044F);
+ break;
+ case 0x0460:
+ bufpush(0x0461);
+ break;
+ case 0x0462:
+ bufpush(0x0463);
+ break;
+ case 0x0464:
+ bufpush(0x0465);
+ break;
+ case 0x0466:
+ bufpush(0x0467);
+ break;
+ case 0x0468:
+ bufpush(0x0469);
+ break;
+ case 0x046A:
+ bufpush(0x046B);
+ break;
+ case 0x046C:
+ bufpush(0x046D);
+ break;
+ case 0x046E:
+ bufpush(0x046F);
+ break;
+ case 0x0470:
+ bufpush(0x0471);
+ break;
+ case 0x0472:
+ bufpush(0x0473);
+ break;
+ case 0x0474:
+ bufpush(0x0475);
+ break;
+ case 0x0476:
+ bufpush(0x0477);
+ break;
+ case 0x0478:
+ bufpush(0x0479);
+ break;
+ case 0x047A:
+ bufpush(0x047B);
+ break;
+ case 0x047C:
+ bufpush(0x047D);
+ break;
+ case 0x047E:
+ bufpush(0x047F);
+ break;
+ case 0x0480:
+ bufpush(0x0481);
+ break;
+ case 0x048A:
+ bufpush(0x048B);
+ break;
+ case 0x048C:
+ bufpush(0x048D);
+ break;
+ case 0x048E:
+ bufpush(0x048F);
+ break;
+ case 0x0490:
+ bufpush(0x0491);
+ break;
+ case 0x0492:
+ bufpush(0x0493);
+ break;
+ case 0x0494:
+ bufpush(0x0495);
+ break;
+ case 0x0496:
+ bufpush(0x0497);
+ break;
+ case 0x0498:
+ bufpush(0x0499);
+ break;
+ case 0x049A:
+ bufpush(0x049B);
+ break;
+ case 0x049C:
+ bufpush(0x049D);
+ break;
+ case 0x049E:
+ bufpush(0x049F);
+ break;
+ case 0x04A0:
+ bufpush(0x04A1);
+ break;
+ case 0x04A2:
+ bufpush(0x04A3);
+ break;
+ case 0x04A4:
+ bufpush(0x04A5);
+ break;
+ case 0x04A6:
+ bufpush(0x04A7);
+ break;
+ case 0x04A8:
+ bufpush(0x04A9);
+ break;
+ case 0x04AA:
+ bufpush(0x04AB);
+ break;
+ case 0x04AC:
+ bufpush(0x04AD);
+ break;
+ case 0x04AE:
+ bufpush(0x04AF);
+ break;
+ case 0x04B0:
+ bufpush(0x04B1);
+ break;
+ case 0x04B2:
+ bufpush(0x04B3);
+ break;
+ case 0x04B4:
+ bufpush(0x04B5);
+ break;
+ case 0x04B6:
+ bufpush(0x04B7);
+ break;
+ case 0x04B8:
+ bufpush(0x04B9);
+ break;
+ case 0x04BA:
+ bufpush(0x04BB);
+ break;
+ case 0x04BC:
+ bufpush(0x04BD);
+ break;
+ case 0x04BE:
+ bufpush(0x04BF);
+ break;
+ case 0x04C1:
+ bufpush(0x04C2);
+ break;
+ case 0x04C3:
+ bufpush(0x04C4);
+ break;
+ case 0x04C5:
+ bufpush(0x04C6);
+ break;
+ case 0x04C7:
+ bufpush(0x04C8);
+ break;
+ case 0x04C9:
+ bufpush(0x04CA);
+ break;
+ case 0x04CB:
+ bufpush(0x04CC);
+ break;
+ case 0x04CD:
+ bufpush(0x04CE);
+ break;
+ case 0x04D0:
+ bufpush(0x04D1);
+ break;
+ case 0x04D2:
+ bufpush(0x04D3);
+ break;
+ case 0x04D4:
+ bufpush(0x04D5);
+ break;
+ case 0x04D6:
+ bufpush(0x04D7);
+ break;
+ case 0x04D8:
+ bufpush(0x04D9);
+ break;
+ case 0x04DA:
+ bufpush(0x04DB);
+ break;
+ case 0x04DC:
+ bufpush(0x04DD);
+ break;
+ case 0x04DE:
+ bufpush(0x04DF);
+ break;
+ case 0x04E0:
+ bufpush(0x04E1);
+ break;
+ case 0x04E2:
+ bufpush(0x04E3);
+ break;
+ case 0x04E4:
+ bufpush(0x04E5);
+ break;
+ case 0x04E6:
+ bufpush(0x04E7);
+ break;
+ case 0x04E8:
+ bufpush(0x04E9);
+ break;
+ case 0x04EA:
+ bufpush(0x04EB);
+ break;
+ case 0x04EC:
+ bufpush(0x04ED);
+ break;
+ case 0x04EE:
+ bufpush(0x04EF);
+ break;
+ case 0x04F0:
+ bufpush(0x04F1);
+ break;
+ case 0x04F2:
+ bufpush(0x04F3);
+ break;
+ case 0x04F4:
+ bufpush(0x04F5);
+ break;
+ case 0x04F8:
+ bufpush(0x04F9);
+ break;
+ case 0x0500:
+ bufpush(0x0501);
+ break;
+ case 0x0502:
+ bufpush(0x0503);
+ break;
+ case 0x0504:
+ bufpush(0x0505);
+ break;
+ case 0x0506:
+ bufpush(0x0507);
+ break;
+ case 0x0508:
+ bufpush(0x0509);
+ break;
+ case 0x050A:
+ bufpush(0x050B);
+ break;
+ case 0x050C:
+ bufpush(0x050D);
+ break;
+ case 0x050E:
+ bufpush(0x050F);
+ break;
+ case 0x0531:
+ bufpush(0x0561);
+ break;
+ case 0x0532:
+ bufpush(0x0562);
+ break;
+ case 0x0533:
+ bufpush(0x0563);
+ break;
+ case 0x0534:
+ bufpush(0x0564);
+ break;
+ case 0x0535:
+ bufpush(0x0565);
+ break;
+ case 0x0536:
+ bufpush(0x0566);
+ break;
+ case 0x0537:
+ bufpush(0x0567);
+ break;
+ case 0x0538:
+ bufpush(0x0568);
+ break;
+ case 0x0539:
+ bufpush(0x0569);
+ break;
+ case 0x053A:
+ bufpush(0x056A);
+ break;
+ case 0x053B:
+ bufpush(0x056B);
+ break;
+ case 0x053C:
+ bufpush(0x056C);
+ break;
+ case 0x053D:
+ bufpush(0x056D);
+ break;
+ case 0x053E:
+ bufpush(0x056E);
+ break;
+ case 0x053F:
+ bufpush(0x056F);
+ break;
+ case 0x0540:
+ bufpush(0x0570);
+ break;
+ case 0x0541:
+ bufpush(0x0571);
+ break;
+ case 0x0542:
+ bufpush(0x0572);
+ break;
+ case 0x0543:
+ bufpush(0x0573);
+ break;
+ case 0x0544:
+ bufpush(0x0574);
+ break;
+ case 0x0545:
+ bufpush(0x0575);
+ break;
+ case 0x0546:
+ bufpush(0x0576);
+ break;
+ case 0x0547:
+ bufpush(0x0577);
+ break;
+ case 0x0548:
+ bufpush(0x0578);
+ break;
+ case 0x0549:
+ bufpush(0x0579);
+ break;
+ case 0x054A:
+ bufpush(0x057A);
+ break;
+ case 0x054B:
+ bufpush(0x057B);
+ break;
+ case 0x054C:
+ bufpush(0x057C);
+ break;
+ case 0x054D:
+ bufpush(0x057D);
+ break;
+ case 0x054E:
+ bufpush(0x057E);
+ break;
+ case 0x054F:
+ bufpush(0x057F);
+ break;
+ case 0x0550:
+ bufpush(0x0580);
+ break;
+ case 0x0551:
+ bufpush(0x0581);
+ break;
+ case 0x0552:
+ bufpush(0x0582);
+ break;
+ case 0x0553:
+ bufpush(0x0583);
+ break;
+ case 0x0554:
+ bufpush(0x0584);
+ break;
+ case 0x0555:
+ bufpush(0x0585);
+ break;
+ case 0x0556:
+ bufpush(0x0586);
+ break;
+ case 0x0587:
+ bufpush(0x0565);
+ bufpush(0x0582);
+ break;
+ case 0x1E00:
+ bufpush(0x1E01);
+ break;
+ case 0x1E02:
+ bufpush(0x1E03);
+ break;
+ case 0x1E04:
+ bufpush(0x1E05);
+ break;
+ case 0x1E06:
+ bufpush(0x1E07);
+ break;
+ case 0x1E08:
+ bufpush(0x1E09);
+ break;
+ case 0x1E0A:
+ bufpush(0x1E0B);
+ break;
+ case 0x1E0C:
+ bufpush(0x1E0D);
+ break;
+ case 0x1E0E:
+ bufpush(0x1E0F);
+ break;
+ case 0x1E10:
+ bufpush(0x1E11);
+ break;
+ case 0x1E12:
+ bufpush(0x1E13);
+ break;
+ case 0x1E14:
+ bufpush(0x1E15);
+ break;
+ case 0x1E16:
+ bufpush(0x1E17);
+ break;
+ case 0x1E18:
+ bufpush(0x1E19);
+ break;
+ case 0x1E1A:
+ bufpush(0x1E1B);
+ break;
+ case 0x1E1C:
+ bufpush(0x1E1D);
+ break;
+ case 0x1E1E:
+ bufpush(0x1E1F);
+ break;
+ case 0x1E20:
+ bufpush(0x1E21);
+ break;
+ case 0x1E22:
+ bufpush(0x1E23);
+ break;
+ case 0x1E24:
+ bufpush(0x1E25);
+ break;
+ case 0x1E26:
+ bufpush(0x1E27);
+ break;
+ case 0x1E28:
+ bufpush(0x1E29);
+ break;
+ case 0x1E2A:
+ bufpush(0x1E2B);
+ break;
+ case 0x1E2C:
+ bufpush(0x1E2D);
+ break;
+ case 0x1E2E:
+ bufpush(0x1E2F);
+ break;
+ case 0x1E30:
+ bufpush(0x1E31);
+ break;
+ case 0x1E32:
+ bufpush(0x1E33);
+ break;
+ case 0x1E34:
+ bufpush(0x1E35);
+ break;
+ case 0x1E36:
+ bufpush(0x1E37);
+ break;
+ case 0x1E38:
+ bufpush(0x1E39);
+ break;
+ case 0x1E3A:
+ bufpush(0x1E3B);
+ break;
+ case 0x1E3C:
+ bufpush(0x1E3D);
+ break;
+ case 0x1E3E:
+ bufpush(0x1E3F);
+ break;
+ case 0x1E40:
+ bufpush(0x1E41);
+ break;
+ case 0x1E42:
+ bufpush(0x1E43);
+ break;
+ case 0x1E44:
+ bufpush(0x1E45);
+ break;
+ case 0x1E46:
+ bufpush(0x1E47);
+ break;
+ case 0x1E48:
+ bufpush(0x1E49);
+ break;
+ case 0x1E4A:
+ bufpush(0x1E4B);
+ break;
+ case 0x1E4C:
+ bufpush(0x1E4D);
+ break;
+ case 0x1E4E:
+ bufpush(0x1E4F);
+ break;
+ case 0x1E50:
+ bufpush(0x1E51);
+ break;
+ case 0x1E52:
+ bufpush(0x1E53);
+ break;
+ case 0x1E54:
+ bufpush(0x1E55);
+ break;
+ case 0x1E56:
+ bufpush(0x1E57);
+ break;
+ case 0x1E58:
+ bufpush(0x1E59);
+ break;
+ case 0x1E5A:
+ bufpush(0x1E5B);
+ break;
+ case 0x1E5C:
+ bufpush(0x1E5D);
+ break;
+ case 0x1E5E:
+ bufpush(0x1E5F);
+ break;
+ case 0x1E60:
+ bufpush(0x1E61);
+ break;
+ case 0x1E62:
+ bufpush(0x1E63);
+ break;
+ case 0x1E64:
+ bufpush(0x1E65);
+ break;
+ case 0x1E66:
+ bufpush(0x1E67);
+ break;
+ case 0x1E68:
+ bufpush(0x1E69);
+ break;
+ case 0x1E6A:
+ bufpush(0x1E6B);
+ break;
+ case 0x1E6C:
+ bufpush(0x1E6D);
+ break;
+ case 0x1E6E:
+ bufpush(0x1E6F);
+ break;
+ case 0x1E70:
+ bufpush(0x1E71);
+ break;
+ case 0x1E72:
+ bufpush(0x1E73);
+ break;
+ case 0x1E74:
+ bufpush(0x1E75);
+ break;
+ case 0x1E76:
+ bufpush(0x1E77);
+ break;
+ case 0x1E78:
+ bufpush(0x1E79);
+ break;
+ case 0x1E7A:
+ bufpush(0x1E7B);
+ break;
+ case 0x1E7C:
+ bufpush(0x1E7D);
+ break;
+ case 0x1E7E:
+ bufpush(0x1E7F);
+ break;
+ case 0x1E80:
+ bufpush(0x1E81);
+ break;
+ case 0x1E82:
+ bufpush(0x1E83);
+ break;
+ case 0x1E84:
+ bufpush(0x1E85);
+ break;
+ case 0x1E86:
+ bufpush(0x1E87);
+ break;
+ case 0x1E88:
+ bufpush(0x1E89);
+ break;
+ case 0x1E8A:
+ bufpush(0x1E8B);
+ break;
+ case 0x1E8C:
+ bufpush(0x1E8D);
+ break;
+ case 0x1E8E:
+ bufpush(0x1E8F);
+ break;
+ case 0x1E90:
+ bufpush(0x1E91);
+ break;
+ case 0x1E92:
+ bufpush(0x1E93);
+ break;
+ case 0x1E94:
+ bufpush(0x1E95);
+ break;
+ case 0x1E96:
+ bufpush(0x0068);
+ bufpush(0x0331);
+ break;
+ case 0x1E97:
+ bufpush(0x0074);
+ bufpush(0x0308);
+ break;
+ case 0x1E98:
+ bufpush(0x0077);
+ bufpush(0x030A);
+ break;
+ case 0x1E99:
+ bufpush(0x0079);
+ bufpush(0x030A);
+ break;
+ case 0x1E9A:
+ bufpush(0x0061);
+ bufpush(0x02BE);
+ break;
+ case 0x1E9B:
+ bufpush(0x1E61);
+ break;
+ case 0x1EA0:
+ bufpush(0x1EA1);
+ break;
+ case 0x1EA2:
+ bufpush(0x1EA3);
+ break;
+ case 0x1EA4:
+ bufpush(0x1EA5);
+ break;
+ case 0x1EA6:
+ bufpush(0x1EA7);
+ break;
+ case 0x1EA8:
+ bufpush(0x1EA9);
+ break;
+ case 0x1EAA:
+ bufpush(0x1EAB);
+ break;
+ case 0x1EAC:
+ bufpush(0x1EAD);
+ break;
+ case 0x1EAE:
+ bufpush(0x1EAF);
+ break;
+ case 0x1EB0:
+ bufpush(0x1EB1);
+ break;
+ case 0x1EB2:
+ bufpush(0x1EB3);
+ break;
+ case 0x1EB4:
+ bufpush(0x1EB5);
+ break;
+ case 0x1EB6:
+ bufpush(0x1EB7);
+ break;
+ case 0x1EB8:
+ bufpush(0x1EB9);
+ break;
+ case 0x1EBA:
+ bufpush(0x1EBB);
+ break;
+ case 0x1EBC:
+ bufpush(0x1EBD);
+ break;
+ case 0x1EBE:
+ bufpush(0x1EBF);
+ break;
+ case 0x1EC0:
+ bufpush(0x1EC1);
+ break;
+ case 0x1EC2:
+ bufpush(0x1EC3);
+ break;
+ case 0x1EC4:
+ bufpush(0x1EC5);
+ break;
+ case 0x1EC6:
+ bufpush(0x1EC7);
+ break;
+ case 0x1EC8:
+ bufpush(0x1EC9);
+ break;
+ case 0x1ECA:
+ bufpush(0x1ECB);
+ break;
+ case 0x1ECC:
+ bufpush(0x1ECD);
+ break;
+ case 0x1ECE:
+ bufpush(0x1ECF);
+ break;
+ case 0x1ED0:
+ bufpush(0x1ED1);
+ break;
+ case 0x1ED2:
+ bufpush(0x1ED3);
+ break;
+ case 0x1ED4:
+ bufpush(0x1ED5);
+ break;
+ case 0x1ED6:
+ bufpush(0x1ED7);
+ break;
+ case 0x1ED8:
+ bufpush(0x1ED9);
+ break;
+ case 0x1EDA:
+ bufpush(0x1EDB);
+ break;
+ case 0x1EDC:
+ bufpush(0x1EDD);
+ break;
+ case 0x1EDE:
+ bufpush(0x1EDF);
+ break;
+ case 0x1EE0:
+ bufpush(0x1EE1);
+ break;
+ case 0x1EE2:
+ bufpush(0x1EE3);
+ break;
+ case 0x1EE4:
+ bufpush(0x1EE5);
+ break;
+ case 0x1EE6:
+ bufpush(0x1EE7);
+ break;
+ case 0x1EE8:
+ bufpush(0x1EE9);
+ break;
+ case 0x1EEA:
+ bufpush(0x1EEB);
+ break;
+ case 0x1EEC:
+ bufpush(0x1EED);
+ break;
+ case 0x1EEE:
+ bufpush(0x1EEF);
+ break;
+ case 0x1EF0:
+ bufpush(0x1EF1);
+ break;
+ case 0x1EF2:
+ bufpush(0x1EF3);
+ break;
+ case 0x1EF4:
+ bufpush(0x1EF5);
+ break;
+ case 0x1EF6:
+ bufpush(0x1EF7);
+ break;
+ case 0x1EF8:
+ bufpush(0x1EF9);
+ break;
+ case 0x1F08:
+ bufpush(0x1F00);
+ break;
+ case 0x1F09:
+ bufpush(0x1F01);
+ break;
+ case 0x1F0A:
+ bufpush(0x1F02);
+ break;
+ case 0x1F0B:
+ bufpush(0x1F03);
+ break;
+ case 0x1F0C:
+ bufpush(0x1F04);
+ break;
+ case 0x1F0D:
+ bufpush(0x1F05);
+ break;
+ case 0x1F0E:
+ bufpush(0x1F06);
+ break;
+ case 0x1F0F:
+ bufpush(0x1F07);
+ break;
+ case 0x1F18:
+ bufpush(0x1F10);
+ break;
+ case 0x1F19:
+ bufpush(0x1F11);
+ break;
+ case 0x1F1A:
+ bufpush(0x1F12);
+ break;
+ case 0x1F1B:
+ bufpush(0x1F13);
+ break;
+ case 0x1F1C:
+ bufpush(0x1F14);
+ break;
+ case 0x1F1D:
+ bufpush(0x1F15);
+ break;
+ case 0x1F28:
+ bufpush(0x1F20);
+ break;
+ case 0x1F29:
+ bufpush(0x1F21);
+ break;
+ case 0x1F2A:
+ bufpush(0x1F22);
+ break;
+ case 0x1F2B:
+ bufpush(0x1F23);
+ break;
+ case 0x1F2C:
+ bufpush(0x1F24);
+ break;
+ case 0x1F2D:
+ bufpush(0x1F25);
+ break;
+ case 0x1F2E:
+ bufpush(0x1F26);
+ break;
+ case 0x1F2F:
+ bufpush(0x1F27);
+ break;
+ case 0x1F38:
+ bufpush(0x1F30);
+ break;
+ case 0x1F39:
+ bufpush(0x1F31);
+ break;
+ case 0x1F3A:
+ bufpush(0x1F32);
+ break;
+ case 0x1F3B:
+ bufpush(0x1F33);
+ break;
+ case 0x1F3C:
+ bufpush(0x1F34);
+ break;
+ case 0x1F3D:
+ bufpush(0x1F35);
+ break;
+ case 0x1F3E:
+ bufpush(0x1F36);
+ break;
+ case 0x1F3F:
+ bufpush(0x1F37);
+ break;
+ case 0x1F48:
+ bufpush(0x1F40);
+ break;
+ case 0x1F49:
+ bufpush(0x1F41);
+ break;
+ case 0x1F4A:
+ bufpush(0x1F42);
+ break;
+ case 0x1F4B:
+ bufpush(0x1F43);
+ break;
+ case 0x1F4C:
+ bufpush(0x1F44);
+ break;
+ case 0x1F4D:
+ bufpush(0x1F45);
+ break;
+ case 0x1F50:
+ bufpush(0x03C5);
+ bufpush(0x0313);
+ break;
+ case 0x1F52:
+ bufpush(0x03C5);
+ bufpush(0x0313);
+ bufpush(0x0300);
+ break;
+ case 0x1F54:
+ bufpush(0x03C5);
+ bufpush(0x0313);
+ bufpush(0x0301);
+ break;
+ case 0x1F56:
+ bufpush(0x03C5);
+ bufpush(0x0313);
+ bufpush(0x0342);
+ break;
+ case 0x1F59:
+ bufpush(0x1F51);
+ break;
+ case 0x1F5B:
+ bufpush(0x1F53);
+ break;
+ case 0x1F5D:
+ bufpush(0x1F55);
+ break;
+ case 0x1F5F:
+ bufpush(0x1F57);
+ break;
+ case 0x1F68:
+ bufpush(0x1F60);
+ break;
+ case 0x1F69:
+ bufpush(0x1F61);
+ break;
+ case 0x1F6A:
+ bufpush(0x1F62);
+ break;
+ case 0x1F6B:
+ bufpush(0x1F63);
+ break;
+ case 0x1F6C:
+ bufpush(0x1F64);
+ break;
+ case 0x1F6D:
+ bufpush(0x1F65);
+ break;
+ case 0x1F6E:
+ bufpush(0x1F66);
+ break;
+ case 0x1F6F:
+ bufpush(0x1F67);
+ break;
+ case 0x1F80:
+ bufpush(0x1F00);
+ bufpush(0x03B9);
+ break;
+ case 0x1F81:
+ bufpush(0x1F01);
+ bufpush(0x03B9);
+ break;
+ case 0x1F82:
+ bufpush(0x1F02);
+ bufpush(0x03B9);
+ break;
+ case 0x1F83:
+ bufpush(0x1F03);
+ bufpush(0x03B9);
+ break;
+ case 0x1F84:
+ bufpush(0x1F04);
+ bufpush(0x03B9);
+ break;
+ case 0x1F85:
+ bufpush(0x1F05);
+ bufpush(0x03B9);
+ break;
+ case 0x1F86:
+ bufpush(0x1F06);
+ bufpush(0x03B9);
+ break;
+ case 0x1F87:
+ bufpush(0x1F07);
+ bufpush(0x03B9);
+ break;
+ case 0x1F88:
+ bufpush(0x1F00);
+ bufpush(0x03B9);
+ break;
+ case 0x1F89:
+ bufpush(0x1F01);
+ bufpush(0x03B9);
+ break;
+ case 0x1F8A:
+ bufpush(0x1F02);
+ bufpush(0x03B9);
+ break;
+ case 0x1F8B:
+ bufpush(0x1F03);
+ bufpush(0x03B9);
+ break;
+ case 0x1F8C:
+ bufpush(0x1F04);
+ bufpush(0x03B9);
+ break;
+ case 0x1F8D:
+ bufpush(0x1F05);
+ bufpush(0x03B9);
+ break;
+ case 0x1F8E:
+ bufpush(0x1F06);
+ bufpush(0x03B9);
+ break;
+ case 0x1F8F:
+ bufpush(0x1F07);
+ bufpush(0x03B9);
+ break;
+ case 0x1F90:
+ bufpush(0x1F20);
+ bufpush(0x03B9);
+ break;
+ case 0x1F91:
+ bufpush(0x1F21);
+ bufpush(0x03B9);
+ break;
+ case 0x1F92:
+ bufpush(0x1F22);
+ bufpush(0x03B9);
+ break;
+ case 0x1F93:
+ bufpush(0x1F23);
+ bufpush(0x03B9);
+ break;
+ case 0x1F94:
+ bufpush(0x1F24);
+ bufpush(0x03B9);
+ break;
+ case 0x1F95:
+ bufpush(0x1F25);
+ bufpush(0x03B9);
+ break;
+ case 0x1F96:
+ bufpush(0x1F26);
+ bufpush(0x03B9);
+ break;
+ case 0x1F97:
+ bufpush(0x1F27);
+ bufpush(0x03B9);
+ break;
+ case 0x1F98:
+ bufpush(0x1F20);
+ bufpush(0x03B9);
+ break;
+ case 0x1F99:
+ bufpush(0x1F21);
+ bufpush(0x03B9);
+ break;
+ case 0x1F9A:
+ bufpush(0x1F22);
+ bufpush(0x03B9);
+ break;
+ case 0x1F9B:
+ bufpush(0x1F23);
+ bufpush(0x03B9);
+ break;
+ case 0x1F9C:
+ bufpush(0x1F24);
+ bufpush(0x03B9);
+ break;
+ case 0x1F9D:
+ bufpush(0x1F25);
+ bufpush(0x03B9);
+ break;
+ case 0x1F9E:
+ bufpush(0x1F26);
+ bufpush(0x03B9);
+ break;
+ case 0x1F9F:
+ bufpush(0x1F27);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA0:
+ bufpush(0x1F60);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA1:
+ bufpush(0x1F61);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA2:
+ bufpush(0x1F62);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA3:
+ bufpush(0x1F63);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA4:
+ bufpush(0x1F64);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA5:
+ bufpush(0x1F65);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA6:
+ bufpush(0x1F66);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA7:
+ bufpush(0x1F67);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA8:
+ bufpush(0x1F60);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA9:
+ bufpush(0x1F61);
+ bufpush(0x03B9);
+ break;
+ case 0x1FAA:
+ bufpush(0x1F62);
+ bufpush(0x03B9);
+ break;
+ case 0x1FAB:
+ bufpush(0x1F63);
+ bufpush(0x03B9);
+ break;
+ case 0x1FAC:
+ bufpush(0x1F64);
+ bufpush(0x03B9);
+ break;
+ case 0x1FAD:
+ bufpush(0x1F65);
+ bufpush(0x03B9);
+ break;
+ case 0x1FAE:
+ bufpush(0x1F66);
+ bufpush(0x03B9);
+ break;
+ case 0x1FAF:
+ bufpush(0x1F67);
+ bufpush(0x03B9);
+ break;
+ case 0x1FB2:
+ bufpush(0x1F70);
+ bufpush(0x03B9);
+ break;
+ case 0x1FB3:
+ bufpush(0x03B1);
+ bufpush(0x03B9);
+ break;
+ case 0x1FB4:
+ bufpush(0x03AC);
+ bufpush(0x03B9);
+ break;
+ case 0x1FB6:
+ bufpush(0x03B1);
+ bufpush(0x0342);
+ break;
+ case 0x1FB7:
+ bufpush(0x03B1);
+ bufpush(0x0342);
+ bufpush(0x03B9);
+ break;
+ case 0x1FB8:
+ bufpush(0x1FB0);
+ break;
+ case 0x1FB9:
+ bufpush(0x1FB1);
+ break;
+ case 0x1FBA:
+ bufpush(0x1F70);
+ break;
+ case 0x1FBB:
+ bufpush(0x1F71);
+ break;
+ case 0x1FBC:
+ bufpush(0x03B1);
+ bufpush(0x03B9);
+ break;
+ case 0x1FBE:
+ bufpush(0x03B9);
+ break;
+ case 0x1FC2:
+ bufpush(0x1F74);
+ bufpush(0x03B9);
+ break;
+ case 0x1FC3:
+ bufpush(0x03B7);
+ bufpush(0x03B9);
+ break;
+ case 0x1FC4:
+ bufpush(0x03AE);
+ bufpush(0x03B9);
+ break;
+ case 0x1FC6:
+ bufpush(0x03B7);
+ bufpush(0x0342);
+ break;
+ case 0x1FC7:
+ bufpush(0x03B7);
+ bufpush(0x0342);
+ bufpush(0x03B9);
+ break;
+ case 0x1FC8:
+ bufpush(0x1F72);
+ break;
+ case 0x1FC9:
+ bufpush(0x1F73);
+ break;
+ case 0x1FCA:
+ bufpush(0x1F74);
+ break;
+ case 0x1FCB:
+ bufpush(0x1F75);
+ break;
+ case 0x1FCC:
+ bufpush(0x03B7);
+ bufpush(0x03B9);
+ break;
+ case 0x1FD2:
+ bufpush(0x03B9);
+ bufpush(0x0308);
+ bufpush(0x0300);
+ break;
+ case 0x1FD3:
+ bufpush(0x03B9);
+ bufpush(0x0308);
+ bufpush(0x0301);
+ break;
+ case 0x1FD6:
+ bufpush(0x03B9);
+ bufpush(0x0342);
+ break;
+ case 0x1FD7:
+ bufpush(0x03B9);
+ bufpush(0x0308);
+ bufpush(0x0342);
+ break;
+ case 0x1FD8:
+ bufpush(0x1FD0);
+ break;
+ case 0x1FD9:
+ bufpush(0x1FD1);
+ break;
+ case 0x1FDA:
+ bufpush(0x1F76);
+ break;
+ case 0x1FDB:
+ bufpush(0x1F77);
+ break;
+ case 0x1FE2:
+ bufpush(0x03C5);
+ bufpush(0x0308);
+ bufpush(0x0300);
+ break;
+ case 0x1FE3:
+ bufpush(0x03C5);
+ bufpush(0x0308);
+ bufpush(0x0301);
+ break;
+ case 0x1FE4:
+ bufpush(0x03C1);
+ bufpush(0x0313);
+ break;
+ case 0x1FE6:
+ bufpush(0x03C5);
+ bufpush(0x0342);
+ break;
+ case 0x1FE7:
+ bufpush(0x03C5);
+ bufpush(0x0308);
+ bufpush(0x0342);
+ break;
+ case 0x1FE8:
+ bufpush(0x1FE0);
+ break;
+ case 0x1FE9:
+ bufpush(0x1FE1);
+ break;
+ case 0x1FEA:
+ bufpush(0x1F7A);
+ break;
+ case 0x1FEB:
+ bufpush(0x1F7B);
+ break;
+ case 0x1FEC:
+ bufpush(0x1FE5);
+ break;
+ case 0x1FF2:
+ bufpush(0x1F7C);
+ bufpush(0x03B9);
+ break;
+ case 0x1FF3:
+ bufpush(0x03C9);
+ bufpush(0x03B9);
+ break;
+ case 0x1FF4:
+ bufpush(0x03CE);
+ bufpush(0x03B9);
+ break;
+ case 0x1FF6:
+ bufpush(0x03C9);
+ bufpush(0x0342);
+ break;
+ case 0x1FF7:
+ bufpush(0x03C9);
+ bufpush(0x0342);
+ bufpush(0x03B9);
+ break;
+ case 0x1FF8:
+ bufpush(0x1F78);
+ break;
+ case 0x1FF9:
+ bufpush(0x1F79);
+ break;
+ case 0x1FFA:
+ bufpush(0x1F7C);
+ break;
+ case 0x1FFB:
+ bufpush(0x1F7D);
+ break;
+ case 0x1FFC:
+ bufpush(0x03C9);
+ bufpush(0x03B9);
+ break;
+ case 0x2126:
+ bufpush(0x03C9);
+ break;
+ case 0x212A:
+ bufpush(0x006B);
+ break;
+ case 0x212B:
+ bufpush(0x00E5);
+ break;
+ case 0x2160:
+ bufpush(0x2170);
+ break;
+ case 0x2161:
+ bufpush(0x2171);
+ break;
+ case 0x2162:
+ bufpush(0x2172);
+ break;
+ case 0x2163:
+ bufpush(0x2173);
+ break;
+ case 0x2164:
+ bufpush(0x2174);
+ break;
+ case 0x2165:
+ bufpush(0x2175);
+ break;
+ case 0x2166:
+ bufpush(0x2176);
+ break;
+ case 0x2167:
+ bufpush(0x2177);
+ break;
+ case 0x2168:
+ bufpush(0x2178);
+ break;
+ case 0x2169:
+ bufpush(0x2179);
+ break;
+ case 0x216A:
+ bufpush(0x217A);
+ break;
+ case 0x216B:
+ bufpush(0x217B);
+ break;
+ case 0x216C:
+ bufpush(0x217C);
+ break;
+ case 0x216D:
+ bufpush(0x217D);
+ break;
+ case 0x216E:
+ bufpush(0x217E);
+ break;
+ case 0x216F:
+ bufpush(0x217F);
+ break;
+ case 0x24B6:
+ bufpush(0x24D0);
+ break;
+ case 0x24B7:
+ bufpush(0x24D1);
+ break;
+ case 0x24B8:
+ bufpush(0x24D2);
+ break;
+ case 0x24B9:
+ bufpush(0x24D3);
+ break;
+ case 0x24BA:
+ bufpush(0x24D4);
+ break;
+ case 0x24BB:
+ bufpush(0x24D5);
+ break;
+ case 0x24BC:
+ bufpush(0x24D6);
+ break;
+ case 0x24BD:
+ bufpush(0x24D7);
+ break;
+ case 0x24BE:
+ bufpush(0x24D8);
+ break;
+ case 0x24BF:
+ bufpush(0x24D9);
+ break;
+ case 0x24C0:
+ bufpush(0x24DA);
+ break;
+ case 0x24C1:
+ bufpush(0x24DB);
+ break;
+ case 0x24C2:
+ bufpush(0x24DC);
+ break;
+ case 0x24C3:
+ bufpush(0x24DD);
+ break;
+ case 0x24C4:
+ bufpush(0x24DE);
+ break;
+ case 0x24C5:
+ bufpush(0x24DF);
+ break;
+ case 0x24C6:
+ bufpush(0x24E0);
+ break;
+ case 0x24C7:
+ bufpush(0x24E1);
+ break;
+ case 0x24C8:
+ bufpush(0x24E2);
+ break;
+ case 0x24C9:
+ bufpush(0x24E3);
+ break;
+ case 0x24CA:
+ bufpush(0x24E4);
+ break;
+ case 0x24CB:
+ bufpush(0x24E5);
+ break;
+ case 0x24CC:
+ bufpush(0x24E6);
+ break;
+ case 0x24CD:
+ bufpush(0x24E7);
+ break;
+ case 0x24CE:
+ bufpush(0x24E8);
+ break;
+ case 0x24CF:
+ bufpush(0x24E9);
+ break;
+ case 0xFB00:
+ bufpush(0x0066);
+ bufpush(0x0066);
+ break;
+ case 0xFB01:
+ bufpush(0x0066);
+ bufpush(0x0069);
+ break;
+ case 0xFB02:
+ bufpush(0x0066);
+ bufpush(0x006C);
+ break;
+ case 0xFB03:
+ bufpush(0x0066);
+ bufpush(0x0066);
+ bufpush(0x0069);
+ break;
+ case 0xFB04:
+ bufpush(0x0066);
+ bufpush(0x0066);
+ bufpush(0x006C);
+ break;
+ case 0xFB05:
+ bufpush(0x0073);
+ bufpush(0x0074);
+ break;
+ case 0xFB06:
+ bufpush(0x0073);
+ bufpush(0x0074);
+ break;
+ case 0xFB13:
+ bufpush(0x0574);
+ bufpush(0x0576);
+ break;
+ case 0xFB14:
+ bufpush(0x0574);
+ bufpush(0x0565);
+ break;
+ case 0xFB15:
+ bufpush(0x0574);
+ bufpush(0x056B);
+ break;
+ case 0xFB16:
+ bufpush(0x057E);
+ bufpush(0x0576);
+ break;
+ case 0xFB17:
+ bufpush(0x0574);
+ bufpush(0x056D);
+ break;
+ case 0xFF21:
+ bufpush(0xFF41);
+ break;
+ case 0xFF22:
+ bufpush(0xFF42);
+ break;
+ case 0xFF23:
+ bufpush(0xFF43);
+ break;
+ case 0xFF24:
+ bufpush(0xFF44);
+ break;
+ case 0xFF25:
+ bufpush(0xFF45);
+ break;
+ case 0xFF26:
+ bufpush(0xFF46);
+ break;
+ case 0xFF27:
+ bufpush(0xFF47);
+ break;
+ case 0xFF28:
+ bufpush(0xFF48);
+ break;
+ case 0xFF29:
+ bufpush(0xFF49);
+ break;
+ case 0xFF2A:
+ bufpush(0xFF4A);
+ break;
+ case 0xFF2B:
+ bufpush(0xFF4B);
+ break;
+ case 0xFF2C:
+ bufpush(0xFF4C);
+ break;
+ case 0xFF2D:
+ bufpush(0xFF4D);
+ break;
+ case 0xFF2E:
+ bufpush(0xFF4E);
+ break;
+ case 0xFF2F:
+ bufpush(0xFF4F);
+ break;
+ case 0xFF30:
+ bufpush(0xFF50);
+ break;
+ case 0xFF31:
+ bufpush(0xFF51);
+ break;
+ case 0xFF32:
+ bufpush(0xFF52);
+ break;
+ case 0xFF33:
+ bufpush(0xFF53);
+ break;
+ case 0xFF34:
+ bufpush(0xFF54);
+ break;
+ case 0xFF35:
+ bufpush(0xFF55);
+ break;
+ case 0xFF36:
+ bufpush(0xFF56);
+ break;
+ case 0xFF37:
+ bufpush(0xFF57);
+ break;
+ case 0xFF38:
+ bufpush(0xFF58);
+ break;
+ case 0xFF39:
+ bufpush(0xFF59);
+ break;
+ case 0xFF3A:
+ bufpush(0xFF5A);
+ break;
+ case 0x10400:
+ bufpush(0x10428);
+ break;
+ case 0x10401:
+ bufpush(0x10429);
+ break;
+ case 0x10402:
+ bufpush(0x1042A);
+ break;
+ case 0x10403:
+ bufpush(0x1042B);
+ break;
+ case 0x10404:
+ bufpush(0x1042C);
+ break;
+ case 0x10405:
+ bufpush(0x1042D);
+ break;
+ case 0x10406:
+ bufpush(0x1042E);
+ break;
+ case 0x10407:
+ bufpush(0x1042F);
+ break;
+ case 0x10408:
+ bufpush(0x10430);
+ break;
+ case 0x10409:
+ bufpush(0x10431);
+ break;
+ case 0x1040A:
+ bufpush(0x10432);
+ break;
+ case 0x1040B:
+ bufpush(0x10433);
+ break;
+ case 0x1040C:
+ bufpush(0x10434);
+ break;
+ case 0x1040D:
+ bufpush(0x10435);
+ break;
+ case 0x1040E:
+ bufpush(0x10436);
+ break;
+ case 0x1040F:
+ bufpush(0x10437);
+ break;
+ case 0x10410:
+ bufpush(0x10438);
+ break;
+ case 0x10411:
+ bufpush(0x10439);
+ break;
+ case 0x10412:
+ bufpush(0x1043A);
+ break;
+ case 0x10413:
+ bufpush(0x1043B);
+ break;
+ case 0x10414:
+ bufpush(0x1043C);
+ break;
+ case 0x10415:
+ bufpush(0x1043D);
+ break;
+ case 0x10416:
+ bufpush(0x1043E);
+ break;
+ case 0x10417:
+ bufpush(0x1043F);
+ break;
+ case 0x10418:
+ bufpush(0x10440);
+ break;
+ case 0x10419:
+ bufpush(0x10441);
+ break;
+ case 0x1041A:
+ bufpush(0x10442);
+ break;
+ case 0x1041B:
+ bufpush(0x10443);
+ break;
+ case 0x1041C:
+ bufpush(0x10444);
+ break;
+ case 0x1041D:
+ bufpush(0x10445);
+ break;
+ case 0x1041E:
+ bufpush(0x10446);
+ break;
+ case 0x1041F:
+ bufpush(0x10447);
+ break;
+ case 0x10420:
+ bufpush(0x10448);
+ break;
+ case 0x10421:
+ bufpush(0x10449);
+ break;
+ case 0x10422:
+ bufpush(0x1044A);
+ break;
+ case 0x10423:
+ bufpush(0x1044B);
+ break;
+ case 0x10424:
+ bufpush(0x1044C);
+ break;
+ case 0x10425:
+ bufpush(0x1044D);
+ break;
+ default:
+ bufpush(c);
+ }
diff --git a/src/casefold.c b/src/casefold.c
new file mode 100644
index 0000000..33f18aa
--- /dev/null
+++ b/src/casefold.c
@@ -0,0 +1,2699 @@
+#include <stdlib.h>
+#include <stdio.h>
+
+
+ switch c {
+ case 0x0041:
+ bufpush(0x0061);
+ break;
+ case 0x0042:
+ bufpush(0x0062);
+ break;
+ case 0x0043:
+ bufpush(0x0063);
+ break;
+ case 0x0044:
+ bufpush(0x0064);
+ break;
+ case 0x0045:
+ bufpush(0x0065);
+ break;
+ case 0x0046:
+ bufpush(0x0066);
+ break;
+ case 0x0047:
+ bufpush(0x0067);
+ break;
+ case 0x0048:
+ bufpush(0x0068);
+ break;
+ case 0x0049:
+ bufpush(0x0069);
+ break;
+ case 0x0049:
+ bufpush(0x0131);
+ break;
+ case 0x004A:
+ bufpush(0x006A);
+ break;
+ case 0x004B:
+ bufpush(0x006B);
+ break;
+ case 0x004C:
+ bufpush(0x006C);
+ break;
+ case 0x004D:
+ bufpush(0x006D);
+ break;
+ case 0x004E:
+ bufpush(0x006E);
+ break;
+ case 0x004F:
+ bufpush(0x006F);
+ break;
+ case 0x0050:
+ bufpush(0x0070);
+ break;
+ case 0x0051:
+ bufpush(0x0071);
+ break;
+ case 0x0052:
+ bufpush(0x0072);
+ break;
+ case 0x0053:
+ bufpush(0x0073);
+ break;
+ case 0x0054:
+ bufpush(0x0074);
+ break;
+ case 0x0055:
+ bufpush(0x0075);
+ break;
+ case 0x0056:
+ bufpush(0x0076);
+ break;
+ case 0x0057:
+ bufpush(0x0077);
+ break;
+ case 0x0058:
+ bufpush(0x0078);
+ break;
+ case 0x0059:
+ bufpush(0x0079);
+ break;
+ case 0x005A:
+ bufpush(0x007A);
+ break;
+ case 0x00B5:
+ bufpush(0x03BC);
+ break;
+ case 0x00C0:
+ bufpush(0x00E0);
+ break;
+ case 0x00C1:
+ bufpush(0x00E1);
+ break;
+ case 0x00C2:
+ bufpush(0x00E2);
+ break;
+ case 0x00C3:
+ bufpush(0x00E3);
+ break;
+ case 0x00C4:
+ bufpush(0x00E4);
+ break;
+ case 0x00C5:
+ bufpush(0x00E5);
+ break;
+ case 0x00C6:
+ bufpush(0x00E6);
+ break;
+ case 0x00C7:
+ bufpush(0x00E7);
+ break;
+ case 0x00C8:
+ bufpush(0x00E8);
+ break;
+ case 0x00C9:
+ bufpush(0x00E9);
+ break;
+ case 0x00CA:
+ bufpush(0x00EA);
+ break;
+ case 0x00CB:
+ bufpush(0x00EB);
+ break;
+ case 0x00CC:
+ bufpush(0x00EC);
+ break;
+ case 0x00CD:
+ bufpush(0x00ED);
+ break;
+ case 0x00CE:
+ bufpush(0x00EE);
+ break;
+ case 0x00CF:
+ bufpush(0x00EF);
+ break;
+ case 0x00D0:
+ bufpush(0x00F0);
+ break;
+ case 0x00D1:
+ bufpush(0x00F1);
+ break;
+ case 0x00D2:
+ bufpush(0x00F2);
+ break;
+ case 0x00D3:
+ bufpush(0x00F3);
+ break;
+ case 0x00D4:
+ bufpush(0x00F4);
+ break;
+ case 0x00D5:
+ bufpush(0x00F5);
+ break;
+ case 0x00D6:
+ bufpush(0x00F6);
+ break;
+ case 0x00D8:
+ bufpush(0x00F8);
+ break;
+ case 0x00D9:
+ bufpush(0x00F9);
+ break;
+ case 0x00DA:
+ bufpush(0x00FA);
+ break;
+ case 0x00DB:
+ bufpush(0x00FB);
+ break;
+ case 0x00DC:
+ bufpush(0x00FC);
+ break;
+ case 0x00DD:
+ bufpush(0x00FD);
+ break;
+ case 0x00DE:
+ bufpush(0x00FE);
+ break;
+ case 0x00DF:
+ bufpush(0x0073);
+ bufpush(0x0073);
+ break;
+ case 0x0100:
+ bufpush(0x0101);
+ break;
+ case 0x0102:
+ bufpush(0x0103);
+ break;
+ case 0x0104:
+ bufpush(0x0105);
+ break;
+ case 0x0106:
+ bufpush(0x0107);
+ break;
+ case 0x0108:
+ bufpush(0x0109);
+ break;
+ case 0x010A:
+ bufpush(0x010B);
+ break;
+ case 0x010C:
+ bufpush(0x010D);
+ break;
+ case 0x010E:
+ bufpush(0x010F);
+ break;
+ case 0x0110:
+ bufpush(0x0111);
+ break;
+ case 0x0112:
+ bufpush(0x0113);
+ break;
+ case 0x0114:
+ bufpush(0x0115);
+ break;
+ case 0x0116:
+ bufpush(0x0117);
+ break;
+ case 0x0118:
+ bufpush(0x0119);
+ break;
+ case 0x011A:
+ bufpush(0x011B);
+ break;
+ case 0x011C:
+ bufpush(0x011D);
+ break;
+ case 0x011E:
+ bufpush(0x011F);
+ break;
+ case 0x0120:
+ bufpush(0x0121);
+ break;
+ case 0x0122:
+ bufpush(0x0123);
+ break;
+ case 0x0124:
+ bufpush(0x0125);
+ break;
+ case 0x0126:
+ bufpush(0x0127);
+ break;
+ case 0x0128:
+ bufpush(0x0129);
+ break;
+ case 0x012A:
+ bufpush(0x012B);
+ break;
+ case 0x012C:
+ bufpush(0x012D);
+ break;
+ case 0x012E:
+ bufpush(0x012F);
+ break;
+ case 0x0130:
+ bufpush(0x0069);
+ bufpush(0x0307);
+ break;
+ case 0x0130:
+ bufpush(0x0069);
+ break;
+ case 0x0132:
+ bufpush(0x0133);
+ break;
+ case 0x0134:
+ bufpush(0x0135);
+ break;
+ case 0x0136:
+ bufpush(0x0137);
+ break;
+ case 0x0139:
+ bufpush(0x013A);
+ break;
+ case 0x013B:
+ bufpush(0x013C);
+ break;
+ case 0x013D:
+ bufpush(0x013E);
+ break;
+ case 0x013F:
+ bufpush(0x0140);
+ break;
+ case 0x0141:
+ bufpush(0x0142);
+ break;
+ case 0x0143:
+ bufpush(0x0144);
+ break;
+ case 0x0145:
+ bufpush(0x0146);
+ break;
+ case 0x0147:
+ bufpush(0x0148);
+ break;
+ case 0x0149:
+ bufpush(0x02BC);
+ bufpush(0x006E);
+ break;
+ case 0x014A:
+ bufpush(0x014B);
+ break;
+ case 0x014C:
+ bufpush(0x014D);
+ break;
+ case 0x014E:
+ bufpush(0x014F);
+ break;
+ case 0x0150:
+ bufpush(0x0151);
+ break;
+ case 0x0152:
+ bufpush(0x0153);
+ break;
+ case 0x0154:
+ bufpush(0x0155);
+ break;
+ case 0x0156:
+ bufpush(0x0157);
+ break;
+ case 0x0158:
+ bufpush(0x0159);
+ break;
+ case 0x015A:
+ bufpush(0x015B);
+ break;
+ case 0x015C:
+ bufpush(0x015D);
+ break;
+ case 0x015E:
+ bufpush(0x015F);
+ break;
+ case 0x0160:
+ bufpush(0x0161);
+ break;
+ case 0x0162:
+ bufpush(0x0163);
+ break;
+ case 0x0164:
+ bufpush(0x0165);
+ break;
+ case 0x0166:
+ bufpush(0x0167);
+ break;
+ case 0x0168:
+ bufpush(0x0169);
+ break;
+ case 0x016A:
+ bufpush(0x016B);
+ break;
+ case 0x016C:
+ bufpush(0x016D);
+ break;
+ case 0x016E:
+ bufpush(0x016F);
+ break;
+ case 0x0170:
+ bufpush(0x0171);
+ break;
+ case 0x0172:
+ bufpush(0x0173);
+ break;
+ case 0x0174:
+ bufpush(0x0175);
+ break;
+ case 0x0176:
+ bufpush(0x0177);
+ break;
+ case 0x0178:
+ bufpush(0x00FF);
+ break;
+ case 0x0179:
+ bufpush(0x017A);
+ break;
+ case 0x017B:
+ bufpush(0x017C);
+ break;
+ case 0x017D:
+ bufpush(0x017E);
+ break;
+ case 0x017F:
+ bufpush(0x0073);
+ break;
+ case 0x0181:
+ bufpush(0x0253);
+ break;
+ case 0x0182:
+ bufpush(0x0183);
+ break;
+ case 0x0184:
+ bufpush(0x0185);
+ break;
+ case 0x0186:
+ bufpush(0x0254);
+ break;
+ case 0x0187:
+ bufpush(0x0188);
+ break;
+ case 0x0189:
+ bufpush(0x0256);
+ break;
+ case 0x018A:
+ bufpush(0x0257);
+ break;
+ case 0x018B:
+ bufpush(0x018C);
+ break;
+ case 0x018E:
+ bufpush(0x01DD);
+ break;
+ case 0x018F:
+ bufpush(0x0259);
+ break;
+ case 0x0190:
+ bufpush(0x025B);
+ break;
+ case 0x0191:
+ bufpush(0x0192);
+ break;
+ case 0x0193:
+ bufpush(0x0260);
+ break;
+ case 0x0194:
+ bufpush(0x0263);
+ break;
+ case 0x0196:
+ bufpush(0x0269);
+ break;
+ case 0x0197:
+ bufpush(0x0268);
+ break;
+ case 0x0198:
+ bufpush(0x0199);
+ break;
+ case 0x019C:
+ bufpush(0x026F);
+ break;
+ case 0x019D:
+ bufpush(0x0272);
+ break;
+ case 0x019F:
+ bufpush(0x0275);
+ break;
+ case 0x01A0:
+ bufpush(0x01A1);
+ break;
+ case 0x01A2:
+ bufpush(0x01A3);
+ break;
+ case 0x01A4:
+ bufpush(0x01A5);
+ break;
+ case 0x01A6:
+ bufpush(0x0280);
+ break;
+ case 0x01A7:
+ bufpush(0x01A8);
+ break;
+ case 0x01A9:
+ bufpush(0x0283);
+ break;
+ case 0x01AC:
+ bufpush(0x01AD);
+ break;
+ case 0x01AE:
+ bufpush(0x0288);
+ break;
+ case 0x01AF:
+ bufpush(0x01B0);
+ break;
+ case 0x01B1:
+ bufpush(0x028A);
+ break;
+ case 0x01B2:
+ bufpush(0x028B);
+ break;
+ case 0x01B3:
+ bufpush(0x01B4);
+ break;
+ case 0x01B5:
+ bufpush(0x01B6);
+ break;
+ case 0x01B7:
+ bufpush(0x0292);
+ break;
+ case 0x01B8:
+ bufpush(0x01B9);
+ break;
+ case 0x01BC:
+ bufpush(0x01BD);
+ break;
+ case 0x01C4:
+ bufpush(0x01C6);
+ break;
+ case 0x01C5:
+ bufpush(0x01C6);
+ break;
+ case 0x01C7:
+ bufpush(0x01C9);
+ break;
+ case 0x01C8:
+ bufpush(0x01C9);
+ break;
+ case 0x01CA:
+ bufpush(0x01CC);
+ break;
+ case 0x01CB:
+ bufpush(0x01CC);
+ break;
+ case 0x01CD:
+ bufpush(0x01CE);
+ break;
+ case 0x01CF:
+ bufpush(0x01D0);
+ break;
+ case 0x01D1:
+ bufpush(0x01D2);
+ break;
+ case 0x01D3:
+ bufpush(0x01D4);
+ break;
+ case 0x01D5:
+ bufpush(0x01D6);
+ break;
+ case 0x01D7:
+ bufpush(0x01D8);
+ break;
+ case 0x01D9:
+ bufpush(0x01DA);
+ break;
+ case 0x01DB:
+ bufpush(0x01DC);
+ break;
+ case 0x01DE:
+ bufpush(0x01DF);
+ break;
+ case 0x01E0:
+ bufpush(0x01E1);
+ break;
+ case 0x01E2:
+ bufpush(0x01E3);
+ break;
+ case 0x01E4:
+ bufpush(0x01E5);
+ break;
+ case 0x01E6:
+ bufpush(0x01E7);
+ break;
+ case 0x01E8:
+ bufpush(0x01E9);
+ break;
+ case 0x01EA:
+ bufpush(0x01EB);
+ break;
+ case 0x01EC:
+ bufpush(0x01ED);
+ break;
+ case 0x01EE:
+ bufpush(0x01EF);
+ break;
+ case 0x01F0:
+ bufpush(0x006A);
+ bufpush(0x030C);
+ break;
+ case 0x01F1:
+ bufpush(0x01F3);
+ break;
+ case 0x01F2:
+ bufpush(0x01F3);
+ break;
+ case 0x01F4:
+ bufpush(0x01F5);
+ break;
+ case 0x01F6:
+ bufpush(0x0195);
+ break;
+ case 0x01F7:
+ bufpush(0x01BF);
+ break;
+ case 0x01F8:
+ bufpush(0x01F9);
+ break;
+ case 0x01FA:
+ bufpush(0x01FB);
+ break;
+ case 0x01FC:
+ bufpush(0x01FD);
+ break;
+ case 0x01FE:
+ bufpush(0x01FF);
+ break;
+ case 0x0200:
+ bufpush(0x0201);
+ break;
+ case 0x0202:
+ bufpush(0x0203);
+ break;
+ case 0x0204:
+ bufpush(0x0205);
+ break;
+ case 0x0206:
+ bufpush(0x0207);
+ break;
+ case 0x0208:
+ bufpush(0x0209);
+ break;
+ case 0x020A:
+ bufpush(0x020B);
+ break;
+ case 0x020C:
+ bufpush(0x020D);
+ break;
+ case 0x020E:
+ bufpush(0x020F);
+ break;
+ case 0x0210:
+ bufpush(0x0211);
+ break;
+ case 0x0212:
+ bufpush(0x0213);
+ break;
+ case 0x0214:
+ bufpush(0x0215);
+ break;
+ case 0x0216:
+ bufpush(0x0217);
+ break;
+ case 0x0218:
+ bufpush(0x0219);
+ break;
+ case 0x021A:
+ bufpush(0x021B);
+ break;
+ case 0x021C:
+ bufpush(0x021D);
+ break;
+ case 0x021E:
+ bufpush(0x021F);
+ break;
+ case 0x0220:
+ bufpush(0x019E);
+ break;
+ case 0x0222:
+ bufpush(0x0223);
+ break;
+ case 0x0224:
+ bufpush(0x0225);
+ break;
+ case 0x0226:
+ bufpush(0x0227);
+ break;
+ case 0x0228:
+ bufpush(0x0229);
+ break;
+ case 0x022A:
+ bufpush(0x022B);
+ break;
+ case 0x022C:
+ bufpush(0x022D);
+ break;
+ case 0x022E:
+ bufpush(0x022F);
+ break;
+ case 0x0230:
+ bufpush(0x0231);
+ break;
+ case 0x0232:
+ bufpush(0x0233);
+ break;
+ case 0x0345:
+ bufpush(0x03B9);
+ break;
+ case 0x0386:
+ bufpush(0x03AC);
+ break;
+ case 0x0388:
+ bufpush(0x03AD);
+ break;
+ case 0x0389:
+ bufpush(0x03AE);
+ break;
+ case 0x038A:
+ bufpush(0x03AF);
+ break;
+ case 0x038C:
+ bufpush(0x03CC);
+ break;
+ case 0x038E:
+ bufpush(0x03CD);
+ break;
+ case 0x038F:
+ bufpush(0x03CE);
+ break;
+ case 0x0390:
+ bufpush(0x03B9);
+ bufpush(0x0308);
+ bufpush(0x0301);
+ break;
+ case 0x0391:
+ bufpush(0x03B1);
+ break;
+ case 0x0392:
+ bufpush(0x03B2);
+ break;
+ case 0x0393:
+ bufpush(0x03B3);
+ break;
+ case 0x0394:
+ bufpush(0x03B4);
+ break;
+ case 0x0395:
+ bufpush(0x03B5);
+ break;
+ case 0x0396:
+ bufpush(0x03B6);
+ break;
+ case 0x0397:
+ bufpush(0x03B7);
+ break;
+ case 0x0398:
+ bufpush(0x03B8);
+ break;
+ case 0x0399:
+ bufpush(0x03B9);
+ break;
+ case 0x039A:
+ bufpush(0x03BA);
+ break;
+ case 0x039B:
+ bufpush(0x03BB);
+ break;
+ case 0x039C:
+ bufpush(0x03BC);
+ break;
+ case 0x039D:
+ bufpush(0x03BD);
+ break;
+ case 0x039E:
+ bufpush(0x03BE);
+ break;
+ case 0x039F:
+ bufpush(0x03BF);
+ break;
+ case 0x03A0:
+ bufpush(0x03C0);
+ break;
+ case 0x03A1:
+ bufpush(0x03C1);
+ break;
+ case 0x03A3:
+ bufpush(0x03C3);
+ break;
+ case 0x03A4:
+ bufpush(0x03C4);
+ break;
+ case 0x03A5:
+ bufpush(0x03C5);
+ break;
+ case 0x03A6:
+ bufpush(0x03C6);
+ break;
+ case 0x03A7:
+ bufpush(0x03C7);
+ break;
+ case 0x03A8:
+ bufpush(0x03C8);
+ break;
+ case 0x03A9:
+ bufpush(0x03C9);
+ break;
+ case 0x03AA:
+ bufpush(0x03CA);
+ break;
+ case 0x03AB:
+ bufpush(0x03CB);
+ break;
+ case 0x03B0:
+ bufpush(0x03C5);
+ bufpush(0x0308);
+ bufpush(0x0301);
+ break;
+ case 0x03C2:
+ bufpush(0x03C3);
+ break;
+ case 0x03D0:
+ bufpush(0x03B2);
+ break;
+ case 0x03D1:
+ bufpush(0x03B8);
+ break;
+ case 0x03D5:
+ bufpush(0x03C6);
+ break;
+ case 0x03D6:
+ bufpush(0x03C0);
+ break;
+ case 0x03D8:
+ bufpush(0x03D9);
+ break;
+ case 0x03DA:
+ bufpush(0x03DB);
+ break;
+ case 0x03DC:
+ bufpush(0x03DD);
+ break;
+ case 0x03DE:
+ bufpush(0x03DF);
+ break;
+ case 0x03E0:
+ bufpush(0x03E1);
+ break;
+ case 0x03E2:
+ bufpush(0x03E3);
+ break;
+ case 0x03E4:
+ bufpush(0x03E5);
+ break;
+ case 0x03E6:
+ bufpush(0x03E7);
+ break;
+ case 0x03E8:
+ bufpush(0x03E9);
+ break;
+ case 0x03EA:
+ bufpush(0x03EB);
+ break;
+ case 0x03EC:
+ bufpush(0x03ED);
+ break;
+ case 0x03EE:
+ bufpush(0x03EF);
+ break;
+ case 0x03F0:
+ bufpush(0x03BA);
+ break;
+ case 0x03F1:
+ bufpush(0x03C1);
+ break;
+ case 0x03F2:
+ bufpush(0x03C3);
+ break;
+ case 0x03F4:
+ bufpush(0x03B8);
+ break;
+ case 0x03F5:
+ bufpush(0x03B5);
+ break;
+ case 0x0400:
+ bufpush(0x0450);
+ break;
+ case 0x0401:
+ bufpush(0x0451);
+ break;
+ case 0x0402:
+ bufpush(0x0452);
+ break;
+ case 0x0403:
+ bufpush(0x0453);
+ break;
+ case 0x0404:
+ bufpush(0x0454);
+ break;
+ case 0x0405:
+ bufpush(0x0455);
+ break;
+ case 0x0406:
+ bufpush(0x0456);
+ break;
+ case 0x0407:
+ bufpush(0x0457);
+ break;
+ case 0x0408:
+ bufpush(0x0458);
+ break;
+ case 0x0409:
+ bufpush(0x0459);
+ break;
+ case 0x040A:
+ bufpush(0x045A);
+ break;
+ case 0x040B:
+ bufpush(0x045B);
+ break;
+ case 0x040C:
+ bufpush(0x045C);
+ break;
+ case 0x040D:
+ bufpush(0x045D);
+ break;
+ case 0x040E:
+ bufpush(0x045E);
+ break;
+ case 0x040F:
+ bufpush(0x045F);
+ break;
+ case 0x0410:
+ bufpush(0x0430);
+ break;
+ case 0x0411:
+ bufpush(0x0431);
+ break;
+ case 0x0412:
+ bufpush(0x0432);
+ break;
+ case 0x0413:
+ bufpush(0x0433);
+ break;
+ case 0x0414:
+ bufpush(0x0434);
+ break;
+ case 0x0415:
+ bufpush(0x0435);
+ break;
+ case 0x0416:
+ bufpush(0x0436);
+ break;
+ case 0x0417:
+ bufpush(0x0437);
+ break;
+ case 0x0418:
+ bufpush(0x0438);
+ break;
+ case 0x0419:
+ bufpush(0x0439);
+ break;
+ case 0x041A:
+ bufpush(0x043A);
+ break;
+ case 0x041B:
+ bufpush(0x043B);
+ break;
+ case 0x041C:
+ bufpush(0x043C);
+ break;
+ case 0x041D:
+ bufpush(0x043D);
+ break;
+ case 0x041E:
+ bufpush(0x043E);
+ break;
+ case 0x041F:
+ bufpush(0x043F);
+ break;
+ case 0x0420:
+ bufpush(0x0440);
+ break;
+ case 0x0421:
+ bufpush(0x0441);
+ break;
+ case 0x0422:
+ bufpush(0x0442);
+ break;
+ case 0x0423:
+ bufpush(0x0443);
+ break;
+ case 0x0424:
+ bufpush(0x0444);
+ break;
+ case 0x0425:
+ bufpush(0x0445);
+ break;
+ case 0x0426:
+ bufpush(0x0446);
+ break;
+ case 0x0427:
+ bufpush(0x0447);
+ break;
+ case 0x0428:
+ bufpush(0x0448);
+ break;
+ case 0x0429:
+ bufpush(0x0449);
+ break;
+ case 0x042A:
+ bufpush(0x044A);
+ break;
+ case 0x042B:
+ bufpush(0x044B);
+ break;
+ case 0x042C:
+ bufpush(0x044C);
+ break;
+ case 0x042D:
+ bufpush(0x044D);
+ break;
+ case 0x042E:
+ bufpush(0x044E);
+ break;
+ case 0x042F:
+ bufpush(0x044F);
+ break;
+ case 0x0460:
+ bufpush(0x0461);
+ break;
+ case 0x0462:
+ bufpush(0x0463);
+ break;
+ case 0x0464:
+ bufpush(0x0465);
+ break;
+ case 0x0466:
+ bufpush(0x0467);
+ break;
+ case 0x0468:
+ bufpush(0x0469);
+ break;
+ case 0x046A:
+ bufpush(0x046B);
+ break;
+ case 0x046C:
+ bufpush(0x046D);
+ break;
+ case 0x046E:
+ bufpush(0x046F);
+ break;
+ case 0x0470:
+ bufpush(0x0471);
+ break;
+ case 0x0472:
+ bufpush(0x0473);
+ break;
+ case 0x0474:
+ bufpush(0x0475);
+ break;
+ case 0x0476:
+ bufpush(0x0477);
+ break;
+ case 0x0478:
+ bufpush(0x0479);
+ break;
+ case 0x047A:
+ bufpush(0x047B);
+ break;
+ case 0x047C:
+ bufpush(0x047D);
+ break;
+ case 0x047E:
+ bufpush(0x047F);
+ break;
+ case 0x0480:
+ bufpush(0x0481);
+ break;
+ case 0x048A:
+ bufpush(0x048B);
+ break;
+ case 0x048C:
+ bufpush(0x048D);
+ break;
+ case 0x048E:
+ bufpush(0x048F);
+ break;
+ case 0x0490:
+ bufpush(0x0491);
+ break;
+ case 0x0492:
+ bufpush(0x0493);
+ break;
+ case 0x0494:
+ bufpush(0x0495);
+ break;
+ case 0x0496:
+ bufpush(0x0497);
+ break;
+ case 0x0498:
+ bufpush(0x0499);
+ break;
+ case 0x049A:
+ bufpush(0x049B);
+ break;
+ case 0x049C:
+ bufpush(0x049D);
+ break;
+ case 0x049E:
+ bufpush(0x049F);
+ break;
+ case 0x04A0:
+ bufpush(0x04A1);
+ break;
+ case 0x04A2:
+ bufpush(0x04A3);
+ break;
+ case 0x04A4:
+ bufpush(0x04A5);
+ break;
+ case 0x04A6:
+ bufpush(0x04A7);
+ break;
+ case 0x04A8:
+ bufpush(0x04A9);
+ break;
+ case 0x04AA:
+ bufpush(0x04AB);
+ break;
+ case 0x04AC:
+ bufpush(0x04AD);
+ break;
+ case 0x04AE:
+ bufpush(0x04AF);
+ break;
+ case 0x04B0:
+ bufpush(0x04B1);
+ break;
+ case 0x04B2:
+ bufpush(0x04B3);
+ break;
+ case 0x04B4:
+ bufpush(0x04B5);
+ break;
+ case 0x04B6:
+ bufpush(0x04B7);
+ break;
+ case 0x04B8:
+ bufpush(0x04B9);
+ break;
+ case 0x04BA:
+ bufpush(0x04BB);
+ break;
+ case 0x04BC:
+ bufpush(0x04BD);
+ break;
+ case 0x04BE:
+ bufpush(0x04BF);
+ break;
+ case 0x04C1:
+ bufpush(0x04C2);
+ break;
+ case 0x04C3:
+ bufpush(0x04C4);
+ break;
+ case 0x04C5:
+ bufpush(0x04C6);
+ break;
+ case 0x04C7:
+ bufpush(0x04C8);
+ break;
+ case 0x04C9:
+ bufpush(0x04CA);
+ break;
+ case 0x04CB:
+ bufpush(0x04CC);
+ break;
+ case 0x04CD:
+ bufpush(0x04CE);
+ break;
+ case 0x04D0:
+ bufpush(0x04D1);
+ break;
+ case 0x04D2:
+ bufpush(0x04D3);
+ break;
+ case 0x04D4:
+ bufpush(0x04D5);
+ break;
+ case 0x04D6:
+ bufpush(0x04D7);
+ break;
+ case 0x04D8:
+ bufpush(0x04D9);
+ break;
+ case 0x04DA:
+ bufpush(0x04DB);
+ break;
+ case 0x04DC:
+ bufpush(0x04DD);
+ break;
+ case 0x04DE:
+ bufpush(0x04DF);
+ break;
+ case 0x04E0:
+ bufpush(0x04E1);
+ break;
+ case 0x04E2:
+ bufpush(0x04E3);
+ break;
+ case 0x04E4:
+ bufpush(0x04E5);
+ break;
+ case 0x04E6:
+ bufpush(0x04E7);
+ break;
+ case 0x04E8:
+ bufpush(0x04E9);
+ break;
+ case 0x04EA:
+ bufpush(0x04EB);
+ break;
+ case 0x04EC:
+ bufpush(0x04ED);
+ break;
+ case 0x04EE:
+ bufpush(0x04EF);
+ break;
+ case 0x04F0:
+ bufpush(0x04F1);
+ break;
+ case 0x04F2:
+ bufpush(0x04F3);
+ break;
+ case 0x04F4:
+ bufpush(0x04F5);
+ break;
+ case 0x04F8:
+ bufpush(0x04F9);
+ break;
+ case 0x0500:
+ bufpush(0x0501);
+ break;
+ case 0x0502:
+ bufpush(0x0503);
+ break;
+ case 0x0504:
+ bufpush(0x0505);
+ break;
+ case 0x0506:
+ bufpush(0x0507);
+ break;
+ case 0x0508:
+ bufpush(0x0509);
+ break;
+ case 0x050A:
+ bufpush(0x050B);
+ break;
+ case 0x050C:
+ bufpush(0x050D);
+ break;
+ case 0x050E:
+ bufpush(0x050F);
+ break;
+ case 0x0531:
+ bufpush(0x0561);
+ break;
+ case 0x0532:
+ bufpush(0x0562);
+ break;
+ case 0x0533:
+ bufpush(0x0563);
+ break;
+ case 0x0534:
+ bufpush(0x0564);
+ break;
+ case 0x0535:
+ bufpush(0x0565);
+ break;
+ case 0x0536:
+ bufpush(0x0566);
+ break;
+ case 0x0537:
+ bufpush(0x0567);
+ break;
+ case 0x0538:
+ bufpush(0x0568);
+ break;
+ case 0x0539:
+ bufpush(0x0569);
+ break;
+ case 0x053A:
+ bufpush(0x056A);
+ break;
+ case 0x053B:
+ bufpush(0x056B);
+ break;
+ case 0x053C:
+ bufpush(0x056C);
+ break;
+ case 0x053D:
+ bufpush(0x056D);
+ break;
+ case 0x053E:
+ bufpush(0x056E);
+ break;
+ case 0x053F:
+ bufpush(0x056F);
+ break;
+ case 0x0540:
+ bufpush(0x0570);
+ break;
+ case 0x0541:
+ bufpush(0x0571);
+ break;
+ case 0x0542:
+ bufpush(0x0572);
+ break;
+ case 0x0543:
+ bufpush(0x0573);
+ break;
+ case 0x0544:
+ bufpush(0x0574);
+ break;
+ case 0x0545:
+ bufpush(0x0575);
+ break;
+ case 0x0546:
+ bufpush(0x0576);
+ break;
+ case 0x0547:
+ bufpush(0x0577);
+ break;
+ case 0x0548:
+ bufpush(0x0578);
+ break;
+ case 0x0549:
+ bufpush(0x0579);
+ break;
+ case 0x054A:
+ bufpush(0x057A);
+ break;
+ case 0x054B:
+ bufpush(0x057B);
+ break;
+ case 0x054C:
+ bufpush(0x057C);
+ break;
+ case 0x054D:
+ bufpush(0x057D);
+ break;
+ case 0x054E:
+ bufpush(0x057E);
+ break;
+ case 0x054F:
+ bufpush(0x057F);
+ break;
+ case 0x0550:
+ bufpush(0x0580);
+ break;
+ case 0x0551:
+ bufpush(0x0581);
+ break;
+ case 0x0552:
+ bufpush(0x0582);
+ break;
+ case 0x0553:
+ bufpush(0x0583);
+ break;
+ case 0x0554:
+ bufpush(0x0584);
+ break;
+ case 0x0555:
+ bufpush(0x0585);
+ break;
+ case 0x0556:
+ bufpush(0x0586);
+ break;
+ case 0x0587:
+ bufpush(0x0565);
+ bufpush(0x0582);
+ break;
+ case 0x1E00:
+ bufpush(0x1E01);
+ break;
+ case 0x1E02:
+ bufpush(0x1E03);
+ break;
+ case 0x1E04:
+ bufpush(0x1E05);
+ break;
+ case 0x1E06:
+ bufpush(0x1E07);
+ break;
+ case 0x1E08:
+ bufpush(0x1E09);
+ break;
+ case 0x1E0A:
+ bufpush(0x1E0B);
+ break;
+ case 0x1E0C:
+ bufpush(0x1E0D);
+ break;
+ case 0x1E0E:
+ bufpush(0x1E0F);
+ break;
+ case 0x1E10:
+ bufpush(0x1E11);
+ break;
+ case 0x1E12:
+ bufpush(0x1E13);
+ break;
+ case 0x1E14:
+ bufpush(0x1E15);
+ break;
+ case 0x1E16:
+ bufpush(0x1E17);
+ break;
+ case 0x1E18:
+ bufpush(0x1E19);
+ break;
+ case 0x1E1A:
+ bufpush(0x1E1B);
+ break;
+ case 0x1E1C:
+ bufpush(0x1E1D);
+ break;
+ case 0x1E1E:
+ bufpush(0x1E1F);
+ break;
+ case 0x1E20:
+ bufpush(0x1E21);
+ break;
+ case 0x1E22:
+ bufpush(0x1E23);
+ break;
+ case 0x1E24:
+ bufpush(0x1E25);
+ break;
+ case 0x1E26:
+ bufpush(0x1E27);
+ break;
+ case 0x1E28:
+ bufpush(0x1E29);
+ break;
+ case 0x1E2A:
+ bufpush(0x1E2B);
+ break;
+ case 0x1E2C:
+ bufpush(0x1E2D);
+ break;
+ case 0x1E2E:
+ bufpush(0x1E2F);
+ break;
+ case 0x1E30:
+ bufpush(0x1E31);
+ break;
+ case 0x1E32:
+ bufpush(0x1E33);
+ break;
+ case 0x1E34:
+ bufpush(0x1E35);
+ break;
+ case 0x1E36:
+ bufpush(0x1E37);
+ break;
+ case 0x1E38:
+ bufpush(0x1E39);
+ break;
+ case 0x1E3A:
+ bufpush(0x1E3B);
+ break;
+ case 0x1E3C:
+ bufpush(0x1E3D);
+ break;
+ case 0x1E3E:
+ bufpush(0x1E3F);
+ break;
+ case 0x1E40:
+ bufpush(0x1E41);
+ break;
+ case 0x1E42:
+ bufpush(0x1E43);
+ break;
+ case 0x1E44:
+ bufpush(0x1E45);
+ break;
+ case 0x1E46:
+ bufpush(0x1E47);
+ break;
+ case 0x1E48:
+ bufpush(0x1E49);
+ break;
+ case 0x1E4A:
+ bufpush(0x1E4B);
+ break;
+ case 0x1E4C:
+ bufpush(0x1E4D);
+ break;
+ case 0x1E4E:
+ bufpush(0x1E4F);
+ break;
+ case 0x1E50:
+ bufpush(0x1E51);
+ break;
+ case 0x1E52:
+ bufpush(0x1E53);
+ break;
+ case 0x1E54:
+ bufpush(0x1E55);
+ break;
+ case 0x1E56:
+ bufpush(0x1E57);
+ break;
+ case 0x1E58:
+ bufpush(0x1E59);
+ break;
+ case 0x1E5A:
+ bufpush(0x1E5B);
+ break;
+ case 0x1E5C:
+ bufpush(0x1E5D);
+ break;
+ case 0x1E5E:
+ bufpush(0x1E5F);
+ break;
+ case 0x1E60:
+ bufpush(0x1E61);
+ break;
+ case 0x1E62:
+ bufpush(0x1E63);
+ break;
+ case 0x1E64:
+ bufpush(0x1E65);
+ break;
+ case 0x1E66:
+ bufpush(0x1E67);
+ break;
+ case 0x1E68:
+ bufpush(0x1E69);
+ break;
+ case 0x1E6A:
+ bufpush(0x1E6B);
+ break;
+ case 0x1E6C:
+ bufpush(0x1E6D);
+ break;
+ case 0x1E6E:
+ bufpush(0x1E6F);
+ break;
+ case 0x1E70:
+ bufpush(0x1E71);
+ break;
+ case 0x1E72:
+ bufpush(0x1E73);
+ break;
+ case 0x1E74:
+ bufpush(0x1E75);
+ break;
+ case 0x1E76:
+ bufpush(0x1E77);
+ break;
+ case 0x1E78:
+ bufpush(0x1E79);
+ break;
+ case 0x1E7A:
+ bufpush(0x1E7B);
+ break;
+ case 0x1E7C:
+ bufpush(0x1E7D);
+ break;
+ case 0x1E7E:
+ bufpush(0x1E7F);
+ break;
+ case 0x1E80:
+ bufpush(0x1E81);
+ break;
+ case 0x1E82:
+ bufpush(0x1E83);
+ break;
+ case 0x1E84:
+ bufpush(0x1E85);
+ break;
+ case 0x1E86:
+ bufpush(0x1E87);
+ break;
+ case 0x1E88:
+ bufpush(0x1E89);
+ break;
+ case 0x1E8A:
+ bufpush(0x1E8B);
+ break;
+ case 0x1E8C:
+ bufpush(0x1E8D);
+ break;
+ case 0x1E8E:
+ bufpush(0x1E8F);
+ break;
+ case 0x1E90:
+ bufpush(0x1E91);
+ break;
+ case 0x1E92:
+ bufpush(0x1E93);
+ break;
+ case 0x1E94:
+ bufpush(0x1E95);
+ break;
+ case 0x1E96:
+ bufpush(0x0068);
+ bufpush(0x0331);
+ break;
+ case 0x1E97:
+ bufpush(0x0074);
+ bufpush(0x0308);
+ break;
+ case 0x1E98:
+ bufpush(0x0077);
+ bufpush(0x030A);
+ break;
+ case 0x1E99:
+ bufpush(0x0079);
+ bufpush(0x030A);
+ break;
+ case 0x1E9A:
+ bufpush(0x0061);
+ bufpush(0x02BE);
+ break;
+ case 0x1E9B:
+ bufpush(0x1E61);
+ break;
+ case 0x1EA0:
+ bufpush(0x1EA1);
+ break;
+ case 0x1EA2:
+ bufpush(0x1EA3);
+ break;
+ case 0x1EA4:
+ bufpush(0x1EA5);
+ break;
+ case 0x1EA6:
+ bufpush(0x1EA7);
+ break;
+ case 0x1EA8:
+ bufpush(0x1EA9);
+ break;
+ case 0x1EAA:
+ bufpush(0x1EAB);
+ break;
+ case 0x1EAC:
+ bufpush(0x1EAD);
+ break;
+ case 0x1EAE:
+ bufpush(0x1EAF);
+ break;
+ case 0x1EB0:
+ bufpush(0x1EB1);
+ break;
+ case 0x1EB2:
+ bufpush(0x1EB3);
+ break;
+ case 0x1EB4:
+ bufpush(0x1EB5);
+ break;
+ case 0x1EB6:
+ bufpush(0x1EB7);
+ break;
+ case 0x1EB8:
+ bufpush(0x1EB9);
+ break;
+ case 0x1EBA:
+ bufpush(0x1EBB);
+ break;
+ case 0x1EBC:
+ bufpush(0x1EBD);
+ break;
+ case 0x1EBE:
+ bufpush(0x1EBF);
+ break;
+ case 0x1EC0:
+ bufpush(0x1EC1);
+ break;
+ case 0x1EC2:
+ bufpush(0x1EC3);
+ break;
+ case 0x1EC4:
+ bufpush(0x1EC5);
+ break;
+ case 0x1EC6:
+ bufpush(0x1EC7);
+ break;
+ case 0x1EC8:
+ bufpush(0x1EC9);
+ break;
+ case 0x1ECA:
+ bufpush(0x1ECB);
+ break;
+ case 0x1ECC:
+ bufpush(0x1ECD);
+ break;
+ case 0x1ECE:
+ bufpush(0x1ECF);
+ break;
+ case 0x1ED0:
+ bufpush(0x1ED1);
+ break;
+ case 0x1ED2:
+ bufpush(0x1ED3);
+ break;
+ case 0x1ED4:
+ bufpush(0x1ED5);
+ break;
+ case 0x1ED6:
+ bufpush(0x1ED7);
+ break;
+ case 0x1ED8:
+ bufpush(0x1ED9);
+ break;
+ case 0x1EDA:
+ bufpush(0x1EDB);
+ break;
+ case 0x1EDC:
+ bufpush(0x1EDD);
+ break;
+ case 0x1EDE:
+ bufpush(0x1EDF);
+ break;
+ case 0x1EE0:
+ bufpush(0x1EE1);
+ break;
+ case 0x1EE2:
+ bufpush(0x1EE3);
+ break;
+ case 0x1EE4:
+ bufpush(0x1EE5);
+ break;
+ case 0x1EE6:
+ bufpush(0x1EE7);
+ break;
+ case 0x1EE8:
+ bufpush(0x1EE9);
+ break;
+ case 0x1EEA:
+ bufpush(0x1EEB);
+ break;
+ case 0x1EEC:
+ bufpush(0x1EED);
+ break;
+ case 0x1EEE:
+ bufpush(0x1EEF);
+ break;
+ case 0x1EF0:
+ bufpush(0x1EF1);
+ break;
+ case 0x1EF2:
+ bufpush(0x1EF3);
+ break;
+ case 0x1EF4:
+ bufpush(0x1EF5);
+ break;
+ case 0x1EF6:
+ bufpush(0x1EF7);
+ break;
+ case 0x1EF8:
+ bufpush(0x1EF9);
+ break;
+ case 0x1F08:
+ bufpush(0x1F00);
+ break;
+ case 0x1F09:
+ bufpush(0x1F01);
+ break;
+ case 0x1F0A:
+ bufpush(0x1F02);
+ break;
+ case 0x1F0B:
+ bufpush(0x1F03);
+ break;
+ case 0x1F0C:
+ bufpush(0x1F04);
+ break;
+ case 0x1F0D:
+ bufpush(0x1F05);
+ break;
+ case 0x1F0E:
+ bufpush(0x1F06);
+ break;
+ case 0x1F0F:
+ bufpush(0x1F07);
+ break;
+ case 0x1F18:
+ bufpush(0x1F10);
+ break;
+ case 0x1F19:
+ bufpush(0x1F11);
+ break;
+ case 0x1F1A:
+ bufpush(0x1F12);
+ break;
+ case 0x1F1B:
+ bufpush(0x1F13);
+ break;
+ case 0x1F1C:
+ bufpush(0x1F14);
+ break;
+ case 0x1F1D:
+ bufpush(0x1F15);
+ break;
+ case 0x1F28:
+ bufpush(0x1F20);
+ break;
+ case 0x1F29:
+ bufpush(0x1F21);
+ break;
+ case 0x1F2A:
+ bufpush(0x1F22);
+ break;
+ case 0x1F2B:
+ bufpush(0x1F23);
+ break;
+ case 0x1F2C:
+ bufpush(0x1F24);
+ break;
+ case 0x1F2D:
+ bufpush(0x1F25);
+ break;
+ case 0x1F2E:
+ bufpush(0x1F26);
+ break;
+ case 0x1F2F:
+ bufpush(0x1F27);
+ break;
+ case 0x1F38:
+ bufpush(0x1F30);
+ break;
+ case 0x1F39:
+ bufpush(0x1F31);
+ break;
+ case 0x1F3A:
+ bufpush(0x1F32);
+ break;
+ case 0x1F3B:
+ bufpush(0x1F33);
+ break;
+ case 0x1F3C:
+ bufpush(0x1F34);
+ break;
+ case 0x1F3D:
+ bufpush(0x1F35);
+ break;
+ case 0x1F3E:
+ bufpush(0x1F36);
+ break;
+ case 0x1F3F:
+ bufpush(0x1F37);
+ break;
+ case 0x1F48:
+ bufpush(0x1F40);
+ break;
+ case 0x1F49:
+ bufpush(0x1F41);
+ break;
+ case 0x1F4A:
+ bufpush(0x1F42);
+ break;
+ case 0x1F4B:
+ bufpush(0x1F43);
+ break;
+ case 0x1F4C:
+ bufpush(0x1F44);
+ break;
+ case 0x1F4D:
+ bufpush(0x1F45);
+ break;
+ case 0x1F50:
+ bufpush(0x03C5);
+ bufpush(0x0313);
+ break;
+ case 0x1F52:
+ bufpush(0x03C5);
+ bufpush(0x0313);
+ bufpush(0x0300);
+ break;
+ case 0x1F54:
+ bufpush(0x03C5);
+ bufpush(0x0313);
+ bufpush(0x0301);
+ break;
+ case 0x1F56:
+ bufpush(0x03C5);
+ bufpush(0x0313);
+ bufpush(0x0342);
+ break;
+ case 0x1F59:
+ bufpush(0x1F51);
+ break;
+ case 0x1F5B:
+ bufpush(0x1F53);
+ break;
+ case 0x1F5D:
+ bufpush(0x1F55);
+ break;
+ case 0x1F5F:
+ bufpush(0x1F57);
+ break;
+ case 0x1F68:
+ bufpush(0x1F60);
+ break;
+ case 0x1F69:
+ bufpush(0x1F61);
+ break;
+ case 0x1F6A:
+ bufpush(0x1F62);
+ break;
+ case 0x1F6B:
+ bufpush(0x1F63);
+ break;
+ case 0x1F6C:
+ bufpush(0x1F64);
+ break;
+ case 0x1F6D:
+ bufpush(0x1F65);
+ break;
+ case 0x1F6E:
+ bufpush(0x1F66);
+ break;
+ case 0x1F6F:
+ bufpush(0x1F67);
+ break;
+ case 0x1F80:
+ bufpush(0x1F00);
+ bufpush(0x03B9);
+ break;
+ case 0x1F81:
+ bufpush(0x1F01);
+ bufpush(0x03B9);
+ break;
+ case 0x1F82:
+ bufpush(0x1F02);
+ bufpush(0x03B9);
+ break;
+ case 0x1F83:
+ bufpush(0x1F03);
+ bufpush(0x03B9);
+ break;
+ case 0x1F84:
+ bufpush(0x1F04);
+ bufpush(0x03B9);
+ break;
+ case 0x1F85:
+ bufpush(0x1F05);
+ bufpush(0x03B9);
+ break;
+ case 0x1F86:
+ bufpush(0x1F06);
+ bufpush(0x03B9);
+ break;
+ case 0x1F87:
+ bufpush(0x1F07);
+ bufpush(0x03B9);
+ break;
+ case 0x1F88:
+ bufpush(0x1F00);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F89:
+ bufpush(0x1F01);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F8A:
+ bufpush(0x1F02);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F8B:
+ bufpush(0x1F03);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F8C:
+ bufpush(0x1F04);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F8D:
+ bufpush(0x1F05);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F8E:
+ bufpush(0x1F06);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F8F:
+ bufpush(0x1F07);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F90:
+ bufpush(0x1F20);
+ bufpush(0x03B9);
+ break;
+ case 0x1F91:
+ bufpush(0x1F21);
+ bufpush(0x03B9);
+ break;
+ case 0x1F92:
+ bufpush(0x1F22);
+ bufpush(0x03B9);
+ break;
+ case 0x1F93:
+ bufpush(0x1F23);
+ bufpush(0x03B9);
+ break;
+ case 0x1F94:
+ bufpush(0x1F24);
+ bufpush(0x03B9);
+ break;
+ case 0x1F95:
+ bufpush(0x1F25);
+ bufpush(0x03B9);
+ break;
+ case 0x1F96:
+ bufpush(0x1F26);
+ bufpush(0x03B9);
+ break;
+ case 0x1F97:
+ bufpush(0x1F27);
+ bufpush(0x03B9);
+ break;
+ case 0x1F98:
+ bufpush(0x1F20);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F99:
+ bufpush(0x1F21);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F9A:
+ bufpush(0x1F22);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F9B:
+ bufpush(0x1F23);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F9C:
+ bufpush(0x1F24);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F9D:
+ bufpush(0x1F25);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F9E:
+ bufpush(0x1F26);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F9F:
+ bufpush(0x1F27);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FA0:
+ bufpush(0x1F60);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA1:
+ bufpush(0x1F61);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA2:
+ bufpush(0x1F62);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA3:
+ bufpush(0x1F63);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA4:
+ bufpush(0x1F64);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA5:
+ bufpush(0x1F65);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA6:
+ bufpush(0x1F66);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA7:
+ bufpush(0x1F67);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA8:
+ bufpush(0x1F60);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FA9:
+ bufpush(0x1F61);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FAA:
+ bufpush(0x1F62);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FAB:
+ bufpush(0x1F63);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FAC:
+ bufpush(0x1F64);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FAD:
+ bufpush(0x1F65);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FAE:
+ bufpush(0x1F66);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FAF:
+ bufpush(0x1F67);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FB2:
+ bufpush(0x1F70);
+ bufpush(0x03B9);
+ break;
+ case 0x1FB3:
+ bufpush(0x03B1);
+ bufpush(0x03B9);
+ break;
+ case 0x1FB4:
+ bufpush(0x03AC);
+ bufpush(0x03B9);
+ break;
+ case 0x1FB6:
+ bufpush(0x03B1);
+ bufpush(0x0342);
+ break;
+ case 0x1FB7:
+ bufpush(0x03B1);
+ bufpush(0x0342);
+ bufpush(0x03B9);
+ break;
+ case 0x1FB8:
+ bufpush(0x1FB0);
+ break;
+ case 0x1FB9:
+ bufpush(0x1FB1);
+ break;
+ case 0x1FBA:
+ bufpush(0x1F70);
+ break;
+ case 0x1FBB:
+ bufpush(0x1F71);
+ break;
+ case 0x1FBC:
+ bufpush(0x03B1);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FBE:
+ bufpush(0x03B9);
+ break;
+ case 0x1FC2:
+ bufpush(0x1F74);
+ bufpush(0x03B9);
+ break;
+ case 0x1FC3:
+ bufpush(0x03B7);
+ bufpush(0x03B9);
+ break;
+ case 0x1FC4:
+ bufpush(0x03AE);
+ bufpush(0x03B9);
+ break;
+ case 0x1FC6:
+ bufpush(0x03B7);
+ bufpush(0x0342);
+ break;
+ case 0x1FC7:
+ bufpush(0x03B7);
+ bufpush(0x0342);
+ bufpush(0x03B9);
+ break;
+ case 0x1FC8:
+ bufpush(0x1F72);
+ break;
+ case 0x1FC9:
+ bufpush(0x1F73);
+ break;
+ case 0x1FCA:
+ bufpush(0x1F74);
+ break;
+ case 0x1FCB:
+ bufpush(0x1F75);
+ break;
+ case 0x1FCC:
+ bufpush(0x03B7);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FD2:
+ bufpush(0x03B9);
+ bufpush(0x0308);
+ bufpush(0x0300);
+ break;
+ case 0x1FD3:
+ bufpush(0x03B9);
+ bufpush(0x0308);
+ bufpush(0x0301);
+ break;
+ case 0x1FD6:
+ bufpush(0x03B9);
+ bufpush(0x0342);
+ break;
+ case 0x1FD7:
+ bufpush(0x03B9);
+ bufpush(0x0308);
+ bufpush(0x0342);
+ break;
+ case 0x1FD8:
+ bufpush(0x1FD0);
+ break;
+ case 0x1FD9:
+ bufpush(0x1FD1);
+ break;
+ case 0x1FDA:
+ bufpush(0x1F76);
+ break;
+ case 0x1FDB:
+ bufpush(0x1F77);
+ break;
+ case 0x1FE2:
+ bufpush(0x03C5);
+ bufpush(0x0308);
+ bufpush(0x0300);
+ break;
+ case 0x1FE3:
+ bufpush(0x03C5);
+ bufpush(0x0308);
+ bufpush(0x0301);
+ break;
+ case 0x1FE4:
+ bufpush(0x03C1);
+ bufpush(0x0313);
+ break;
+ case 0x1FE6:
+ bufpush(0x03C5);
+ bufpush(0x0342);
+ break;
+ case 0x1FE7:
+ bufpush(0x03C5);
+ bufpush(0x0308);
+ bufpush(0x0342);
+ break;
+ case 0x1FE8:
+ bufpush(0x1FE0);
+ break;
+ case 0x1FE9:
+ bufpush(0x1FE1);
+ break;
+ case 0x1FEA:
+ bufpush(0x1F7A);
+ break;
+ case 0x1FEB:
+ bufpush(0x1F7B);
+ break;
+ case 0x1FEC:
+ bufpush(0x1FE5);
+ break;
+ case 0x1FF2:
+ bufpush(0x1F7C);
+ bufpush(0x03B9);
+ break;
+ case 0x1FF3:
+ bufpush(0x03C9);
+ bufpush(0x03B9);
+ break;
+ case 0x1FF4:
+ bufpush(0x03CE);
+ bufpush(0x03B9);
+ break;
+ case 0x1FF6:
+ bufpush(0x03C9);
+ bufpush(0x0342);
+ break;
+ case 0x1FF7:
+ bufpush(0x03C9);
+ bufpush(0x0342);
+ bufpush(0x03B9);
+ break;
+ case 0x1FF8:
+ bufpush(0x1F78);
+ break;
+ case 0x1FF9:
+ bufpush(0x1F79);
+ break;
+ case 0x1FFA:
+ bufpush(0x1F7C);
+ break;
+ case 0x1FFB:
+ bufpush(0x1F7D);
+ break;
+ case 0x1FFC:
+ bufpush(0x03C9);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x2126:
+ bufpush(0x03C9);
+ break;
+ case 0x212A:
+ bufpush(0x006B);
+ break;
+ case 0x212B:
+ bufpush(0x00E5);
+ break;
+ case 0x2160:
+ bufpush(0x2170);
+ break;
+ case 0x2161:
+ bufpush(0x2171);
+ break;
+ case 0x2162:
+ bufpush(0x2172);
+ break;
+ case 0x2163:
+ bufpush(0x2173);
+ break;
+ case 0x2164:
+ bufpush(0x2174);
+ break;
+ case 0x2165:
+ bufpush(0x2175);
+ break;
+ case 0x2166:
+ bufpush(0x2176);
+ break;
+ case 0x2167:
+ bufpush(0x2177);
+ break;
+ case 0x2168:
+ bufpush(0x2178);
+ break;
+ case 0x2169:
+ bufpush(0x2179);
+ break;
+ case 0x216A:
+ bufpush(0x217A);
+ break;
+ case 0x216B:
+ bufpush(0x217B);
+ break;
+ case 0x216C:
+ bufpush(0x217C);
+ break;
+ case 0x216D:
+ bufpush(0x217D);
+ break;
+ case 0x216E:
+ bufpush(0x217E);
+ break;
+ case 0x216F:
+ bufpush(0x217F);
+ break;
+ case 0x24B6:
+ bufpush(0x24D0);
+ break;
+ case 0x24B7:
+ bufpush(0x24D1);
+ break;
+ case 0x24B8:
+ bufpush(0x24D2);
+ break;
+ case 0x24B9:
+ bufpush(0x24D3);
+ break;
+ case 0x24BA:
+ bufpush(0x24D4);
+ break;
+ case 0x24BB:
+ bufpush(0x24D5);
+ break;
+ case 0x24BC:
+ bufpush(0x24D6);
+ break;
+ case 0x24BD:
+ bufpush(0x24D7);
+ break;
+ case 0x24BE:
+ bufpush(0x24D8);
+ break;
+ case 0x24BF:
+ bufpush(0x24D9);
+ break;
+ case 0x24C0:
+ bufpush(0x24DA);
+ break;
+ case 0x24C1:
+ bufpush(0x24DB);
+ break;
+ case 0x24C2:
+ bufpush(0x24DC);
+ break;
+ case 0x24C3:
+ bufpush(0x24DD);
+ break;
+ case 0x24C4:
+ bufpush(0x24DE);
+ break;
+ case 0x24C5:
+ bufpush(0x24DF);
+ break;
+ case 0x24C6:
+ bufpush(0x24E0);
+ break;
+ case 0x24C7:
+ bufpush(0x24E1);
+ break;
+ case 0x24C8:
+ bufpush(0x24E2);
+ break;
+ case 0x24C9:
+ bufpush(0x24E3);
+ break;
+ case 0x24CA:
+ bufpush(0x24E4);
+ break;
+ case 0x24CB:
+ bufpush(0x24E5);
+ break;
+ case 0x24CC:
+ bufpush(0x24E6);
+ break;
+ case 0x24CD:
+ bufpush(0x24E7);
+ break;
+ case 0x24CE:
+ bufpush(0x24E8);
+ break;
+ case 0x24CF:
+ bufpush(0x24E9);
+ break;
+ case 0xFB00:
+ bufpush(0x0066);
+ bufpush(0x0066);
+ break;
+ case 0xFB01:
+ bufpush(0x0066);
+ bufpush(0x0069);
+ break;
+ case 0xFB02:
+ bufpush(0x0066);
+ bufpush(0x006C);
+ break;
+ case 0xFB03:
+ bufpush(0x0066);
+ bufpush(0x0066);
+ bufpush(0x0069);
+ break;
+ case 0xFB04:
+ bufpush(0x0066);
+ bufpush(0x0066);
+ bufpush(0x006C);
+ break;
+ case 0xFB05:
+ bufpush(0x0073);
+ bufpush(0x0074);
+ break;
+ case 0xFB06:
+ bufpush(0x0073);
+ bufpush(0x0074);
+ break;
+ case 0xFB13:
+ bufpush(0x0574);
+ bufpush(0x0576);
+ break;
+ case 0xFB14:
+ bufpush(0x0574);
+ bufpush(0x0565);
+ break;
+ case 0xFB15:
+ bufpush(0x0574);
+ bufpush(0x056B);
+ break;
+ case 0xFB16:
+ bufpush(0x057E);
+ bufpush(0x0576);
+ break;
+ case 0xFB17:
+ bufpush(0x0574);
+ bufpush(0x056D);
+ break;
+ case 0xFF21:
+ bufpush(0xFF41);
+ break;
+ case 0xFF22:
+ bufpush(0xFF42);
+ break;
+ case 0xFF23:
+ bufpush(0xFF43);
+ break;
+ case 0xFF24:
+ bufpush(0xFF44);
+ break;
+ case 0xFF25:
+ bufpush(0xFF45);
+ break;
+ case 0xFF26:
+ bufpush(0xFF46);
+ break;
+ case 0xFF27:
+ bufpush(0xFF47);
+ break;
+ case 0xFF28:
+ bufpush(0xFF48);
+ break;
+ case 0xFF29:
+ bufpush(0xFF49);
+ break;
+ case 0xFF2A:
+ bufpush(0xFF4A);
+ break;
+ case 0xFF2B:
+ bufpush(0xFF4B);
+ break;
+ case 0xFF2C:
+ bufpush(0xFF4C);
+ break;
+ case 0xFF2D:
+ bufpush(0xFF4D);
+ break;
+ case 0xFF2E:
+ bufpush(0xFF4E);
+ break;
+ case 0xFF2F:
+ bufpush(0xFF4F);
+ break;
+ case 0xFF30:
+ bufpush(0xFF50);
+ break;
+ case 0xFF31:
+ bufpush(0xFF51);
+ break;
+ case 0xFF32:
+ bufpush(0xFF52);
+ break;
+ case 0xFF33:
+ bufpush(0xFF53);
+ break;
+ case 0xFF34:
+ bufpush(0xFF54);
+ break;
+ case 0xFF35:
+ bufpush(0xFF55);
+ break;
+ case 0xFF36:
+ bufpush(0xFF56);
+ break;
+ case 0xFF37:
+ bufpush(0xFF57);
+ break;
+ case 0xFF38:
+ bufpush(0xFF58);
+ break;
+ case 0xFF39:
+ bufpush(0xFF59);
+ break;
+ case 0xFF3A:
+ bufpush(0xFF5A);
+ break;
+ case 0x10400:
+ bufpush(0x10428);
+ break;
+ case 0x10401:
+ bufpush(0x10429);
+ break;
+ case 0x10402:
+ bufpush(0x1042A);
+ break;
+ case 0x10403:
+ bufpush(0x1042B);
+ break;
+ case 0x10404:
+ bufpush(0x1042C);
+ break;
+ case 0x10405:
+ bufpush(0x1042D);
+ break;
+ case 0x10406:
+ bufpush(0x1042E);
+ break;
+ case 0x10407:
+ bufpush(0x1042F);
+ break;
+ case 0x10408:
+ bufpush(0x10430);
+ break;
+ case 0x10409:
+ bufpush(0x10431);
+ break;
+ case 0x1040A:
+ bufpush(0x10432);
+ break;
+ case 0x1040B:
+ bufpush(0x10433);
+ break;
+ case 0x1040C:
+ bufpush(0x10434);
+ break;
+ case 0x1040D:
+ bufpush(0x10435);
+ break;
+ case 0x1040E:
+ bufpush(0x10436);
+ break;
+ case 0x1040F:
+ bufpush(0x10437);
+ break;
+ case 0x10410:
+ bufpush(0x10438);
+ break;
+ case 0x10411:
+ bufpush(0x10439);
+ break;
+ case 0x10412:
+ bufpush(0x1043A);
+ break;
+ case 0x10413:
+ bufpush(0x1043B);
+ break;
+ case 0x10414:
+ bufpush(0x1043C);
+ break;
+ case 0x10415:
+ bufpush(0x1043D);
+ break;
+ case 0x10416:
+ bufpush(0x1043E);
+ break;
+ case 0x10417:
+ bufpush(0x1043F);
+ break;
+ case 0x10418:
+ bufpush(0x10440);
+ break;
+ case 0x10419:
+ bufpush(0x10441);
+ break;
+ case 0x1041A:
+ bufpush(0x10442);
+ break;
+ case 0x1041B:
+ bufpush(0x10443);
+ break;
+ case 0x1041C:
+ bufpush(0x10444);
+ break;
+ case 0x1041D:
+ bufpush(0x10445);
+ break;
+ case 0x1041E:
+ bufpush(0x10446);
+ break;
+ case 0x1041F:
+ bufpush(0x10447);
+ break;
+ case 0x10420:
+ bufpush(0x10448);
+ break;
+ case 0x10421:
+ bufpush(0x10449);
+ break;
+ case 0x10422:
+ bufpush(0x1044A);
+ break;
+ case 0x10423:
+ bufpush(0x1044B);
+ break;
+ case 0x10424:
+ bufpush(0x1044C);
+ break;
+ case 0x10425:
+ bufpush(0x1044D);
+ break;
+ }
diff --git a/src/debug.h b/src/debug.h
new file mode 100644
index 0000000..af1d017
--- /dev/null
+++ b/src/debug.h
@@ -0,0 +1,36 @@
+/* Logging and error-checking helper macros.
+ * Every macro prints to stderr; M is pasted next to other string literals,
+ * so it must itself be a string literal (a printf-style format).
+ * The macros rely on the GNU `##__VA_ARGS__` extension so that the variadic
+ * part may be empty. */
+#ifndef __debug_h__
+#define __debug_h__
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+/* debug(M, ...): print a "DEBUG file:line: ..." message; expands to nothing
+ * when NDEBUG is defined. */
+#ifdef NDEBUG
+#define debug(M, ...)
+#else
+#define debug(M, ...) \
+ fprintf(stderr, "DEBUG %s:%d: " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
+#endif
+
+/* Text for the current errno: "None" when errno is 0, else strerror(errno). */
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+
+/* log_err / log_warn: print a message tagged with file, line and the
+ * current errno text. */
+#define log_err(M, ...) \
+ fprintf(stderr, "[ERROR] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
+ clean_errno(), ##__VA_ARGS__)
+
+#define log_warn(M, ...) \
+ fprintf(stderr, "[WARN] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
+ clean_errno(), ##__VA_ARGS__)
+
+/* log_info: like log_warn but without the errno text. */
+#define log_info(M, ...) fprintf(stderr, "[INFO] (%s:%d) " M "\n", __FILE__, \
+ __LINE__, ##__VA_ARGS__)
+
+/* check(A, M, ...): if A is false, log an error, reset errno and jump to the
+ * `error` label, which the calling function must define.
+ * NOTE(review): expands to a bare `if` statement, so using it as the body of
+ * an unbraced if/else would capture the following `else`. */
+#define check(A, M, ...) \
+ if(!(A)) { log_err(M, ##__VA_ARGS__); errno=0; goto error; }
+
+/* sentinel(M, ...): unconditionally log an error, reset errno and jump to
+ * the `error` label. */
+#define sentinel(M, ...) \
+ { log_err(M, ##__VA_ARGS__); errno=0; goto error; }
+
+/* check_debug(A, M, ...): like check, but reports through debug() and is
+ * therefore silent when NDEBUG is defined (the jump still happens). */
+#define check_debug(A, M, ...) \
+ if(!(A)) { debug(M, ##__VA_ARGS__); errno=0; goto error; }
+
+#endif
diff --git a/src/detab.c b/src/detab.c
new file mode 100644
index 0000000..e03fcf7
--- /dev/null
+++ b/src/detab.c
@@ -0,0 +1,48 @@
+#include "bstrlib.h"
+
+// UTF-8 aware detab: replaces every tab in s, in place, with the number of
+// spaces needed to reach the next multiple-of-4 column. Columns are counted
+// in characters, not bytes, when utf8 is nonzero; when utf8 is zero every
+// byte counts as one column and no validation is done.
+// Assumes s has no newlines, or only a final newline.
+// Return 0 on success, BSTR_ERR if invalid UTF-8.
+extern int bdetab(bstring s, int utf8)
+{
+  unsigned char c;
+  int pos = 0;  // a count of characters
+  int byte = 0; // a count of bytes
+  int high_chars_to_skip = 0; // continuation bytes still expected
+  int numspaces = 0;
+  while ((c = bchar(s, byte))) {
+    if (utf8 && high_chars_to_skip > 0) {
+      // inside a multibyte sequence: only high bytes are acceptable
+      if (c >= 0x80) {
+        high_chars_to_skip--;
+        byte++;
+      } else {
+        return BSTR_ERR; // invalid utf-8
+      }
+    } else if (c == '\t') {
+      bdelete(s, byte, 1); // delete tab character
+      numspaces = 4 - (pos % 4);
+      binsertch(s, byte, numspaces, ' ');
+      byte += numspaces;
+      pos += numspaces;
+    } else if (c < 0x80 || !utf8) {
+      // Single-byte character. 0x80 itself is a continuation byte, not
+      // ASCII, so in utf8 mode it must fall through to the lead-byte
+      // check below and be rejected like any other stray continuation.
+      byte++;
+      pos++;
+    } else { // multibyte utf8 sequences
+      // classify the lead byte by its high-bit pattern
+      if (c >> 1 == 0176) {
+        high_chars_to_skip = 5;
+      } else if (c >> 2 == 076) {
+        high_chars_to_skip = 4;
+      } else if (c >> 3 == 036) {
+        high_chars_to_skip = 3;
+      } else if (c >> 4 == 016) {
+        high_chars_to_skip = 2;
+      } else if (c >> 5 == 06) {
+        high_chars_to_skip = 1;
+      } else {
+        return BSTR_ERR; // invalid utf-8
+      }
+      pos++;
+      byte++;
+    }
+  }
+  return 0;
+}
diff --git a/src/getopt.c b/src/getopt.c
new file mode 100644
index 0000000..321dd9f
--- /dev/null
+++ b/src/getopt.c
@@ -0,0 +1,199 @@
+/* $Id: getopt.c 4022 2008-03-31 06:11:07Z rra $
+ *
+ * Replacement implementation of getopt.
+ *
+ * This is a replacement implementation for getopt based on the my_getopt
+ * distribution by Benjamin Sittler. Only the getopt interface is included,
+ * since remctl doesn't use GNU long options, and the code has been rearranged
+ * and reworked somewhat to fit with the remctl coding style.
+ *
+ * Copyright 1997, 2000, 2001, 2002 Benjamin Sittler
+ * Copyright 2008 Russ Allbery <rra@stanford.edu>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <config.h>
+#include <portable/system.h>
+#include <portable/getopt.h>
+
+/*
+ * If we're running the test suite, rename getopt and the global variables to
+ * avoid conflicts with the system version.
+ */
+#if TESTING
+# define getopt test_getopt
+int test_getopt(int, char **, const char *);
+# define optind test_optind
+# define opterr test_opterr
+# define optopt test_optopt
+# define optarg test_optarg
+#endif
+
+/* Initialize global interface variables. */
+int optind = 1;
+int opterr = 1;
+int optopt = 0;
+char *optarg = NULL;
+
+/*
+ * This is the plain old UNIX getopt, with GNU-style extensions. If you're
+ * porting some piece of UNIX software, this is all you need. It supports
+ * GNU-style permutation and optional arguments, but does not support the GNU
+ * -W extension.
+ *
+ * This function is not re-entrant or thread-safe, has static variables, and
+ * generally isn't a great interface, but normally you only call it once.
+ */
+/*
+ * Parse the next option from argv according to optstring.
+ *
+ * Returns the option character found, -1 when there is nothing more to
+ * parse, '?' for an unknown option, ':' or '?' (depending on colon_mode)
+ * when a required argument is missing, and 1 for a non-option argument when
+ * optstring starts with '-'. Updates the globals optind, optopt and optarg.
+ */
+int
+getopt(int argc, char *argv[], const char *optstring)
+{
+ const char *p;
+ size_t offset = 0;
+ char mode = '\0';
+ int colon_mode = 0;
+ int option = -1;
+
+ /* Holds the current position in the parameter being parsed. */
+ static int charind = 0;
+
+ /*
+ * By default, getopt permutes argv as it scans and leaves all non-options
+ * at the end. This can be changed with the first character of optstring
+ * or the environment variable POSIXLY_CORRECT. With a first character of
+ * '+' or when POSIXLY_CORRECT is set, option processing stops at the
+ * first non-option. If the first character is '-', each non-option argv
+ * element is handled as if it were the argument of an option with
+ * character code 1. mode holds this character.
+ *
+ * After the optional leading '+' and '-', optstring may contain ':'. If
+ * present, missing arguments return ':' instead of '?'. colon_mode holds
+ * this setting.
+ */
+ if (getenv("POSIXLY_CORRECT") != NULL) {
+ mode = '+';
+ /* NOTE(review): colon_mode is only tested for truth below, so this
+ * non-zero value makes a missing argument report ':' whenever
+ * POSIXLY_CORRECT is set; a leading '+', '-' or ':' in optstring is
+ * also not skipped on this path -- confirm both are intended. */
+ colon_mode = '+';
+ } else {
+ if (optstring[offset] == '+' || optstring[offset] == '-') {
+ mode = optstring[offset];
+ offset++;
+ }
+ if (optstring[offset] == ':') {
+ colon_mode = 1;
+ offset++;
+ }
+ }
+
+ /*
+ * charind holds where we left off. If it's set, we were in the middle
+ * of an argv element; if not, we pick up with the next element of
+ * optind.
+ */
+ optarg = NULL;
+ if (charind == 0) {
+ if (optind >= argc)
+ option = -1;
+ else if (strcmp(argv[optind], "--") == 0) {
+ /* "--" ends option processing and is itself consumed. */
+ optind++;
+ option = -1;
+ } else if (argv[optind][0] != '-' || argv[optind][1] == '\0') {
+ /* argv[optind] is a non-option (a lone "-" counts as one). */
+ char *tmp;
+ int i, j, k, end;
+
+ if (mode == '+')
+ option = -1;
+ else if (mode == '-') {
+ optarg = argv[optind];
+ optind++;
+ option = 1;
+ } else {
+ /*
+ * Permuting mode: look for the next option at index i, parse it
+ * with a recursive call, then move each non-option in [j, i)
+ * behind whatever the recursive call consumed, decrementing
+ * optind once per moved element.
+ */
+ for (i = optind + 1, j = optind; i < argc; i++)
+ if ((argv[i][0] == '-') && (argv[i][1] != '\0')) {
+ optind = i;
+ option = getopt(argc, argv, optstring);
+ while (i > j) {
+ --i;
+ tmp = argv[i];
+ /* If the recursive call stopped in the middle of an
+ * element (charind != 0), that element is still at
+ * optind and must stay in front of the moved one. */
+ end = (charind == 0) ? optind - 1 : optind;
+ for (k = i; k + 1 <= end; k++) {
+ argv[k] = argv[k + 1];
+ }
+ argv[end] = tmp;
+ --optind;
+ }
+ break;
+ }
+ /* No further option found: only non-options remain. */
+ if (i == argc)
+ option = -1;
+ }
+ return option;
+ } else {
+ /* Start of a new option element; skip its leading '-'. */
+ charind = 1;
+ }
+ }
+ if (charind != 0) {
+ optopt = argv[optind][charind];
+ /* Search optstring (past any mode prefix) for this option character. */
+ for (p = optstring + offset; *p != '\0'; p++)
+ if (optopt == *p) {
+ p++;
+ if (*p == ':') {
+ if (argv[optind][charind + 1] != '\0') {
+ /* Argument is attached to the option: "-ovalue". */
+ optarg = &argv[optind][charind + 1];
+ optind++;
+ charind = 0;
+ } else {
+ p++;
+ /* A second ':' marks the argument as optional, in which
+ * case the next argv element is not consumed. */
+ if (*p != ':') {
+ charind = 0;
+ optind++;
+ if (optind >= argc) {
+ if (opterr)
+ fprintf(stderr, "%s: option requires"
+ " an argument -- %c\n", argv[0],
+ optopt);
+ option = colon_mode ? ':' : '?';
+ goto done;
+ } else {
+ optarg = argv[optind];
+ optind++;
+ }
+ }
+ }
+ }
+ option = optopt;
+ }
+ /* option is still -1 only if optopt was not found in optstring. */
+ if (option == -1) {
+ if (opterr)
+ fprintf(stderr, "%s: illegal option -- %c\n", argv[0], optopt);
+ option = '?';
+ }
+ }
+
+done:
+ /* Still inside a group of bundled options: step to the next character,
+ * and move on to the next argv element once the group is exhausted. */
+ if (charind != 0) {
+ charind++;
+ if (argv[optind][charind] == '\0') {
+ optind++;
+ charind = 0;
+ }
+ }
+ if (optind > argc)
+ optind = argc;
+ return option;
+}
diff --git a/src/html.c b/src/html.c
new file mode 100644
index 0000000..56d5dbb
--- /dev/null
+++ b/src/html.c
@@ -0,0 +1,276 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include "bstrlib.h"
+#include "stmd.h"
+#include "debug.h"
+#include "scanners.h"
+
+// Functions to convert block and inline lists to HTML strings.
+
+// Return a newly allocated copy of inp in which '<', '>', '"' and '&' are
+// replaced by "&lt;", "&gt;", "&quot;" and "&amp;" in one pass over the
+// string (cheaper than repeated bfindreplace calls). When preserve_entities
+// is true and scan_entity returns a non-zero length at an '&', that many
+// bytes are skipped and left untouched.
+static bstring escape_html(bstring inp, bool preserve_entities)
+{
+  bstring specials = blk2bstr("&<>\"", 4);
+  bstring out = bstrcpy(inp);
+  bstring replacement;
+  const char * entity;
+  int entity_len;
+  int matched;
+  int i = 0;
+  char ch;
+
+  while ((i = binchr(out, i, specials)) != BSTR_ERR) {
+    ch = bchar(out, i);
+    if (ch == '<') {
+      entity = "&lt;";
+      entity_len = 4;
+    } else if (ch == '>') {
+      entity = "&gt;";
+      entity_len = 4;
+    } else if (ch == '"') {
+      entity = "&quot;";
+      entity_len = 6;
+    } else if (ch == '&') {
+      // scan_entity is only consulted when entities are to be preserved
+      matched = preserve_entities ? scan_entity(out, i) : 0;
+      if (matched) {
+        i += matched;
+        continue;
+      }
+      entity = "&amp;";
+      entity_len = 5;
+    } else {
+      // binchr only stops on the four characters above; drop anything else
+      bdelete(out, i, 1);
+      log_err("unexpected character %02x", ch);
+      continue;
+    }
+    // swap the single character for its entity and step past it
+    bdelete(out, i, 1);
+    replacement = blk2bstr(entity, entity_len);
+    binsert(out, i, replacement, ' ');
+    bdestroy(replacement);
+    i += entity_len;
+  }
+  bdestroy(specials);
+  return out;
+}
+
+// Append a newline to buffer unless its last character is already a
+// newline or bchar yields 0 for the last position.
+static inline void cr(bstring buffer)
+{
+  int last = bchar(buffer, blength(buffer) - 1);
+  if (last == 0 || last == '\n') {
+    return;
+  }
+  bconchar(buffer, '\n');
+}
+
+// Convert a block list to HTML. Returns 0 on success, and sets result.
+// b is the first block of a sibling list (followed through ->next); result
+// receives a newly allocated bstring. When tight is true, paragraphs are
+// emitted without the surrounding <p> tags. On failure -1 is returned,
+// *result is left untouched and the partially built output is freed.
+extern int blocks_to_html(block* b, bstring* result, bool tight)
+{
+  bstring contents = NULL;
+  bstring escaped, escaped2;
+  struct bstrList * info_words;
+  struct ListData * data;
+  bstring mbstart;
+  bstring html = blk2bstr("", 0);
+
+  while(b != NULL) {
+    switch(b->tag) {
+    case document:
+      check(blocks_to_html(b->children, &contents, false) == 0,
+            "error converting blocks to html");
+      bformata(html, "%s", contents->data);
+      bdestroy(contents);
+      break;
+    case paragraph:
+      check(inlines_to_html(b->inline_content, &contents) == 0,
+            "error converting inlines to html");
+      if (tight) {
+        bformata(html, "%s", contents->data);
+      } else {
+        cr(html);
+        bformata(html, "<p>%s</p>", contents->data);
+        cr(html);
+      }
+      bdestroy(contents);
+      break;
+    case block_quote:
+      check(blocks_to_html(b->children, &contents, false) == 0,
+            "error converting blocks to html");
+      cr(html);
+      bformata(html, "<blockquote>\n%s</blockquote>", contents->data);
+      cr(html);
+      bdestroy(contents);
+      break;
+    case list_item:
+      // items inherit the tightness of the enclosing list
+      check(blocks_to_html(b->children, &contents, tight) == 0,
+            "error converting blocks to html");
+      brtrimws(contents);
+      cr(html);
+      bformata(html, "<li>%s</li>", contents->data);
+      cr(html);
+      bdestroy(contents);
+      break;
+    case list:
+      // make sure a list starts at the beginning of the line:
+      cr(html);
+      data = &(b->attributes.list_data);
+      check(blocks_to_html(b->children, &contents, data->tight) == 0,
+            "error converting blocks to html");
+      // the start attribute is only emitted when the list doesn't start at 1
+      mbstart = bformat(" start=\"%d\"", data->start);
+      bformata(html, "<%s%s>\n%s</%s>",
+               data->list_type == bullet ? "ul" : "ol",
+               data->start == 1 ? "" : (char*) mbstart->data,
+               contents->data,
+               data->list_type == bullet ? "ul" : "ol");
+      cr(html);
+      bdestroy(contents);
+      bdestroy(mbstart);
+      break;
+    case atx_header:
+    case setext_header:
+      check(inlines_to_html(b->inline_content, &contents) == 0,
+            "error converting inlines to html");
+      cr(html);
+      bformata(html, "<h%d>%s</h%d>",
+               b->attributes.header_level,
+               contents->data,
+               b->attributes.header_level);
+      cr(html);
+      bdestroy(contents);
+      break;
+    case indented_code:
+      escaped = escape_html(b->string_content, false);
+      cr(html);
+      bformata(html, "<pre><code>%s</code></pre>", escaped->data);
+      cr(html);
+      bdestroy(escaped);
+      break;
+    case fenced_code:
+      escaped = escape_html(b->string_content, false);
+      cr(html);
+      bformata(html, "<pre");
+      if (blength(b->attributes.fenced_code_data.info) > 0) {
+        // only the first space-separated word of the info string is used
+        escaped2 = escape_html(b->attributes.fenced_code_data.info, true);
+        info_words = bsplit(escaped2, ' ');
+        bformata(html, " class=\"%s\"", info_words->entry[0]->data);
+        bdestroy(escaped2);
+        bstrListDestroy(info_words);
+      }
+      bformata(html, "><code>%s</code></pre>", escaped->data);
+      cr(html);
+      bdestroy(escaped);
+      break;
+    case html_block:
+      // raw HTML is passed through unescaped
+      bformata(html, "%s", b->string_content->data);
+      break;
+    case hrule:
+      bformata(html, "<hr />");
+      cr(html);
+      break;
+    case reference_def:
+      break;
+    default:
+      log_warn("block type %d not implemented\n", b->tag);
+      break;
+    }
+    b = b->next;
+  }
+  *result = html;
+  return 0;
+ error:
+  // html is never handed to the caller on this path, so release it here.
+  // contents is deliberately not touched: it is either unset or already
+  // destroyed by an earlier iteration.
+  bdestroy(html);
+  return -1;
+}
+
+// Convert an inline list to HTML. Returns 0 on success, and sets result.
+// ils is the first inline of a list (followed through ->next); result
+// receives a newly allocated bstring. On failure -1 is returned, *result is
+// left untouched and the partially built output is freed.
+extern int inlines_to_html(inl* ils, bstring* result)
+{
+  bstring contents = NULL;
+  bstring html = blk2bstr("", 0);
+  bstring mbtitle, escaped, escaped2;
+
+  while(ils != NULL) {
+    switch(ils->tag) {
+    case str:
+      escaped = escape_html(ils->content.literal, false);
+      bformata(html, "%s", escaped->data);
+      bdestroy(escaped);
+      break;
+    case linebreak:
+      bformata(html, "<br />\n");
+      break;
+    case softbreak:
+      bformata(html, "\n");
+      break;
+    case code:
+      escaped = escape_html(ils->content.literal, false);
+      bformata(html, "<code>%s</code>", escaped->data);
+      bdestroy(escaped);
+      break;
+    case raw_html:
+    case entity:
+      // passed through unescaped
+      bformata(html, "%s", ils->content.literal->data);
+      break;
+    case link:
+      check(inlines_to_html(ils->content.inlines, &contents) == 0,
+            "error converting inlines to html");
+      // the title attribute is only emitted for a non-empty title
+      if (blength(ils->content.linkable.title) > 0) {
+        escaped = escape_html(ils->content.linkable.title, true);
+        mbtitle = bformat(" title=\"%s\"", escaped->data);
+        bdestroy(escaped);
+      } else {
+        mbtitle = blk2bstr("",0);
+      }
+      escaped = escape_html(ils->content.linkable.url, true);
+      bformata(html, "<a href=\"%s\"%s>%s</a>",
+               escaped->data,
+               mbtitle->data,
+               contents->data);
+      bdestroy(escaped);
+      bdestroy(mbtitle);
+      bdestroy(contents);
+      break;
+    case image:
+      check(inlines_to_html(ils->content.inlines, &contents) == 0,
+            "error converting inlines to html");
+      escaped = escape_html(ils->content.linkable.url, true);
+      // the rendered label becomes the alt text, so escape it once more
+      escaped2 = escape_html(contents, false);
+      bdestroy(contents);
+      bformata(html, "<img src=\"%s\" alt=\"%s\"",
+               escaped->data, escaped2->data);
+      bdestroy(escaped);
+      bdestroy(escaped2);
+      if (blength(ils->content.linkable.title) > 0) {
+        escaped = escape_html(ils->content.linkable.title, true);
+        bformata(html, " title=\"%s\"", escaped->data);
+        bdestroy(escaped);
+      }
+      bformata(html, " />");
+      break;
+    case strong:
+      check(inlines_to_html(ils->content.inlines, &contents) == 0,
+            "error converting inlines to html");
+      bformata(html, "<strong>%s</strong>", contents->data);
+      bdestroy(contents);
+      break;
+    case emph:
+      check(inlines_to_html(ils->content.inlines, &contents) == 0,
+            "error converting inlines to html");
+      bformata(html, "<em>%s</em>", contents->data);
+      bdestroy(contents);
+      break;
+    }
+    ils = ils->next;
+  }
+  *result = html;
+  return 0;
+ error:
+  // html is never handed to the caller on this path, so release it here.
+  // contents is deliberately not touched: it is either unset or already
+  // destroyed by an earlier iteration.
+  bdestroy(html);
+  return -1;
+}
diff --git a/src/inlines.c b/src/inlines.c
new file mode 100644
index 0000000..9e35178
--- /dev/null
+++ b/src/inlines.c
@@ -0,0 +1,998 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <ctype.h>
+#include "bstrlib.h"
+#include "stmd.h"
+#include "uthash.h"
+#include "debug.h"
+#include "scanners.h"
+#include "utf8.h"
+
+// Release a reference together with the three strings it holds.
+extern void free_reference(reference *ref) {
+  bdestroy(ref->title);
+  bdestroy(ref->url);
+  bdestroy(ref->label);
+  free(ref);
+}
+
+// Free every reference stored in the hash table *refmap, then the map
+// pointer itself. A NULL refmap is ignored.
+extern void free_reference_map(reference **refmap) {
+  reference *entry;
+  reference *following;
+  if (refmap == NULL) {
+    return;
+  }
+  /* unlink each entry from the table before releasing it */
+  HASH_ITER(hh, *refmap, entry, following) {
+    HASH_DEL(*refmap, entry);
+    free_reference(entry);
+  }
+  free(refmap);
+}
+
+// normalize reference: collapse internal whitespace to single space,
+// remove leading/trailing whitespace, case fold.
+// Returns a newly allocated string (the result of case_fold, edited in
+// place); s itself is not modified.
+static bstring normalize_reference(bstring s)
+{
+  bstring normalized = case_fold(s);
+  int pos = 0;
+  int startpos;
+  unsigned char c; // unsigned so that bytes >= 0x80 are valid isspace input
+  while ((c = bchar(normalized, pos))) {
+    if (isspace(c)) {
+      startpos = pos;
+      // skip til next non-space; this must scan the folded copy, because
+      // case folding can change byte lengths and so shift positions
+      // relative to s
+      pos++;
+      while (isspace((unsigned char) bchar(normalized, pos))) {
+        pos++;
+      }
+      // replace the whole run with one space
+      bdelete(normalized, startpos, pos - startpos);
+      binsertch(normalized, startpos, 1, ' ');
+      pos = startpos + 1;
+    } else {
+      pos++;
+    }
+  }
+  btrimws(normalized);
+  return normalized;
+}
+
+// Look up lab (after normalization) in the reference map. Returns the
+// matching reference, or NULL if there is none or refmap is NULL.
+extern reference* lookup_reference(reference** refmap, bstring lab)
+{
+  bstring key = normalize_reference(lab);
+  reference * found = NULL;
+  if (refmap) {
+    HASH_FIND_STR(*refmap, (char*) key->data, found);
+  }
+  // the normalized key is only needed for the lookup itself
+  bdestroy(key);
+  return found;
+}
+
+// Allocate a reference holding a normalized copy of label and copies of
+// url and title.
+extern reference* make_reference(bstring label, bstring url, bstring title)
+{
+  reference * made = malloc(sizeof *made);
+  made->title = bstrcpy(title);
+  made->url = bstrcpy(url);
+  made->label = normalize_reference(label);
+  return made;
+}
+
+// Insert ref into *refmap keyed by its label. If an entry with the same
+// label already exists, the map is left alone and ref is freed instead.
+extern void add_reference(reference** refmap, reference* ref)
+{
+  reference * existing = NULL;
+  HASH_FIND(hh, *refmap, (char*) ref->label->data,
+            (unsigned) blength(ref->label), existing);
+  if (existing != NULL) {
+    // the first definition wins; the duplicate will never be reachable
+    free_reference(ref);
+    return;
+  }
+  HASH_ADD_KEYPTR(hh, *refmap, (char*) ref->label->data,
+                  (unsigned) blength(ref->label), ref);
+}
+
+// Allocate an inline of tag t (link or image) that stores the given
+// label, url and title pointers as-is.
+inline static inl* make_linkable(int t, inl* label, bstring url, bstring title)
+{
+  inl* node = (inl*) malloc(sizeof *node);
+  node->next = NULL;
+  node->tag = t;
+  node->content.linkable.title = title;
+  node->content.linkable.url = url;
+  node->content.linkable.label = label;
+  return node;
+}
+
+// Allocate an inline of tag t whose content is the inline list contents.
+inline static inl* make_inlines(int t, inl* contents)
+{
+  inl* node = (inl*) malloc(sizeof *node);
+  node->next = NULL;
+  node->content.inlines = contents;
+  node->tag = t;
+  return node;
+}
+
+// Allocate an inline of tag t whose content is the string s (stored as-is,
+// not copied).
+inline static inl* make_literal(int t, bstring s)
+{
+  inl* node = (inl*) malloc(sizeof *node);
+  node->next = NULL;
+  node->content.literal = s;
+  node->tag = t;
+  return node;
+}
+
+// Allocate an inline of tag t that carries no content; the content field
+// is left unset.
+inline static inl* make_simple(int t)
+{
+  inl* node = (inl*) malloc(sizeof *node);
+  node->next = NULL;
+  node->tag = t;
+  return node;
+}
+
+// Macros for creating various kinds of inlines.
+#define make_str(s) make_literal(str, s)
+#define make_code(s) make_literal(code, s)
+#define make_raw_html(s) make_literal(raw_html, s)
+#define make_entity(s) make_literal(entity, s)
+#define make_linebreak() make_simple(linebreak)
+#define make_softbreak() make_simple(softbreak)
+#define make_link(label, url, title) make_linkable(link, label, url, title)
+#define make_image(alt, url, title) make_linkable(image, alt, url, title)
+#define make_emph(contents) make_inlines(emph, contents)
+#define make_strong(contents) make_inlines(strong, contents)
+
+// Free an inline list: every node reachable through ->next, along with
+// the strings and nested inline lists each node holds.
+extern void free_inlines(inl* e)
+{
+  inl * following;
+  for (; e != NULL; e = following) {
+    // remember the successor before the node itself is released
+    following = e->next;
+    switch (e->tag){
+    case str:
+    case raw_html:
+    case code:
+    case entity:
+      bdestroy(e->content.literal);
+      break;
+    case link:
+    case image:
+      bdestroy(e->content.linkable.url);
+      bdestroy(e->content.linkable.title);
+      free_inlines(e->content.linkable.label);
+      break;
+    case emph:
+    case strong:
+      free_inlines(e->content.inlines);
+      break;
+    case linebreak:
+    case softbreak:
+    default:
+      // nothing owned besides the node itself
+      break;
+    }
+    free(e);
+  }
+}
+
+// Link inline list b after the last node of inline list a and return the
+// head of the combined list. A NULL a is treated as the empty list, so b
+// is returned unchanged.
+inline static inl* append_inlines(inl* a, inl* b)
+{
+  inl* tail;
+  if (a == NULL) {
+    return b;
+  }
+  // walk to the last node of a
+  for (tail = a; tail->next != NULL; tail = tail->next) {
+  }
+  tail->next = b;
+  return a;
+}
+
+// Allocate a subject that parses s from position 0 using refmap for
+// reference lookups. Trailing whitespace is stripped from s in place, and
+// s is stored by pointer, not copied.
+static subject* make_subject(bstring s, reference** refmap)
+{
+  subject* subj = (subject*) malloc(sizeof *subj);
+  brtrimws(s); // remove final whitespace
+  subj->reference_map = refmap;
+  subj->label_nestlevel = 0;
+  subj->pos = 0;
+  subj->buffer = s;
+  return subj;
+}
+
+// Predicate for take_while: 1 if c is a backtick, 0 otherwise.
+inline static int isbacktick(int c)
+{
+  return c == '`' ? 1 : 0;
+}
+
+// Return the next character in the subject, without advancing.
+// Return 0 if at the end of the subject.
+#define peek_char(subj) bchar(subj->buffer, subj->pos)
+
+// Return true if the subject is exhausted, i.e. the current position is at
+// or past the end of the buffer and there are no more characters to read.
+inline static int is_eof(subject* subj)
+{
+  return (subj->pos >= blength(subj->buffer));
+}
+
+// Advance the subject. Doesn't check for eof.
+#define advance(subj) subj->pos += 1
+
+// Advance the subject over the longest run of characters for which f
+// returns non-zero (stopping at a 0 character) and return that run as a
+// new string.
+inline static bstring take_while(subject* subj, int (*f)(int))
+{
+  int begin = subj->pos;
+  for (;;) {
+    unsigned char ch = peek_char(subj);
+    if (ch == 0 || !f(ch)) {
+      break;
+    }
+    advance(subj);
+  }
+  // every accepted character advanced pos by one, so the difference is
+  // the length of the run
+  return bmidstr(subj->buffer, begin, subj->pos - begin);
+}
+
+// Consume one character and return it as a new one-character string.
+// Returns NULL, without advancing, when the subject is at eof.
+inline static bstring take_one(subject* subj)
+{
+  int at = subj->pos;
+  if (!is_eof(subj)) {
+    advance(subj);
+    return bmidstr(subj->buffer, at, 1);
+  }
+  return NULL;
+}
+
+// Try to process a backtick code span that began with a
+// span of ticks of length openticklength (already
+// parsed). Return 0 if you don't find matching closing
+// backticks, otherwise return the position in the subject
+// after the closing backticks.
+// Implemented as a loop rather than by recursion so that stack use does not
+// grow with the number of non-matching backtick runs, and so that a 0 byte
+// inside the buffer is skipped like any other character instead of
+// stalling the scan.
+static int scan_to_closing_backticks(subject* subj, int openticklength)
+{
+  int numticks;
+  while (true) {
+    // read non backticks, up to the end of the buffer
+    while (!is_eof(subj) && peek_char(subj) != '`') {
+      advance(subj);
+    }
+    if (is_eof(subj)) {
+      return 0; // did not find closing ticks, return 0
+    }
+    // count the run of backticks we stopped at
+    numticks = 0;
+    while (peek_char(subj) == '`') {
+      advance(subj);
+      numticks++;
+    }
+    if (numticks == openticklength) {
+      return (subj->pos);
+    }
+    // wrong length: keep scanning after this run
+  }
+}
+
+// Collapse each run of spaces and newlines in s into one space, editing s
+// in place. Always returns 0.
+static int normalize_whitespace(bstring s)
+{
+  bool prev_was_space = false;
+  int i = 0;
+  char ch;
+  while ((ch = bchar(s, i))) {
+    if (ch != ' ' && ch != '\n') {
+      prev_was_space = false;
+      i++;
+      continue;
+    }
+    if (prev_was_space) {
+      // second or later whitespace character of a run: drop it
+      bdelete(s, i, 1);
+    } else {
+      // first whitespace character of a run: keep it, as a space
+      if (ch == '\n') {
+        bdelete(s, i, 1);
+        binsertch(s, i, 1, ' ');
+      }
+      i++;
+    }
+    prev_was_space = true;
+  }
+  return 0;
+}
+
+// Parse the backtick run at the current position. If a closing run of the
+// same length is found, return a code inline holding the text in between
+// (trimmed, with whitespace collapsed). Otherwise rewind to just after the
+// opening run and return the backticks themselves as a str inline.
+// Assumes that the subject has a backtick at the current position.
+static inl* handle_backticks(subject *subj)
+{
+  bstring ticks = take_while(subj, isbacktick);
+  int nticks = blength(ticks);
+  int content_start = subj->pos;
+  int after_close = scan_to_closing_backticks(subj, nticks);
+  bstring code_text;
+
+  if (after_close == 0) { // not found
+    subj->pos = content_start; // rewind
+    return make_str(ticks);
+  }
+  bdestroy(ticks);
+  // everything between the opening run and the closing run
+  code_text = bmidstr(subj->buffer, content_start,
+                      after_close - content_start - nticks);
+  btrimws(code_text);
+  normalize_whitespace(code_text);
+  return make_code(code_text);
+}
+
+// Scan a run of the delimiter character c ('*' or '_') at the current
+// position and return its length, or 0 if there is none.
+// Don't advance position.
+// *can_open is set when the run has 1-3 delimiters and is not followed by
+// whitespace; *can_close when it has 1-3 delimiters and is not preceded by
+// whitespace. For '_', opening additionally requires a non-alphanumeric
+// character before the run and closing a non-alphanumeric one after it.
+// The start of the subject counts as being preceded by a newline.
+static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close)
+{
+  int numdelims = 0;
+  // Kept as unsigned char: the <ctype.h> classifiers are only defined for
+  // values representable as unsigned char (or EOF), and the buffer may
+  // contain bytes >= 0x80.
+  unsigned char char_before, char_after;
+  int startpos = subj->pos;
+
+  char_before = subj->pos == 0 ? '\n' : bchar(subj->buffer, subj->pos - 1);
+  while (peek_char(subj) == c) {
+    numdelims++;
+    advance(subj);
+  }
+  char_after = peek_char(subj);
+  *can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after);
+  *can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before);
+  if (c == '_') {
+    *can_open = *can_open && !isalnum(char_before);
+    *can_close = *can_close && !isalnum(char_after);
+  }
+  subj->pos = startpos; // rewind: this function only looks ahead
+  return numdelims;
+}
+
+// Parse strong/emph or a fallback.
+// Assumes the subject has '_' or '*' at the current position.
+//
+// The opening delimiter run is first emitted as a plain str node
+// (first_head).  Following inlines are parsed and chained after it; when a
+// suitable closing run is found, first_head is converted in place into an
+// emph/strong node whose content is the chain that followed it.  If no
+// closer is found, the str node and whatever was parsed after it are
+// returned unchanged.
+static inl* handle_strong_emph(subject* subj, char c)
+{
+  bool can_open, can_close;
+  inl * result = NULL;
+  // Cursor handed to parse_inline().  Nothing outlives this call, so a
+  // stack slot is used: no heap allocation and no allocation-failure path.
+  inl * cursor = NULL;
+  inl ** last = &cursor;
+  inl * new;
+  inl * il;
+  inl * first_head = NULL;
+  inl * first_close = NULL;
+  int first_close_delims = 0;
+  int numdelims;
+
+  numdelims = scan_delims(subj, c, &can_open, &can_close);
+  subj->pos += numdelims;
+
+  // Provisional result: the delimiter run as literal text.
+  new = make_str(bmidstr(subj->buffer, subj->pos - numdelims, numdelims));
+  *last = new;
+  first_head = new;
+  result = new;
+
+  if (!can_open || numdelims == 0) {
+    goto done;
+  }
+
+  switch (numdelims) {
+  case 1:
+    // Single delimiter: look for a closing run of at least one -> emph.
+    while (true) {
+      numdelims = scan_delims(subj, c, &can_open, &can_close);
+      if (numdelims >= 1 && can_close) {
+        subj->pos += 1;
+        first_head->tag = emph;
+        bdestroy(first_head->content.literal);
+        first_head->content.inlines = first_head->next;
+        first_head->next = NULL;
+        goto done;
+      } else {
+        if (!parse_inline(subj, last)) {
+          goto done;
+        }
+      }
+    }
+    break;
+  case 2:
+    // Double delimiter: look for a closing run of at least two -> strong.
+    while (true) {
+      numdelims = scan_delims(subj, c, &can_open, &can_close);
+      if (numdelims >= 2 && can_close) {
+        subj->pos += 2;
+        first_head->tag = strong;
+        bdestroy(first_head->content.literal);
+        first_head->content.inlines = first_head->next;
+        first_head->next = NULL;
+        goto done;
+      } else {
+        if (!parse_inline(subj, last)) {
+          goto done;
+        }
+      }
+    }
+    break;
+  case 3:
+    // Triple delimiter: two closers are needed.  Each closer is recorded
+    // as a placeholder str node in the chain; when the second one is seen
+    // the chain is rebuilt into nested emph/strong and both placeholders
+    // are freed.
+    while (true) {
+      numdelims = scan_delims(subj, c, &can_open, &can_close);
+      if (can_close && numdelims >= 1 && numdelims <= 3 &&
+          numdelims != first_close_delims) {
+        new = make_str(bmidstr(subj->buffer, subj->pos, numdelims));
+        append_inlines(*last, new);
+        *last = new;
+        if (numdelims == 3) {
+          numdelims = 1;  // consume only one delimiter of a triple run
+        }
+        subj->pos += numdelims;
+        if (first_close) {
+          // Second closer: the outer node takes the kind not used by the
+          // first closer, the inner node takes the first closer's kind.
+          first_head->tag = first_close_delims == 1 ? strong : emph;
+          bdestroy(first_head->content.literal);
+          first_head->content.inlines =
+            make_inlines(first_close_delims == 1 ? emph : strong,
+                         first_head->next);
+
+          // Cut the inner content just before the first-closer placeholder.
+          il = first_head->next;
+          while (il->next && il->next != first_close) {
+            il = il->next;
+          }
+          il->next = NULL;
+
+          // What followed the first closer becomes siblings of the inner
+          // node inside the outer node.
+          first_head->content.inlines->next = first_close->next;
+
+          // Cut the outer content just before the second-closer
+          // placeholder, then drop that placeholder.
+          il = first_head->content.inlines;
+          while (il->next && il->next != *last) {
+            il = il->next;
+          }
+          il->next = NULL;
+          free_inlines(*last);
+
+          first_close->next = NULL;
+          free_inlines(first_close);
+          first_head->next = NULL;
+          goto done;
+        } else {
+          first_close = *last;
+          first_close_delims = numdelims;
+        }
+      } else {
+        if (!parse_inline(subj, last)) {
+          goto done;
+        }
+      }
+    }
+    break;
+  default:
+    goto done;
+  }
+
+ done:
+  return result;
+}
+
+// Parse what follows a backslash and return the resulting inline:
+// an escaped punctuation character, a hard line break (backslash before
+// newline), or the backslash itself when it escapes nothing.
+static inl* handle_backslash(subject *subj)
+{
+  unsigned char escaped;
+
+  advance(subj);  // step over the backslash
+  escaped = peek_char(subj);
+
+  if (ispunct(escaped)) {
+    // only ascii symbols and newline can be escaped
+    advance(subj);
+    return make_str(bformat("%c", escaped));
+  }
+  if (escaped == '\n') {
+    advance(subj);
+    return make_linebreak();
+  }
+  return make_str(bfromcstr("\\"));
+}
+
+// Parse an entity at the current position, or a bare "&" if what follows
+// is not an entity.  Precondition: the subject is positioned on '&'.
+static inl* handle_entity(subject* subj)
+{
+  int len = scan_entity(subj->buffer, subj->pos);
+  inl * node;
+
+  if (!len) {
+    // Not an entity: consume just the ampersand.
+    advance(subj);
+    return make_str(bfromcstr("&"));
+  }
+
+  node = make_entity(bmidstr(subj->buffer, subj->pos, len));
+  subj->pos += len;
+  return node;
+}
+
+// Variant of make_str that recognises entities: splits s into a sequence
+// of str and entity inlines and returns the head of that sequence.
+static inl * make_str_with_entities(bstring s)
+{
+  subject * subj = make_subject(s, NULL);
+  inl * head = NULL;
+  inl * piece;
+  int amp;
+  char c;
+
+  for (c = peek_char(subj); c; c = peek_char(subj)) {
+    if (c == '&') {
+      piece = handle_entity(subj);
+    } else {
+      // Plain text runs up to the next '&', or to the end of the buffer.
+      amp = bstrchrp(subj->buffer, '&', subj->pos);
+      if (amp == BSTR_ERR) {
+        amp = blength(subj->buffer);
+      }
+      piece = make_str(bmidstr(subj->buffer, subj->pos, amp - subj->pos));
+      subj->pos = amp;
+    }
+    head = append_inlines(head, piece);
+  }
+  free(subj);
+  return head;
+}
+
+// Destructively unescape a string: remove each backslash that precedes a
+// punctuation character.  The string is modified in place; always
+// returns 0.
+extern int unescape(bstring url)
+{
+  int searchpos = 0;
+  while ((searchpos = bstrchrp(url, '\\', searchpos)) != BSTR_ERR) {
+    if (ispunct((unsigned char) bchar(url, searchpos + 1))) {
+      bdelete(url, searchpos, 1);
+    }
+    // Always step forward.  After a deletion searchpos indexes the escaped
+    // character, which must not be rescanned: otherwise an escaped
+    // backslash ("\\\\") would itself be taken as an escape for whatever
+    // follows it and "\\\\*" would collapse to "*" instead of "\\*".
+    searchpos++;
+  }
+  return 0;
+}
+
+// Clean a URL in place: remove surrounding whitespace and surrounding <>,
+// and remove \ that escape punctuation.  Always returns 0.
+static int clean_url(bstring url)
+{
+  int urllength;
+
+  // Trim first: the length must describe the trimmed string, otherwise the
+  // index of the last character is stale and a trailing '>' is missed
+  // whenever the URL had surrounding whitespace.
+  btrimws(url);
+  urllength = blength(url);
+
+  // remove surrounding <> if any:
+  if (bchar(url, 0) == '<' && bchar(url, urllength - 1) == '>') {
+    bdelete(url, 0, 1);
+    bdelete(url, urllength - 2, 1);  // index of '>' after the first delete
+  }
+  unescape(url);
+  return 0;
+}
+
+// Clean a title in place: strip one pair of surrounding delimiters
+// ('...', "..." or (...)) if present, then remove \ that escape
+// punctuation.  Always returns 0.
+static int clean_title(bstring title)
+{
+  int len = blength(title);
+  int opener = bchar(title, 0);
+  int closer = bchar(title, len - 1);
+  // The delimiter that must close the title, given how it opens.
+  int expected = (opener == '(') ? ')' : opener;
+
+  if ((opener == '\'' || opener == '(' || opener == '"') &&
+      closer == expected) {
+    bdelete(title, 0, 1);
+    bdelete(title, len - 2, 1);
+  }
+  unescape(title);
+  return 0;
+}
+
+// Parse what follows a '<': a URI autolink, an email autolink or a raw
+// HTML tag, tried in that order.  If none matches, the '<' alone is
+// returned as text.  Precondition: the subject is positioned on '<'.
+static inl* handle_pointy_brace(subject* subj)
+{
+  int len;
+  bstring text;
+  inl* node;
+
+  advance(subj);  // step over the opening <
+
+  // 1. URI autolink.  The match includes the closing >, the text does not.
+  len = scan_autolink_uri(subj->buffer, subj->pos);
+  if (len > 0) {
+    text = bmidstr(subj->buffer, subj->pos, len - 1);
+    subj->pos += len;
+    node = make_link(make_str_with_entities(text),
+                     bstrcpy(text), bfromcstr(""));
+    bdestroy(text);
+    return node;
+  }
+
+  // 2. Email autolink: same shape, with a mailto: destination.
+  len = scan_autolink_email(subj->buffer, subj->pos);
+  if (len > 0) {
+    text = bmidstr(subj->buffer, subj->pos, len - 1);
+    subj->pos += len;
+    node = make_link(make_str_with_entities(text),
+                     bformat("mailto:%s", text->data),
+                     bfromcstr(""));
+    bdestroy(text);
+    return node;
+  }
+
+  // 3. Raw HTML tag, or nothing at all.
+  len = scan_html_tag(subj->buffer, subj->pos);
+  if (len <= 0) {
+    return make_str(bfromcstr("<"));
+  }
+  text = bmidstr(subj->buffer, subj->pos, len);
+  binsertch(text, 0, 1, '<');  // put back the < consumed above
+  subj->pos += len;
+  return make_raw_html(text);
+}
+
+// Parse a link label. Returns 1 if successful, leaving the subject just past the closing ].
+// Unless raw_label is null, it is set to point to the raw contents of the [] (a new bstring from bmidstr; callers in this file bdestroy it).
+// Assumes the subject has a '[' character at the current position.
+// Returns 0 and does not advance if no matching ] is found.
+// Note the precedence: code backticks have precedence over label bracket
+// markers, which have precedence over *, _, and other inline formatting
+// markers. So, 2 below contains a link while 1 does not:
+// 1. [a link `with a ](/url)` character
+// 2. [a link *with emphasized ](/url) text*
+static int link_label(subject* subj, bstring* raw_label)
+{
+ int nestlevel = 0; // number of nested '[' currently unclosed inside the label
+ inl* tmp = NULL; // scratch: inlines parsed only so they can be skipped
+ bstring raw;
+ int startpos = subj->pos; // position of the opening '[', used to rewind
+ if (subj->label_nestlevel) {
+ // if we've already checked to the end of the subject
+ // for a label, even with a different starting [, we
+ // know we won't find one here and we can just return.
+ // Note: nestlevel 1 would be: [foo [bar]
+ // nestlevel 2 would be: [foo [bar [baz]
+ subj->label_nestlevel--;
+ return 0;
+ }
+ advance(subj); // advance past [
+ char c;
+ while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) { // stop at end of input or at the ] that closes the outermost [
+ switch (c) {
+ case '`':
+ tmp = handle_backticks(subj); // consume a whole code span so a ] inside it is ignored
+ free_inlines(tmp);
+ break;
+ case '<':
+ tmp = handle_pointy_brace(subj); // likewise for autolinks and raw HTML tags
+ free_inlines(tmp);
+ break;
+ case '[': // nested []
+ nestlevel++;
+ advance(subj);
+ break;
+ case ']': // nested []
+ nestlevel--;
+ advance(subj);
+ break;
+ case '\\':
+ advance(subj);
+ if (ispunct(peek_char(subj))) { // a backslash-escaped punctuation char (e.g. \]) is skipped as a unit
+ advance(subj);
+ }
+ break;
+ default:
+ advance(subj);
+ }
+ }
+ if (c == ']') {
+ if (raw_label != NULL) {
+ raw = bmidstr(subj->buffer, startpos + 1, subj->pos - (startpos + 1)); // text strictly between [ and ]
+ *raw_label = raw;
+ }
+ subj->label_nestlevel = 0;
+ advance(subj); // advance past ]
+ return 1;
+ } else {
+ if (c == 0) { // ran off the end: remember how many [ were still open so later calls can bail out early
+ subj->label_nestlevel = nestlevel;
+ }
+ subj->pos = startpos; // rewind
+ return 0;
+ }
+}
+
+// Parse a link or the link portion of an image, or return a fallback (the label wrapped in literal "[" and "]", or a lone "[" when no label is found).
+static inl* handle_left_bracket(subject* subj)
+{
+ inl* lab = NULL;
+ inl* result = NULL;
+ reference* ref;
+ int n;
+ int sps;
+ int found_label;
+ int endlabel, starturl, endurl, starttitle, endtitle, endall;
+ bstring url, title, rawlabel, reflabel;
+ bstring rawlabel2 = NULL; // stays NULL unless a second [label] is parsed
+ found_label = link_label(subj, &rawlabel); // rawlabel is only assigned when this returns 1
+ endlabel = subj->pos; // position just after the first label's ]
+ if (found_label) {
+ if (peek_char(subj) == '(' &&
+ ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) &&
+ ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) {
+ // try to parse an explicit link:
+ starturl = subj->pos + 1 + sps; // after (
+ endurl = starturl + n;
+ starttitle = endurl + scan_spacechars(subj->buffer, endurl);
+ // ensure there are spaces btw url and title
+ endtitle = (starttitle == endurl) ? starttitle :
+ starttitle + scan_link_title(subj->buffer, starttitle);
+ endall = endtitle + scan_spacechars(subj->buffer, endtitle);
+ if (bchar(subj->buffer, endall) == ')') {
+ subj->pos = endall + 1; // consume through the closing )
+ url = bmidstr(subj->buffer, starturl, endurl - starturl);
+ clean_url(url);
+ title = bmidstr(subj->buffer, starttitle, endtitle - starttitle);
+ clean_title(title);
+ lab = parse_inlines(rawlabel, NULL);
+ bdestroy(rawlabel);
+ return make_link(lab, url, title);
+ } else {
+ // if we get here, we matched a label but didn't get further:
+ subj->pos = endlabel;
+ lab = parse_inlines(rawlabel, subj->reference_map);
+ bdestroy(rawlabel);
+ result = append_inlines(make_str(bfromcstr("[")),
+ append_inlines(lab,
+ make_str(bfromcstr("]"))));
+ return result;
+ }
+ } else {
+ // Check for reference link.
+ // First, see if there's another label:
+ subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel); // skip whitespace after the first label
+ reflabel = rawlabel;
+ // if followed by a nonempty link label, we change reflabel to it:
+ if (peek_char(subj) == '[' &&
+ link_label(subj, &rawlabel2)) {
+ if (blength(rawlabel2) > 0) {
+ reflabel = rawlabel2;
+ }
+ } else {
+ subj->pos = endlabel; // no second label: undo the whitespace skip
+ }
+ // look up reflabel (the second label if nonempty, else the first) in subject->reference_map:
+ ref = lookup_reference(subj->reference_map, reflabel);
+ if (ref != NULL) { // found
+ lab = parse_inlines(rawlabel, NULL);
+ result = make_link(lab, bstrcpy(ref->url), bstrcpy(ref->title));
+ } else {
+ subj->pos = endlabel; // unknown reference: only the first label is consumed
+ lab = parse_inlines(rawlabel, subj->reference_map);
+ result = append_inlines(make_str(bfromcstr("[")),
+ append_inlines(lab, make_str(bfromcstr("]"))));
+ }
+ bdestroy(rawlabel);
+ bdestroy(rawlabel2); // may still be NULL here
+ return result;
+ }
+ }
+ // If we fall through to here, it means we didn't match a link:
+ advance(subj); // advance past [
+ return make_str(bfromcstr("["));
+}
+
+// Parse a line ending and return a linebreak (hard) or softbreak inline.
+// A break is hard when the newline is preceded by at least two spaces.
+// Precondition: the subject is positioned on a newline.
+static inl* handle_newline(subject *subj)
+{
+  int nl = subj->pos;
+  bool hard;
+
+  advance(subj);  // the newline itself
+  while (peek_char(subj) == ' ') {
+    advance(subj);  // leading spaces of the next line
+  }
+
+  hard = nl > 1 &&
+    bchar(subj->buffer, nl - 1) == ' ' &&
+    bchar(subj->buffer, nl - 2) == ' ';
+  return hard ? make_linebreak() : make_softbreak();
+}
+
+// Predicate for parse_inlines_while: 1 while input remains, 0 at the end.
+inline static int not_eof(subject* subj)
+{
+  return is_eof(subj) ? 0 : 1;
+}
+
+// Keep parsing inlines for as long as the predicate f holds and
+// parse_inline succeeds.  Returns the head of the resulting list
+// (NULL if nothing was parsed).
+extern inl* parse_inlines_while(subject* subj, int (*f)(subject*))
+{
+  inl* head = NULL;
+
+  for (;;) {
+    if (!(*f)(subj)) {
+      break;
+    }
+    if (!parse_inline(subj, &head)) {
+      break;
+    }
+  }
+  return head;
+}
+
+// Parse an inline, advancing subject, and add it to the list at *last.
+// If *last is NULL it is set to the new inline; otherwise the new inline is appended with append_inlines and *last itself is left unchanged.
+// Return 0 if no inline can be parsed (end of input), 1 otherwise.
+extern int parse_inline(subject* subj, inl ** last)
+{
+ inl* new = NULL;
+ bstring contents;
+ bstring special_chars;
+ unsigned char c;
+ int endpos;
+ c = peek_char(subj);
+ if (c == 0) {
+ return 0;
+ }
+ switch(c){ // dispatch on the first character; each handler consumes its own input
+ case '\n':
+ new = handle_newline(subj);
+ break;
+ case '`':
+ new = handle_backticks(subj);
+ break;
+ case '\\':
+ new = handle_backslash(subj);
+ break;
+ case '&':
+ new = handle_entity(subj);
+ break;
+ case '<':
+ new = handle_pointy_brace(subj);
+ break;
+ case '_':
+ if (subj->pos > 0 && (isalnum(bchar(subj->buffer, subj->pos - 1)) ||
+ bchar(subj->buffer, subj->pos - 1) == '_')) {
+ new = make_str(take_one(subj)); // '_' right after an alphanumeric or another '_' is literal text
+ } else {
+ new = handle_strong_emph(subj, '_');
+ }
+ break;
+ case '*':
+ new = handle_strong_emph(subj, '*');
+ break;
+ case '[':
+ new = handle_left_bracket(subj);
+ break;
+ case '!':
+ advance(subj);
+ if (peek_char(subj) == '[') {
+ new = handle_left_bracket(subj);
+ if (new != NULL && new->tag == link) {
+ new->tag = image; // "![...]" that parsed as a link becomes an image
+ } else {
+ new = append_inlines(make_str(bfromcstr("!")), new); // not a link: keep the '!' as text before the fallback
+ }
+ } else {
+ new = make_str(bfromcstr("!"));
+ }
+ break;
+ default:
+ // we read until we hit a special character
+ special_chars = bfromcstr("\n\\`&_*[]<!");
+ endpos = binchr(subj->buffer, subj->pos, special_chars);
+ bdestroy(special_chars);
+ if (endpos == subj->pos) {
+ // current char is special: read a 1-character str
+ contents = take_one(subj); // of the special set only ']' has no case above, so only it reaches this branch
+ } else if (endpos == BSTR_ERR) {
+ // special char not found, take whole rest of buffer:
+ endpos = subj->buffer->slen;
+ contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos);
+ subj->pos = endpos;
+ } else {
+ // take buffer from subj->pos to endpos to str.
+ contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos);
+ subj->pos = endpos;
+ // if we're at a newline, strip trailing spaces.
+ if (peek_char(subj) == '\n') {
+ brtrimws(contents);
+ }
+ }
+ new = make_str(contents);
+ }
+ if (*last == NULL) {
+ *last = new; // first inline: it becomes the head of the list
+ } else {
+ append_inlines(*last, new);
+ }
+ return 1;
+}
+
+// Parse the whole of input into a list of inlines, resolving references
+// against refmap.  Returns the head of the list.
+extern inl* parse_inlines(bstring input, reference** refmap)
+{
+  subject * whole = make_subject(input, refmap);
+  inl * head;
+
+  head = parse_inlines_while(whole, not_eof);
+  free(whole);
+  return head;
+}
+
+// Skip zero or more space characters, crossing at most one newline.
+void spnl(subject* subj)
+{
+  bool crossed_newline = false;
+
+  for (;;) {
+    if (peek_char(subj) == ' ') {
+      advance(subj);
+    } else if (!crossed_newline && peek_char(subj) == '\n') {
+      crossed_newline = true;
+      advance(subj);
+    } else {
+      break;
+    }
+  }
+}
+
+// Parse a reference definition: [label]: url "optional title"
+// Assumes input begins with a '[' character.  When a reference is found
+// it is added to refmap.
+// Returns 0 if no reference was found, otherwise the position in input
+// just after the reference.
+extern int parse_reference(bstring input, reference** refmap)
+{
+  subject * subj = make_subject(input, NULL);
+  bstring lab = NULL;
+  bstring url = NULL;
+  bstring title = NULL;
+  int len;
+  int beforetitle;
+  int consumed = 0;  // stays 0 unless the whole reference parses
+
+  // label, then a colon:
+  if (!link_label(subj, &lab)) {
+    goto cleanup;
+  }
+  if (peek_char(subj) != ':') {
+    goto cleanup;
+  }
+  advance(subj);
+
+  // link url (required):
+  spnl(subj);
+  len = scan_link_url(subj->buffer, subj->pos);
+  if (!len) {
+    goto cleanup;
+  }
+  url = bmidstr(subj->buffer, subj->pos, len);
+  clean_url(url);
+  subj->pos += len;
+
+  // link title (optional); without one, step back to just after the url:
+  beforetitle = subj->pos;
+  spnl(subj);
+  len = scan_link_title(subj->buffer, subj->pos);
+  if (len) {
+    title = bmidstr(subj->buffer, subj->pos, len);
+    clean_title(title);
+    subj->pos += len;
+  } else {
+    subj->pos = beforetitle;
+    title = bfromcstr("");
+  }
+
+  // only spaces may remain before the newline or the end of input:
+  while (peek_char(subj) == ' ') {
+    advance(subj);
+  }
+  if (peek_char(subj) == '\n') {
+    advance(subj);
+  } else if (peek_char(subj) != 0) {
+    goto cleanup;
+  }
+
+  // insert reference into refmap
+  add_reference(refmap, make_reference(lab, url, title));
+  consumed = subj->pos;
+
+ cleanup:
+  free(subj);
+  bdestroy(lab);
+  bdestroy(url);
+  bdestroy(title);
+  return consumed;
+}
+
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..40a63bc
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,102 @@
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "bstrlib.h"
+#include "stmd.h"
+#include "debug.h"
+
+// Write the command-line synopsis and the option list to stdout.
+void print_usage()
+{
+  fputs("Usage: stmd [FILE*]\n", stdout);
+  fputs("Options: --help, -h Print usage information\n", stdout);
+  fputs(" --ast Print AST instead of HTML\n", stdout);
+  fputs(" --version Print version\n", stdout);
+}
+
+// Entry point.  Parses the files named on the command line, or stdin when
+// no file is given, as one document and prints it as HTML, or as an AST
+// when --ast is given.
+// Returns 0 on success and -1 when a check() fails; --help and --version
+// exit with status 0, an unknown option or an unreadable file with 1.
+int main(int argc, char *argv[]) {
+  int i;
+  bool ast = false;
+  int g = 0;
+  int numfps = 0;
+  int files[argc];  // argv indices of the file arguments
+
+  for (i=1; i < argc; i++) {
+    if (strcmp(argv[i], "--version") == 0) {
+      printf("stmd %s", VERSION);
+      printf(" - standard markdown converter (c) 2014 John MacFarlane\n");
+      exit(0);
+    } else if ((strcmp(argv[i], "--help") == 0) ||
+               (strcmp(argv[i], "-h") == 0)) {
+      print_usage();
+      exit(0);
+    } else if (strcmp(argv[i], "--ast") == 0) {
+      ast = true;
+    } else if (*argv[i] == '-') {
+      print_usage();
+      exit(1);
+    } else { // treat as file argument
+      files[g] = i;
+      g++;
+    }
+  }
+
+  numfps = g;
+  bstring s = NULL;
+  bstring html;
+  block * cur = make_document();
+  int linenum = 1;  // keeps counting across files
+  FILE * fp = NULL;
+
+  if (numfps == 0) {
+    // read from stdin
+    while ((s = bgets((bNgetc) fgetc, stdin, '\n'))) {
+      check(incorporate_line(s, linenum, &cur) == 0,
+            "error incorporating line %d", linenum);
+      bdestroy(s);
+      linenum++;
+    }
+  } else {
+    // iterate over input files
+    for (g=0; g < numfps; g++) {
+
+      fp = fopen(argv[files[g]], "r");
+      if (fp == NULL) {
+        fprintf(stderr, "Error opening file %s: %s\n",
+                argv[files[g]], strerror(errno));
+        exit(1);
+      }
+
+      struct bStream *stream = bsopen((bNread)fread, fp);
+      if (stream == NULL) {
+        // Without a stream nothing can be read: stop here.
+        fprintf(stderr, "Error opening stream\n");
+        fclose(fp);
+        exit(1);
+      }
+
+      // bsreadln() fills an existing bstring and fails at once when handed
+      // NULL, so the line buffer has to exist before the loop starts.
+      s = bfromcstr("");
+      check(s != NULL, "could not allocate line buffer");
+      while (bsreadln(s, stream, '\n') != BSTR_ERR) {
+        check(incorporate_line(s, linenum, &cur) == 0,
+              "error incorporating line %d of %s", linenum, argv[files[g]]);
+        linenum++;
+      }
+      bdestroy(s);
+      s = NULL;
+      bsclose(stream);  // releases the stream only, not the FILE
+      fclose(fp);
+      fp = NULL;
+    }
+  }
+
+  // Close every container that is still open, innermost first.
+  while (cur != cur->top) {
+    finalize(cur, linenum);
+    cur = cur->parent;
+  }
+  check(cur == cur->top, "problems finalizing open containers");
+  finalize(cur, linenum);
+  process_inlines(cur, cur->attributes.refmap);
+  if (ast) {
+    print_blocks(cur, 0);
+  } else {
+    check(blocks_to_html(cur, &html, false) == 0, "could not format as HTML");
+    printf("%s", html->data);
+    bdestroy(html);
+  }
+  free_blocks(cur);
+  return 0;
+error:
+  return -1;
+}
+
diff --git a/src/print.c b/src/print.c
new file mode 100644
index 0000000..a924870
--- /dev/null
+++ b/src/print.c
@@ -0,0 +1,168 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "bstrlib.h"
+#include "stmd.h"
+#include "debug.h"
+
+// Return a new bstring holding s wrapped in double quotes, with newline,
+// double quote and backslash written as \n, \" and \\.
+static bstring format_str(bstring s)
+{
+  bstring quoted = bfromcstr("");
+  int n = blength(s);
+  int i;
+
+  bformata(quoted, "\"");
+  for (i = 0; i < n; i++) {
+    char ch = bchar(s, i);
+    if (ch == '\n') {
+      bformata(quoted, "\\n");
+    } else if (ch == '"') {
+      bformata(quoted, "\\\"");
+    } else if (ch == '\\') {
+      bformata(quoted, "\\\\");
+    } else {
+      bformata(quoted, "%c", ch);
+    }
+  }
+  bformata(quoted, "\"");
+  return quoted;
+}
+
+// Functions to pretty-print inline and block lists, for debugging.
+
+// Prettyprint a block list, for debugging: one line per block on stdout,
+// followed by its children or inline content indented two more spaces.
+// indent is the number of spaces written before each block of this list.
+extern void print_blocks(block* b, int indent)
+{
+  struct ListData * data;
+  // Quoted copies produced by format_str(); each one is destroyed as soon
+  // as it has been printed instead of being leaked.
+  bstring content;
+  bstring info;
+
+  while (b != NULL) {
+    for (int i = 0; i < indent; i++) {
+      putchar(' ');
+    }
+    switch (b->tag) {
+    case document:
+      printf("document\n");
+      print_blocks(b->children, indent + 2);
+      break;
+    case block_quote:
+      printf("block_quote\n");
+      print_blocks(b->children, indent + 2);
+      break;
+    case list_item:
+      printf("list_item\n");
+      print_blocks(b->children, indent + 2);
+      break;
+    case list:
+      data = &(b->attributes.list_data);
+      if (data->list_type == ordered) {
+        printf("list (type=ordered tight=%s start=%d delim=%s)\n",
+               (data->tight ? "true" : "false"),
+               data->start,
+               (data->delimiter == parens ? "parens" : "period"));
+      } else {
+        printf("list (type=bullet tight=%s bullet_char=%c)\n",
+               (data->tight ? "true" : "false"),
+               data->bullet_char);
+      }
+      print_blocks(b->children, indent + 2);
+      break;
+    case atx_header:
+      printf("atx_header (level=%d)\n", b->attributes.header_level);
+      print_inlines(b->inline_content, indent + 2);
+      break;
+    case setext_header:
+      printf("setext_header (level=%d)\n", b->attributes.header_level);
+      print_inlines(b->inline_content, indent + 2);
+      break;
+    case paragraph:
+      printf("paragraph\n");
+      print_inlines(b->inline_content, indent + 2);
+      break;
+    case hrule:
+      printf("hrule\n");
+      break;
+    case indented_code:
+      content = format_str(b->string_content);
+      printf("indented_code %s\n", content->data);
+      bdestroy(content);
+      break;
+    case fenced_code:
+      info = format_str(b->attributes.fenced_code_data.info);
+      content = format_str(b->string_content);
+      printf("fenced_code length=%d info=%s %s\n",
+             b->attributes.fenced_code_data.fence_length,
+             info->data,
+             content->data);
+      bdestroy(info);
+      bdestroy(content);
+      break;
+    case html_block:
+      content = format_str(b->string_content);
+      printf("html_block %s\n", content->data);
+      bdestroy(content);
+      break;
+    case reference_def:
+      printf("reference_def\n");
+      break;
+    default:
+      log_warn("block type %d not implemented\n", b->tag);
+      break;
+    }
+    b = b->next;
+  }
+}
+
+// Print one "<name> <quoted literal>" line and release the quoted copy
+// that format_str() allocated for it.
+static void print_literal(const char *name, bstring literal)
+{
+  bstring quoted = format_str(literal);
+  printf("%s %s\n", name, quoted->data);
+  bdestroy(quoted);
+}
+
+// Print one "<name> url=... title=..." line for a link or image and
+// release the quoted copies that format_str() allocated for it.
+static void print_linkable(const char *name, bstring url, bstring title)
+{
+  bstring quoted_url = format_str(url);
+  bstring quoted_title = format_str(title);
+  printf("%s url=%s title=%s\n", name, quoted_url->data, quoted_title->data);
+  bdestroy(quoted_url);
+  bdestroy(quoted_title);
+}
+
+// Prettyprint an inline list, for debugging: one line per inline on
+// stdout, preceded by indent spaces; nested inlines (link/image labels,
+// emph/strong content) are printed indented two more spaces.
+extern void print_inlines(inl* ils, int indent)
+{
+  while (ils != NULL) {
+    for (int i = 0; i < indent; i++) {
+      putchar(' ');
+    }
+    switch (ils->tag) {
+    case str:
+      print_literal("str", ils->content.literal);
+      break;
+    case linebreak:
+      printf("linebreak\n");
+      break;
+    case softbreak:
+      printf("softbreak\n");
+      break;
+    case code:
+      print_literal("code", ils->content.literal);
+      break;
+    case raw_html:
+      print_literal("html", ils->content.literal);
+      break;
+    case entity:
+      print_literal("entity", ils->content.literal);
+      break;
+    case link:
+      print_linkable("link", ils->content.linkable.url,
+                     ils->content.linkable.title);
+      print_inlines(ils->content.linkable.label, indent + 2);
+      break;
+    case image:
+      print_linkable("image", ils->content.linkable.url,
+                     ils->content.linkable.title);
+      print_inlines(ils->content.linkable.label, indent + 2);
+      break;
+    case strong:
+      printf("strong\n");
+      // emph/strong keep their children in content.inlines; read that
+      // member rather than the link-only linkable.label.
+      print_inlines(ils->content.inlines, indent + 2);
+      break;
+    case emph:
+      printf("emph\n");
+      print_inlines(ils->content.inlines, indent + 2);
+      break;
+    }
+    ils = ils->next;
+  }
+}
diff --git a/src/scanners.h b/src/scanners.h
new file mode 100644
index 0000000..71e0520
--- /dev/null
+++ b/src/scanners.h
@@ -0,0 +1,15 @@
+#include "bstrlib.h"
+
+int scan_autolink_uri(bstring s, int pos); // URI autolink after '<'; chars matched, or 0
+int scan_autolink_email(bstring s, int pos); // email autolink after '<'; chars matched, or 0
+int scan_html_tag(bstring s, int pos); // HTML tag after '<'; chars matched, or 0
+int scan_html_block_tag(bstring s, int pos); // start of an HTML block tag, including '<'; chars matched, or 0
+int scan_link_url(bstring s, int pos); // URL of a link or reference; chars matched
+int scan_link_title(bstring s, int pos); // quoted or parenthesized link title; chars matched, or 0
+int scan_spacechars(bstring s, int pos); // run of spaces, tabs and newlines; chars matched
+int scan_atx_header_start(bstring s, int pos); // ATX header start; chars matched, or 0
+int scan_setext_header_line(bstring s, int pos); // setext underline; 1 or 2 for the header level, 0 for no match
+int scan_hrule(bstring s, int pos); // horizontal rule line; chars matched, or 0
+int scan_open_code_fence(bstring s, int pos); // opening code fence; chars matched, or 0
+int scan_close_code_fence(bstring s, int pos, int len); // closing code fence checked against len; chars matched, or 0
+int scan_entity(bstring s, int pos); // entity starting at '&'; chars matched, or 0
diff --git a/src/scanners.re b/src/scanners.re
new file mode 100644
index 0000000..f90238d
--- /dev/null
+++ b/src/scanners.re
@@ -0,0 +1,238 @@
+#include "bstrlib.h"
+
+/*!re2c
+ re2c:define:YYCTYPE = "unsigned char";
+ re2c:define:YYCURSOR = p;
+ re2c:define:YYMARKER = marker;
+ re2c:define:YYCTXMARKER = marker;
+ re2c:yyfill:enable = 0;
+
+ wordchar = [^\x00-\x20];
+
+ spacechar = [ \t\n];
+
+ reg_char = [^\\()\x00-\x20];
+
+ escaped_char = [\\][!"#$%&'()*+,./:;<=>?@[\\\]^_`{|}~-];
+
+ tagname = [A-Za-z][A-Za-z0-9]*;
+
+ blocktagname = 'article'|'header'|'aside'|'hgroup'|'blockquote'|'hr'|'body'|'li'|'br'|'map'|'button'|'object'|'canvas'|'ol'|'caption'|'output'|'col'|'p'|'colgroup'|'pre'|'dd'|'progress'|'div'|'section'|'dl'|'table'|'td'|'dt'|'tbody'|'embed'|'textarea'|'fieldset'|'tfoot'|'figcaption'|'th'|'figure'|'thead'|'footer'|'footer'|'tr'|'form'|'ul'|'h1'|'h2'|'h3'|'h4'|'h5'|'h6'|'video'|'script'|'style';
+
+ attributename = [a-zA-Z_:][a-zA-Z0-9:._-]*;
+
+ unquotedvalue = [^\"'=<>`\x00]+;
+ singlequotedvalue = ['][^'\x00]*['];
+ doublequotedvalue = [\"][^\"\x00]*[\"];
+
+ attributevalue = unquotedvalue | singlequotedvalue | doublequotedvalue;
+
+ attributevaluespec = spacechar* [=] spacechar* attributevalue;
+
+ attribute = spacechar+ attributename attributevaluespec?;
+
+ opentag = tagname attribute* spacechar* [/]? [>];
+ closetag = [/] tagname spacechar* [>];
+
+ htmlcomment = "!--" ([^-\x00]+ | [-][^-\x00]+)* "-->";
+
+ processinginstruction = "?" ([^?>\x00]+ | [?][^>\x00])* "?>";
+
+ declaration = "!" [A-Z]+ spacechar+ [^>\x00]* ">";
+
+ cdata = "![CDATA[" ([^\]\x00]+ | "]" [^\]\x00] | "]]" [^>\x00])* "]]>";
+
+ htmltag = opentag | closetag | htmlcomment | processinginstruction |
+ declaration | cdata;
+
+ in_parens_nosp = [(] (reg_char|escaped_char)* [)];
+
+ in_double_quotes = ["] (escaped_char|[^"\x00])* ["];
+ in_single_quotes = ['] (escaped_char|[^'\x00])* ['];
+ in_parens = [(] (escaped_char|[^)\x00])* [)];
+
+ scheme = 'coap'|'doi'|'javascript'|'aaa'|'aaas'|'about'|'acap'|'cap'|'cid'|'crid'|'data'|'dav'|'dict'|'dns'|'file'|'ftp'|'geo'|'go'|'gopher'|'h323'|'http'|'https'|'iax'|'icap'|'im'|'imap'|'info'|'ipp'|'iris'|'iris.beep'|'iris.xpc'|'iris.xpcs'|'iris.lwz'|'ldap'|'mailto'|'mid'|'msrp'|'msrps'|'mtqp'|'mupdate'|'news'|'nfs'|'ni'|'nih'|'nntp'|'opaquelocktoken'|'pop'|'pres'|'rtsp'|'service'|'session'|'shttp'|'sieve'|'sip'|'sips'|'sms'|'snmp'|'soap.beep'|'soap.beeps'|'tag'|'tel'|'telnet'|'tftp'|'thismessage'|'tn3270'|'tip'|'tv'|'urn'|'vemmi'|'ws'|'wss'|'xcon'|'xcon-userid'|'xmlrpc.beep'|'xmlrpc.beeps'|'xmpp'|'z39.50r'|'z39.50s'|'adiumxtra'|'afp'|'afs'|'aim'|'apt'|'attachment'|'aw'|'beshare'|'bitcoin'|'bolo'|'callto'|'chrome'|'chrome-extension'|'com-eventbrite-attendee'|'content'|'cvs'|'dlna-playsingle'|'dlna-playcontainer'|'dtn'|'dvb'|'ed2k'|'facetime'|'feed'|'finger'|'fish'|'gg'|'git'|'gizmoproject'|'gtalk'|'hcp'|'icon'|'ipn'|'irc'|'irc6'|'ircs'|'itms'|'jar'|'jms'|'keyparc'|'lastfm'|'ldaps'|'magnet'|'maps'|'market'|'message'|'mms'|'ms-help'|'msnim'|'mumble'|'mvn'|'notes'|'oid'|'palm'|'paparazzi'|'platform'|'proxy'|'psyc'|'query'|'res'|'resource'|'rmi'|'rsync'|'rtmp'|'secondlife'|'sftp'|'sgn'|'skype'|'smb'|'soldat'|'spotify'|'ssh'|'steam'|'svn'|'teamspeak'|'things'|'udp'|'unreal'|'ut2004'|'ventrilo'|'view-source'|'webcal'|'wtai'|'wyciwyg'|'xfire'|'xri'|'ymsgr';
+*/
+
+// Try to match URI autolink after first <, returning number of chars matched (the closing > is part of the match), or 0 if there is no match.
+extern int scan_autolink_uri(bstring s, int pos)
+{
+ unsigned char * marker = NULL; // bound to YYMARKER in the re2c definitions
+ unsigned char * p = &(s->data[pos]); // bound to YYCURSOR: scanning starts at s->data[pos]
+ unsigned char * start = p; // match length is measured from here
+/*!re2c
+ scheme [:]([^\x00-\x20<>\\]|escaped_char)*[>] { return (p - start); }
+ .? { return 0; }
+*/
+}
+
+// Try to match email autolink after first <, returning num of chars matched (the closing > is part of the match), or 0 if there is no match.
+extern int scan_autolink_email(bstring s, int pos)
+{
+ unsigned char * marker = NULL; // bound to YYMARKER in the re2c definitions
+ unsigned char * p = &(s->data[pos]); // bound to YYCURSOR: scanning starts at s->data[pos]
+ unsigned char * start = p; // match length is measured from here
+/*!re2c
+ [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
+ [@]
+ [a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
+ ([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*
+ [>] { return (p - start); }
+ .? { return 0; }
+*/
+}
+
+// Try to match an HTML tag (open tag, close tag, comment, processing instruction, declaration or CDATA section) after first <, returning num of chars matched, or 0 if there is no match.
+extern int scan_html_tag(bstring s, int pos)
+{
+ unsigned char * marker = NULL; // bound to YYMARKER in the re2c definitions
+ unsigned char * p = &(s->data[pos]); // bound to YYCURSOR: scanning starts at s->data[pos]
+ unsigned char * start = p; // match length is measured from here
+/*!re2c
+ htmltag { return (p - start); }
+ .? { return 0; }
+*/
+}
+
+// Try to match an HTML block tag including first <: an opening or closing block-level tag name, or the two characters "<!" or "<?",
+// returning num of chars matched, or 0 if there is no match.
+extern int scan_html_block_tag(bstring s, int pos)
+{
+ unsigned char * marker = NULL; // bound to YYMARKER in the re2c definitions
+ unsigned char * p = &(s->data[pos]); // bound to YYCURSOR: scanning starts at s->data[pos]
+ unsigned char * start = p; // match length is measured from here
+/*!re2c
+ [<] [/] blocktagname (spacechar | [>]) { return (p - start); }
+ [<] blocktagname (spacechar | [/>]) { return (p - start); }
+ [<] [!?] { return (p - start); }
+ .? { return 0; }
+*/
+}
+
+// Try to match a URL in a link or reference, return number of chars matched (leading spaces and newlines matched by the rules are counted).
+// This may optionally be contained in <..>; otherwise
+// whitespace and unbalanced right parentheses aren't allowed.
+// Newlines aren't ever allowed inside the URL itself; 0 is returned when nothing matches.
+extern int scan_link_url(bstring s, int pos)
+{
+ unsigned char * marker = NULL; // bound to YYMARKER in the re2c definitions
+ unsigned char * p = &(s->data[pos]); // bound to YYCURSOR: scanning starts at s->data[pos]
+ unsigned char * start = p; // match length is measured from here
+/*!re2c
+ [ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
+ [ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }
+ .? { return 0; }
+*/
+}
+
+// Try to match a link title (in single quotes, in double quotes, or
+// in parentheses), returning number of chars matched including both delimiters, or 0 if there is no match.
+// Backslash-escaped delimiters are allowed inside the title.
+extern int scan_link_title(bstring s, int pos)
+{
+ unsigned char * marker = NULL; // bound to YYMARKER in the re2c definitions
+ unsigned char * p = &(s->data[pos]); // bound to YYCURSOR: scanning starts at s->data[pos]
+ unsigned char * start = p; // match length is measured from here
+/*!re2c
+ ["] (escaped_char|[^"\x00])* ["] { return (p - start); }
+ ['] (escaped_char|[^'\x00])* ['] { return (p - start); }
+ [(] (escaped_char|[^)\x00])* [)] { return (p - start); }
+ .? { return 0; }
+*/
+}
+
+// Match space characters (space, tab), including newlines, returning how many were matched; 0 if there are none at pos.
+extern int scan_spacechars(bstring s, int pos)
+{
+ unsigned char * p = &(s->data[pos]); // bound to YYCURSOR: scanning starts at s->data[pos]
+ unsigned char * start = p; // match length is measured from here
+/*!re2c
+ [ \t\n]* { return (p - start); }
+ . { return 0; }
+*/
+}
+
+// Match ATX header start: 1 to 6 '#' followed by one or more spaces or by a newline. Returns the number of chars matched, or 0 if there is no match.
+extern int scan_atx_header_start(bstring s, int pos)
+{
+ unsigned char * marker = NULL; // bound to YYMARKER in the re2c definitions
+ unsigned char * p = &(s->data[pos]); // bound to YYCURSOR: scanning starts at s->data[pos]
+ unsigned char * start = p; // match length is measured from here
+/*!re2c
+ [#]{1,6} ([ ]+|[\n]) { return (p - start); }
+ .? { return 0; }
+*/
+}
+
+// Match setext header line (a run of '=' or of '-', optional trailing spaces, then a newline). Return 1 for level-1 header ('='),
+// 2 for level-2 ('-'), 0 for no match. Unlike the other scanners this returns a level, not a length.
+extern int scan_setext_header_line(bstring s, int pos)
+{
+ unsigned char * marker = NULL; // bound to YYMARKER in the re2c definitions
+ unsigned char * p = &(s->data[pos]); // bound to YYCURSOR: scanning starts at s->data[pos]
+/*!re2c
+ [=]+ [ ]* [\n] { return 1; }
+ [-]+ [ ]* [\n] { return 2; }
+ .? { return 0; }
+*/
+}
+
+// Scan a horizontal rule line: "...three or more hyphens, asterisks,
+// or underscores on a line by themselves. If you wish, you may use
+// spaces between the hyphens or asterisks." Returns the number of chars matched (through the newline), or 0 if there is no match.
+extern int scan_hrule(bstring s, int pos)
+{
+ unsigned char * marker = NULL; // bound to YYMARKER in the re2c definitions
+ unsigned char * p = &(s->data[pos]); // bound to YYCURSOR: scanning starts at s->data[pos]
+ unsigned char * start = p; // match length is measured from here
+/*!re2c
+ ([*][ ]*){3,} [ \t]* [\n] { return (p - start); }
+ ([_][ ]*){3,} [ \t]* [\n] { return (p - start); }
+ ([-][ ]*){3,} [ \t]* [\n] { return (p - start); }
+ .? { return 0; }
+*/
+}
+
+// Scan an opening code fence: three or more backticks, or three or more tildes, where the rest of the line up to the newline does not contain the fence character. Returns a length computed as p - start, or 0 if there is no match. NOTE(review): the part after '/' looks like re2c trailing context, so the length is presumably that of the fence run alone — confirm.
+extern int scan_open_code_fence(bstring s, int pos)
+{
+ unsigned char * marker = NULL; // bound to both YYMARKER and YYCTXMARKER in the re2c definitions
+ unsigned char * p = &(s->data[pos]); // bound to YYCURSOR: scanning starts at s->data[pos]
+ unsigned char * start = p; // match length is measured from here
+/*!re2c
+ [`]{3,} / [^`\n\x00]*[\n] { return (p - start); }
+ [~]{3,} / [^~\n\x00]*[\n] { return (p - start); }
+ .? { return 0; }
+*/
+}
+
+// Scan a closing code fence: three or more backticks or three or more tildes, followed only by space characters up to a newline. Returns p - start when that value exceeds len, and 0 otherwise. NOTE(review): this comment used to read "with length at least len", but the test below is strictly greater (p - start > len) — confirm against the caller which is intended.
+extern int scan_close_code_fence(bstring s, int pos, int len)
+{
+ unsigned char * marker = NULL; // bound to both YYMARKER and YYCTXMARKER in the re2c definitions
+ unsigned char * p = &(s->data[pos]); // bound to YYCURSOR: scanning starts at s->data[pos]
+ unsigned char * start = p; // match length is measured from here
+/*!re2c
+ ([`]{3,} | [~]{3,}) / spacechar* [\n]
+ { if (p - start > len) {
+ return (p - start);
+ } else {
+ return 0;
+ } }
+ .? { return 0; }
+*/
+}
+
+// Scans an entity: '&', then either '#' with 1-8 decimal digits or with x/X and 1-8 hex digits, or a letter followed by 1-31 alphanumerics, then ';'.
+// Returns number of chars matched (from '&' through ';'), or 0 if there is no match.
+extern int scan_entity(bstring s, int pos)
+{
+ unsigned char * marker = NULL; // bound to YYMARKER in the re2c definitions
+ unsigned char * p = &(s->data[pos]); // bound to YYCURSOR: scanning starts at s->data[pos]
+ unsigned char * start = p; // match length is measured from here
+/*!re2c
+ [&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;]
+ { return (p - start); }
+ .? { return 0; }
+*/
+}
diff --git a/src/stmd.h b/src/stmd.h
new file mode 100644
index 0000000..5e34399
--- /dev/null
+++ b/src/stmd.h
@@ -0,0 +1,121 @@
+#ifndef STMD_H
+#define STMD_H
+
+// Shared type definitions and prototypes for the stmd block and inline
+// parsers and renderers. The include guard is required because this
+// header defines types and macros, which cannot be repeated within one
+// translation unit.
+
+#include <stdbool.h>
+#include "bstrlib.h"
+#include "uthash.h"
+
+#define VERSION "0.1"
+#define CODE_INDENT 4
+
+// Inline node. Nodes form a singly linked list through next; tag selects
+// the node kind and content carries its payload.
+typedef struct Inline {
+  enum { str, softbreak, linebreak, code, raw_html, entity,
+         emph, strong, link, image } tag;
+  union {
+    bstring literal;             // textual payload
+    struct Inline* inlines;      // nested list of inline nodes
+    struct { struct Inline* label;
+             bstring url;
+             bstring title;
+    } linkable;                  // presumably used by link and image nodes
+  } content;
+  struct Inline* next;
+} inl;
+
+// Link reference definition, stored in a uthash table.
+typedef struct Reference {
+  bstring label;
+  bstring url;
+  bstring title;
+  UT_hash_handle hh; // used by uthash
+} reference;
+
+// Inline parser state: the buffer being scanned, the current position
+// in it, and the reference map consulted while parsing.
+typedef struct Subject {
+  bstring buffer;
+  int pos;
+  reference** reference_map;
+  int label_nestlevel;
+} subject;
+
+// Types for blocks
+
+// Attributes of a list / list item block.
+struct ListData {
+  enum { bullet,
+         ordered } list_type;
+  int marker_offset;
+  int padding;
+  int start;
+  enum { period,
+         parens } delimiter;
+  unsigned char bullet_char;
+  bool tight;
+};
+
+// Attributes of a fenced code block.
+struct FencedCodeData {
+  int fence_length;
+  int fence_offset;
+  char fence_char;
+  bstring info;
+};
+
+// Block node. Children are linked through children/last_child and
+// siblings through next/prev; parent points back up the tree. The active
+// member of attributes depends on tag.
+typedef struct Block {
+  enum { document,
+         block_quote,
+         list,
+         list_item,
+         fenced_code,
+         indented_code,
+         html_block,
+         paragraph,
+         atx_header,
+         setext_header,
+         hrule,
+         reference_def
+  } tag;
+  int start_line;
+  int start_column;
+  int end_line;
+  bool open;
+  bool last_line_blank;
+  struct Block* children;
+  struct Block* last_child;
+  struct Block* parent;
+  struct Block* top;
+  bstring string_content;
+  inl* inline_content;
+  union {
+    struct ListData list_data;
+    struct FencedCodeData fenced_code_data;
+    int header_level;
+    reference** refmap;
+  } attributes;
+  struct Block * next;
+  struct Block * prev;
+} block;
+
+// Inline parsing and reference handling
+int parse_inline(subject* subj, inl ** last);
+inl* parse_inlines(bstring input, reference** refmap);
+inl* parse_inlines_while(subject* subj, int (*f)(subject*));
+void free_inlines(inl* e);
+int parse_reference(bstring input, reference** refmap);
+void free_reference(reference *ref);
+void free_reference_map(reference **refmap);
+reference* make_reference(bstring label, bstring url, bstring title);
+reference* lookup_reference(reference** refmap, bstring label);
+void add_reference(reference** refmap, reference* ref);
+int unescape(bstring s);
+
+// Block tree construction. make_document takes no arguments; (void)
+// makes this a real prototype so that calls with arguments are rejected.
+extern block* make_document(void);
+extern block* add_child(block* parent,
+                        int block_type, int start_line, int start_column);
+void free_blocks(block* e);
+
+// FOR NOW:
+int process_inlines(block* cur, reference** refmap);
+int incorporate_line(bstring ln, int line_number, block** curptr);
+int finalize(block* b, int line_number);
+
+// Debug printing
+void print_inlines(inl* ils, int indent);
+void print_blocks(block* blk, int indent);
+
+// HTML rendering
+int blocks_to_html(block* b, bstring* result, bool tight);
+int inlines_to_html(inl* b, bstring* result);
+
+int bdetab(bstring s, int utf8);
+
+#endif /* STMD_H */
+
diff --git a/src/utf8.c b/src/utf8.c
new file mode 100644
index 0000000..4bb3b35
--- /dev/null
+++ b/src/utf8.c
@@ -0,0 +1,106 @@
+#include <stdlib.h>
+#include "bstrlib.h"
+#include "debug.h"
+
+#define advance(s) \
+ s++; \
+ check(*s >> 6 == 0x02, "UTF-8 decode error on byte %x", *s);
+
+// Reads a unicode code point from a UTF8-encoded, NUL-terminated string
+// and stores it in *n. If something illegal is encountered, 0xFFFD is
+// stored instead.
+// Returns a pointer to the next position in the string, or NULL if no
+// more characters remain (s points at the terminating NUL; *n is left
+// untouched in that case).
+//
+// Error recovery always makes progress, so a caller looping until NULL
+// is guaranteed to terminate:
+//   - invalid lead byte: the byte is consumed and 0xFFFD is stored;
+//   - bad continuation byte: the returned pointer addresses the offending
+//     byte, which is left unconsumed so it can be decoded on its own
+//     (it may be the terminating NUL).
+// Lead bytes of the obsolete 5-byte form (111110xx) are accepted, and
+// overlong encodings and surrogates are not rejected here.
+extern unsigned char * from_utf8(unsigned char * s, unsigned int *n)
+{
+  unsigned int x = 0;
+  int continuation_bytes = 0;
+
+  if (*s == 0) {
+    return NULL;
+  } else if (*s < 0x80) {
+    x = *s;
+  } else if (*s >> 5 == 0x06) {
+    x = *s & 0x1F;
+    continuation_bytes = 1;
+  } else if (*s >> 4 == 0x0E) {
+    x = *s & 0x0F;
+    continuation_bytes = 2;
+  } else if (*s >> 3 == 0x1E) {
+    x = *s & 0x07;
+    continuation_bytes = 3;
+  } else if (*s >> 2 == 0x3E) {
+    x = *s & 0x03;
+    continuation_bytes = 4;
+  } else {
+    log_err("UTF-8 decode error on byte %x", *s);
+    // Skip the invalid lead byte. Returning s unchanged would make a
+    // caller that loops on the return value spin forever on this byte.
+    s++;
+    goto error;
+  }
+
+  // Fold each continuation byte's low six bits into the code point.
+  while (continuation_bytes-- > 0) {
+    advance(s);
+    x = (x << 6) + (*s & 0x3F);
+  }
+
+  *n = x;
+  s++;
+  return s;
+ error:
+  *n = 0xFFFD;
+  return s;
+}
+
+// Converts the unicode code point c to UTF-8 and appends the result to
+// dest. Returns 0 on success, -1 on error.
+// Errors are: c is a surrogate (0xD800..0xDFFF), c is 0x110000 or above,
+// or the append to dest fails (e.g. out of memory). The bytes are
+// appended in a single call, so on error dest is not left holding a
+// partial sequence.
+extern int to_utf8(unsigned int c, bstring dest)
+{
+  unsigned char bytes[4];
+  int len;
+
+  if (c < 0x80) {
+    bytes[0] = (unsigned char) c;
+    len = 1;
+  } else if (c < 0x800) {
+    bytes[0] = (unsigned char) (0xC0 | (c >> 6));
+    bytes[1] = (unsigned char) (0x80 | (c & 0x3F));
+    len = 2;
+  } else if (c - 0xd800u < 0x800) {
+    // surrogates are not encodable
+    return -1;
+  } else if (c < 0x10000) {
+    bytes[0] = (unsigned char) (0xE0 | (c >> 12));
+    bytes[1] = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
+    bytes[2] = (unsigned char) (0x80 | (c & 0x3F));
+    len = 3;
+  } else if (c < 0x110000) {
+    bytes[0] = (unsigned char) (0xF0 | (c >> 18));
+    bytes[1] = (unsigned char) (0x80 | ((c >> 12) & 0x3F));
+    bytes[2] = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
+    bytes[3] = (unsigned char) (0x80 | (c & 0x3F));
+    len = 4;
+  } else {
+    return -1;
+  }
+
+  // Previously the per-byte append results were ignored, so an
+  // allocation failure was reported as success.
+  if (bcatblk(dest, bytes, len) != BSTR_OK) {
+    return -1;
+  }
+  return 0;
+}
+
+// Appends code point x, UTF-8 encoded, to the local bstring `buf` of the
+// enclosing function. On failure check() (debug.h) is expected to log
+// the message and jump to that function's `error` label.
+#define bufpush(x) \
+ check(to_utf8(x, buf) == 0, "UTF-8 encode error on code point %04x", x)
+
+// Returns the case-folded version of the source string, or NULL on error
+// (NULL input, allocation failure, or a code point that cannot be
+// encoded). The result is a newly allocated bstring.
+extern bstring case_fold(bstring source)
+{
+  unsigned char * s;
+  unsigned int c = 0;
+  bstring buf;
+
+  if (source == NULL || source->data == NULL) {
+    return NULL;
+  }
+  s = source->data;
+  buf = bfromcstr("");
+  if (buf == NULL) {
+    return NULL;
+  }
+  // The per-code-point folding table is textually included; it emits
+  // output through bufpush().
+  while ((s = from_utf8(s, &c))) {
+#include "case_fold_switch.c"
+  }
+  return buf;
+error:
+  // Release the partially built result; it was leaked here before.
+  bdestroy(buf);
+  return NULL;
+}
+
diff --git a/src/utf8.h b/src/utf8.h
new file mode 100644
index 0000000..fe59a90
--- /dev/null
+++ b/src/utf8.h
@@ -0,0 +1,6 @@
+#include <stdlib.h>
+#include "bstrlib.h"
+
+extern unsigned char * from_utf8(unsigned char * s, unsigned int *n);
+extern int to_utf8(unsigned int c, bstring dest);
+extern bstring case_fold(bstring source);
diff --git a/src/uthash.h b/src/uthash.h
new file mode 100644
index 0000000..b9bc7e9
--- /dev/null
+++ b/src/uthash.h
@@ -0,0 +1,948 @@
+/*
+Copyright (c) 2003-2013, Troy D. Hanson http://troydhanson.github.com/uthash/
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef UTHASH_H
+#define UTHASH_H
+
+#include <string.h> /* memcmp,strlen */
+#include <stddef.h> /* ptrdiff_t */
+#include <stdlib.h> /* exit() */
+
+/* These macros use decltype or the earlier __typeof GNU extension.
+ As decltype is only available in newer compilers (VS2010 or gcc 4.3+
+ when compiling c++ source) this code uses whatever method is needed
+ or, for VS2008 where neither is available, uses casting workarounds. */
+#ifdef _MSC_VER /* MS compiler */
+#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */
+#define DECLTYPE(x) (decltype(x))
+#else /* VS2008 or older (or VS2010 in C mode) */
+#define NO_DECLTYPE
+#define DECLTYPE(x)
+#endif
+#else /* GNU, Sun and other compilers */
+#define DECLTYPE(x) (__typeof(x))
+#endif
+
+#ifdef NO_DECLTYPE
+#define DECLTYPE_ASSIGN(dst,src) \
+do { \
+ char **_da_dst = (char**)(&(dst)); \
+ *_da_dst = (char*)(src); \
+} while(0)
+#else
+#define DECLTYPE_ASSIGN(dst,src) \
+do { \
+ (dst) = DECLTYPE(dst)(src); \
+} while(0)
+#endif
+
+/* a number of the hash functions use uint32_t which isn't defined on win32 */
+#ifdef _MSC_VER
+typedef unsigned int uint32_t;
+typedef unsigned char uint8_t;
+#else
+#include <inttypes.h> /* uint32_t */
+#endif
+
+#define UTHASH_VERSION 1.9.8
+
+#ifndef uthash_fatal
+#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */
+#endif
+#ifndef uthash_malloc
+#define uthash_malloc(sz) malloc(sz) /* malloc fcn */
+#endif
+#ifndef uthash_free
+#define uthash_free(ptr,sz) free(ptr) /* free fcn */
+#endif
+
+#ifndef uthash_noexpand_fyi
+#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */
+#endif
+#ifndef uthash_expand_fyi
+#define uthash_expand_fyi(tbl) /* can be defined to log expands */
+#endif
+
+/* initial number of buckets */
+#define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */
+#define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */
+#define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */
+
+/* calculate the element whose hash handle address is hhp */
+#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))
+
+#define HASH_FIND(hh,head,keyptr,keylen,out) \
+do { \
+ unsigned _hf_bkt,_hf_hashv; \
+ out=NULL; \
+ if (head) { \
+ HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \
+ if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \
+ HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \
+ keyptr,keylen,out); \
+ } \
+ } \
+} while (0)
+
+#ifdef HASH_BLOOM
+#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM)
+#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0)
+#define HASH_BLOOM_MAKE(tbl) \
+do { \
+ (tbl)->bloom_nbits = HASH_BLOOM; \
+ (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \
+ if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \
+ memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \
+ (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \
+} while (0)
+
+#define HASH_BLOOM_FREE(tbl) \
+do { \
+ uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \
+} while (0)
+
+#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8)))
+#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8)))
+
+#define HASH_BLOOM_ADD(tbl,hashv) \
+ HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
+
+#define HASH_BLOOM_TEST(tbl,hashv) \
+ HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
+
+#else
+#define HASH_BLOOM_MAKE(tbl)
+#define HASH_BLOOM_FREE(tbl)
+#define HASH_BLOOM_ADD(tbl,hashv)
+#define HASH_BLOOM_TEST(tbl,hashv) (1)
+#define HASH_BLOOM_BYTELEN 0
+#endif
+
+#define HASH_MAKE_TABLE(hh,head) \
+do { \
+ (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \
+ sizeof(UT_hash_table)); \
+ if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \
+ memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \
+ (head)->hh.tbl->tail = &((head)->hh); \
+ (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \
+ (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \
+ (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \
+ (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \
+ HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
+ if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \
+ memset((head)->hh.tbl->buckets, 0, \
+ HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
+ HASH_BLOOM_MAKE((head)->hh.tbl); \
+ (head)->hh.tbl->signature = HASH_SIGNATURE; \
+} while(0)
+
+#define HASH_ADD(hh,head,fieldname,keylen_in,add) \
+ HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)
+
+#define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \
+do { \
+ replaced=NULL; \
+ HASH_FIND(hh,head,&((add)->fieldname),keylen_in,replaced); \
+ if (replaced!=NULL) { \
+ HASH_DELETE(hh,head,replaced); \
+ }; \
+ HASH_ADD(hh,head,fieldname,keylen_in,add); \
+} while(0)
+
+#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
+do { \
+ unsigned _ha_bkt; \
+ (add)->hh.next = NULL; \
+ (add)->hh.key = (char*)(keyptr); \
+ (add)->hh.keylen = (unsigned)(keylen_in); \
+ if (!(head)) { \
+ head = (add); \
+ (head)->hh.prev = NULL; \
+ HASH_MAKE_TABLE(hh,head); \
+ } else { \
+ (head)->hh.tbl->tail->next = (add); \
+ (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \
+ (head)->hh.tbl->tail = &((add)->hh); \
+ } \
+ (head)->hh.tbl->num_items++; \
+ (add)->hh.tbl = (head)->hh.tbl; \
+ HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \
+ (add)->hh.hashv, _ha_bkt); \
+ HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \
+ HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \
+ HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \
+ HASH_FSCK(hh,head); \
+} while(0)
+
+#define HASH_TO_BKT( hashv, num_bkts, bkt ) \
+do { \
+ bkt = ((hashv) & ((num_bkts) - 1)); \
+} while(0)
+
+/* delete "delptr" from the hash table.
+ * "the usual" patch-up process for the app-order doubly-linked-list.
+ * The use of _hd_hh_del below deserves special explanation.
+ * These used to be expressed using (delptr) but that led to a bug
+ * if someone used the same symbol for the head and deletee, like
+ * HASH_DELETE(hh,users,users);
+ * We want that to work, but by changing the head (users) below
+ * we were forfeiting our ability to further refer to the deletee (users)
+ * in the patch-up process. Solution: use scratch space to
+ * copy the deletee pointer, then the latter references are via that
+ * scratch pointer rather than through the repointed (users) symbol.
+ */
+#define HASH_DELETE(hh,head,delptr) \
+do { \
+ unsigned _hd_bkt; \
+ struct UT_hash_handle *_hd_hh_del; \
+ if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \
+ uthash_free((head)->hh.tbl->buckets, \
+ (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
+ HASH_BLOOM_FREE((head)->hh.tbl); \
+ uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
+ head = NULL; \
+ } else { \
+ _hd_hh_del = &((delptr)->hh); \
+ if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \
+ (head)->hh.tbl->tail = \
+ (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \
+ (head)->hh.tbl->hho); \
+ } \
+ if ((delptr)->hh.prev) { \
+ ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \
+ (head)->hh.tbl->hho))->next = (delptr)->hh.next; \
+ } else { \
+ DECLTYPE_ASSIGN(head,(delptr)->hh.next); \
+ } \
+ if (_hd_hh_del->next) { \
+ ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \
+ (head)->hh.tbl->hho))->prev = \
+ _hd_hh_del->prev; \
+ } \
+ HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \
+ HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \
+ (head)->hh.tbl->num_items--; \
+ } \
+ HASH_FSCK(hh,head); \
+} while (0)
+
+
+/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
+#define HASH_FIND_STR(head,findstr,out) \
+ HASH_FIND(hh,head,findstr,strlen(findstr),out)
+#define HASH_ADD_STR(head,strfield,add) \
+ HASH_ADD(hh,head,strfield,strlen(add->strfield),add)
+#define HASH_REPLACE_STR(head,strfield,add,replaced) \
+ HASH_REPLACE(hh,head,strfield,strlen(add->strfield),add,replaced)
+#define HASH_FIND_INT(head,findint,out) \
+ HASH_FIND(hh,head,findint,sizeof(int),out)
+#define HASH_ADD_INT(head,intfield,add) \
+ HASH_ADD(hh,head,intfield,sizeof(int),add)
+#define HASH_REPLACE_INT(head,intfield,add,replaced) \
+ HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced)
+#define HASH_FIND_PTR(head,findptr,out) \
+ HASH_FIND(hh,head,findptr,sizeof(void *),out)
+#define HASH_ADD_PTR(head,ptrfield,add) \
+ HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
+#define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \
+ HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced)
+#define HASH_DEL(head,delptr) \
+ HASH_DELETE(hh,head,delptr)
+
+/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
+ * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
+ */
+#ifdef HASH_DEBUG
+#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
+#define HASH_FSCK(hh,head) \
+do { \
+ unsigned _bkt_i; \
+ unsigned _count, _bkt_count; \
+ char *_prev; \
+ struct UT_hash_handle *_thh; \
+ if (head) { \
+ _count = 0; \
+ for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \
+ _bkt_count = 0; \
+ _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \
+ _prev = NULL; \
+ while (_thh) { \
+ if (_prev != (char*)(_thh->hh_prev)) { \
+ HASH_OOPS("invalid hh_prev %p, actual %p\n", \
+ _thh->hh_prev, _prev ); \
+ } \
+ _bkt_count++; \
+ _prev = (char*)(_thh); \
+ _thh = _thh->hh_next; \
+ } \
+ _count += _bkt_count; \
+ if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \
+ HASH_OOPS("invalid bucket count %d, actual %d\n", \
+ (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \
+ } \
+ } \
+ if (_count != (head)->hh.tbl->num_items) { \
+ HASH_OOPS("invalid hh item count %d, actual %d\n", \
+ (head)->hh.tbl->num_items, _count ); \
+ } \
+ /* traverse hh in app order; check next/prev integrity, count */ \
+ _count = 0; \
+ _prev = NULL; \
+ _thh = &(head)->hh; \
+ while (_thh) { \
+ _count++; \
+ if (_prev !=(char*)(_thh->prev)) { \
+ HASH_OOPS("invalid prev %p, actual %p\n", \
+ _thh->prev, _prev ); \
+ } \
+ _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \
+ _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \
+ (head)->hh.tbl->hho) : NULL ); \
+ } \
+ if (_count != (head)->hh.tbl->num_items) { \
+ HASH_OOPS("invalid app item count %d, actual %d\n", \
+ (head)->hh.tbl->num_items, _count ); \
+ } \
+ } \
+} while (0)
+#else
+#define HASH_FSCK(hh,head)
+#endif
+
+/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
+ * the descriptor to which this macro is defined for tuning the hash function.
+ * The app can #include <unistd.h> to get the prototype for write(2). */
+#ifdef HASH_EMIT_KEYS
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \
+do { \
+ unsigned _klen = fieldlen; \
+ write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
+ write(HASH_EMIT_KEYS, keyptr, fieldlen); \
+} while (0)
+#else
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
+#endif
+
+/* default to Jenkins's hash unless overridden, e.g. -DHASH_FUNCTION=HASH_SAX */
+#ifdef HASH_FUNCTION
+#define HASH_FCN HASH_FUNCTION
+#else
+#define HASH_FCN HASH_JEN
+#endif
+
+/* The Bernstein hash function, used in Perl prior to v5.6 */
+#define HASH_BER(key,keylen,num_bkts,hashv,bkt) \
+do { \
+ unsigned _hb_keylen=keylen; \
+ char *_hb_key=(char*)(key); \
+ (hashv) = 0; \
+ while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \
+ bkt = (hashv) & (num_bkts-1); \
+} while (0)
+
+
+/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
+ * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
+#define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \
+do { \
+ unsigned _sx_i; \
+ char *_hs_key=(char*)(key); \
+ hashv = 0; \
+ for(_sx_i=0; _sx_i < keylen; _sx_i++) \
+ hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \
+ bkt = hashv & (num_bkts-1); \
+} while (0)
+
+#define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \
+do { \
+ unsigned _fn_i; \
+ char *_hf_key=(char*)(key); \
+ hashv = 2166136261UL; \
+ for(_fn_i=0; _fn_i < keylen; _fn_i++) \
+ hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \
+ bkt = hashv & (num_bkts-1); \
+} while(0)
+
+#define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \
+do { \
+ unsigned _ho_i; \
+ char *_ho_key=(char*)(key); \
+ hashv = 0; \
+ for(_ho_i=0; _ho_i < keylen; _ho_i++) { \
+ hashv += _ho_key[_ho_i]; \
+ hashv += (hashv << 10); \
+ hashv ^= (hashv >> 6); \
+ } \
+ hashv += (hashv << 3); \
+ hashv ^= (hashv >> 11); \
+ hashv += (hashv << 15); \
+ bkt = hashv & (num_bkts-1); \
+} while(0)
+
+#define HASH_JEN_MIX(a,b,c) \
+do { \
+ a -= b; a -= c; a ^= ( c >> 13 ); \
+ b -= c; b -= a; b ^= ( a << 8 ); \
+ c -= a; c -= b; c ^= ( b >> 13 ); \
+ a -= b; a -= c; a ^= ( c >> 12 ); \
+ b -= c; b -= a; b ^= ( a << 16 ); \
+ c -= a; c -= b; c ^= ( b >> 5 ); \
+ a -= b; a -= c; a ^= ( c >> 3 ); \
+ b -= c; b -= a; b ^= ( a << 10 ); \
+ c -= a; c -= b; c ^= ( b >> 15 ); \
+} while (0)
+
+#define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \
+do { \
+ unsigned _hj_i,_hj_j,_hj_k; \
+ unsigned char *_hj_key=(unsigned char*)(key); \
+ hashv = 0xfeedbeef; \
+ _hj_i = _hj_j = 0x9e3779b9; \
+ _hj_k = (unsigned)(keylen); \
+ while (_hj_k >= 12) { \
+ _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \
+ + ( (unsigned)_hj_key[2] << 16 ) \
+ + ( (unsigned)_hj_key[3] << 24 ) ); \
+ _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \
+ + ( (unsigned)_hj_key[6] << 16 ) \
+ + ( (unsigned)_hj_key[7] << 24 ) ); \
+ hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \
+ + ( (unsigned)_hj_key[10] << 16 ) \
+ + ( (unsigned)_hj_key[11] << 24 ) ); \
+ \
+ HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
+ \
+ _hj_key += 12; \
+ _hj_k -= 12; \
+ } \
+ hashv += keylen; \
+ switch ( _hj_k ) { \
+ case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \
+ case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \
+ case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \
+ case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \
+ case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \
+ case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \
+ case 5: _hj_j += _hj_key[4]; \
+ case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \
+ case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \
+ case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \
+ case 1: _hj_i += _hj_key[0]; \
+ } \
+ HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
+ bkt = hashv & (num_bkts-1); \
+} while(0)
+
+/* The Paul Hsieh hash function */
+#undef get16bits
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
+ || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const uint16_t *) (d)))
+#endif
+
+#if !defined (get16bits)
+#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
+ +(uint32_t)(((const uint8_t *)(d))[0]) )
+#endif
+#define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \
+do { \
+ unsigned char *_sfh_key=(unsigned char*)(key); \
+ uint32_t _sfh_tmp, _sfh_len = keylen; \
+ \
+ int _sfh_rem = _sfh_len & 3; \
+ _sfh_len >>= 2; \
+ hashv = 0xcafebabe; \
+ \
+ /* Main loop */ \
+ for (;_sfh_len > 0; _sfh_len--) { \
+ hashv += get16bits (_sfh_key); \
+ _sfh_tmp = (uint32_t)(get16bits (_sfh_key+2)) << 11 ^ hashv; \
+ hashv = (hashv << 16) ^ _sfh_tmp; \
+ _sfh_key += 2*sizeof (uint16_t); \
+ hashv += hashv >> 11; \
+ } \
+ \
+ /* Handle end cases */ \
+ switch (_sfh_rem) { \
+ case 3: hashv += get16bits (_sfh_key); \
+ hashv ^= hashv << 16; \
+ hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)] << 18); \
+ hashv += hashv >> 11; \
+ break; \
+ case 2: hashv += get16bits (_sfh_key); \
+ hashv ^= hashv << 11; \
+ hashv += hashv >> 17; \
+ break; \
+ case 1: hashv += *_sfh_key; \
+ hashv ^= hashv << 10; \
+ hashv += hashv >> 1; \
+ } \
+ \
+ /* Force "avalanching" of final 127 bits */ \
+ hashv ^= hashv << 3; \
+ hashv += hashv >> 5; \
+ hashv ^= hashv << 4; \
+ hashv += hashv >> 17; \
+ hashv ^= hashv << 25; \
+ hashv += hashv >> 6; \
+ bkt = hashv & (num_bkts-1); \
+} while(0)
+
+#ifdef HASH_USING_NO_STRICT_ALIASING
+/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads.
+ * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
+ * MurmurHash uses the faster approach only on CPU's where we know it's safe.
+ *
+ * Note the preprocessor built-in defines can be emitted using:
+ *
+ * gcc -m64 -dM -E - < /dev/null (on gcc)
+ * cc -## a.c (where a.c is a simple test file) (Sun Studio)
+ */
+#if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86))
+#define MUR_GETBLOCK(p,i) p[i]
+#else /* non intel */
+#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0)
+#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 0x3) == 1)
+#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 0x3) == 2)
+#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 0x3) == 3)
+#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL))
+#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
+#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
+#define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
+#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8))
+#else /* assume little endian non-intel */
+#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
+#define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
+#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8))
+#endif
+#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \
+ (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \
+ (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \
+ MUR_ONE_THREE(p))))
+#endif
+#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+#define MUR_FMIX(_h) \
+do { \
+ _h ^= _h >> 16; \
+ _h *= 0x85ebca6b; \
+ _h ^= _h >> 13; \
+ _h *= 0xc2b2ae35l; \
+ _h ^= _h >> 16; \
+} while(0)
+
+#define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \
+do { \
+ const uint8_t *_mur_data = (const uint8_t*)(key); \
+ const int _mur_nblocks = (keylen) / 4; \
+ uint32_t _mur_h1 = 0xf88D5353; \
+ uint32_t _mur_c1 = 0xcc9e2d51; \
+ uint32_t _mur_c2 = 0x1b873593; \
+ uint32_t _mur_k1 = 0; \
+ const uint8_t *_mur_tail; \
+ const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4); \
+ int _mur_i; \
+ for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \
+ _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \
+ _mur_k1 *= _mur_c1; \
+ _mur_k1 = MUR_ROTL32(_mur_k1,15); \
+ _mur_k1 *= _mur_c2; \
+ \
+ _mur_h1 ^= _mur_k1; \
+ _mur_h1 = MUR_ROTL32(_mur_h1,13); \
+ _mur_h1 = _mur_h1*5+0xe6546b64; \
+ } \
+ _mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4); \
+ _mur_k1=0; \
+ switch((keylen) & 3) { \
+ case 3: _mur_k1 ^= _mur_tail[2] << 16; \
+ case 2: _mur_k1 ^= _mur_tail[1] << 8; \
+ case 1: _mur_k1 ^= _mur_tail[0]; \
+ _mur_k1 *= _mur_c1; \
+ _mur_k1 = MUR_ROTL32(_mur_k1,15); \
+ _mur_k1 *= _mur_c2; \
+ _mur_h1 ^= _mur_k1; \
+ } \
+ _mur_h1 ^= (keylen); \
+ MUR_FMIX(_mur_h1); \
+ hashv = _mur_h1; \
+ bkt = hashv & (num_bkts-1); \
+} while(0)
+#endif /* HASH_USING_NO_STRICT_ALIASING */
+
+/* key comparison function; return 0 if keys equal */
+#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
+
+/* iterate over items in a known bucket to find desired item */
+#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
+do { \
+ if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \
+ else out=NULL; \
+ while (out) { \
+ if ((out)->hh.keylen == keylen_in) { \
+ if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) break; \
+ } \
+ if ((out)->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); \
+ else out = NULL; \
+ } \
+} while(0)
+
+/* add an item to a bucket */
+#define HASH_ADD_TO_BKT(head,addhh) \
+do { \
+ head.count++; \
+ (addhh)->hh_next = head.hh_head; \
+ (addhh)->hh_prev = NULL; \
+ if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \
+ (head).hh_head=addhh; \
+ if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \
+ && (addhh)->tbl->noexpand != 1) { \
+ HASH_EXPAND_BUCKETS((addhh)->tbl); \
+ } \
+} while(0)
+
+/* remove an item from a given bucket */
+#define HASH_DEL_IN_BKT(hh,head,hh_del) \
+ (head).count--; \
+ if ((head).hh_head == hh_del) { \
+ (head).hh_head = hh_del->hh_next; \
+ } \
+ if (hh_del->hh_prev) { \
+ hh_del->hh_prev->hh_next = hh_del->hh_next; \
+ } \
+ if (hh_del->hh_next) { \
+ hh_del->hh_next->hh_prev = hh_del->hh_prev; \
+ }
+
+/* Bucket expansion has the effect of doubling the number of buckets
+ * and redistributing the items into the new buckets. Ideally the
+ * items will distribute more or less evenly into the new buckets
+ * (the extent to which this is true is a measure of the quality of
+ * the hash function as it applies to the key domain).
+ *
+ * With the items distributed into more buckets, the chain length
+ * (item count) in each bucket is reduced. Thus by expanding buckets
+ * the hash keeps a bound on the chain length. This bounded chain
+ * length is the essence of how a hash provides constant time lookup.
+ *
+ * The calculation of tbl->ideal_chain_maxlen below deserves some
+ * explanation. First, keep in mind that we're calculating the ideal
+ * maximum chain length based on the *new* (doubled) bucket count.
+ * In fractions this is just n/b (n=number of items,b=new num buckets).
+ * Since the ideal chain length is an integer, we want to calculate
+ * ceil(n/b). We don't depend on floating point arithmetic in this
+ * hash, so to calculate ceil(n/b) with integers we could write
+ *
+ * ceil(n/b) = (n/b) + ((n%b)?1:0)
+ *
+ * and in fact a previous version of this hash did just that.
+ * But now we have improved things a bit by recognizing that b is
+ * always a power of two. We keep its base 2 log handy (call it lb),
+ * so now we can write this with a bit shift and logical AND:
+ *
+ * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
+ *
+ */
+#define HASH_EXPAND_BUCKETS(tbl) \
+do { \
+ unsigned _he_bkt; \
+ unsigned _he_bkt_i; \
+ struct UT_hash_handle *_he_thh, *_he_hh_nxt; \
+ UT_hash_bucket *_he_new_buckets, *_he_newbkt; \
+ _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \
+ 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
+ if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \
+ memset(_he_new_buckets, 0, \
+ 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
+ tbl->ideal_chain_maxlen = \
+ (tbl->num_items >> (tbl->log2_num_buckets+1)) + \
+ ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \
+ tbl->nonideal_items = 0; \
+ for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \
+ { \
+ _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \
+ while (_he_thh) { \
+ _he_hh_nxt = _he_thh->hh_next; \
+ HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \
+ _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \
+ if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \
+ tbl->nonideal_items++; \
+ _he_newbkt->expand_mult = _he_newbkt->count / \
+ tbl->ideal_chain_maxlen; \
+ } \
+ _he_thh->hh_prev = NULL; \
+ _he_thh->hh_next = _he_newbkt->hh_head; \
+ if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \
+ _he_thh; \
+ _he_newbkt->hh_head = _he_thh; \
+ _he_thh = _he_hh_nxt; \
+ } \
+ } \
+ uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
+ tbl->num_buckets *= 2; \
+ tbl->log2_num_buckets++; \
+ tbl->buckets = _he_new_buckets; \
+ tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \
+ (tbl->ineff_expands+1) : 0; \
+ if (tbl->ineff_expands > 1) { \
+ tbl->noexpand=1; \
+ uthash_noexpand_fyi(tbl); \
+ } \
+ uthash_expand_fyi(tbl); \
+} while(0)
+
+
+/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
+/* Note that HASH_SORT assumes the hash handle name to be hh.
+ * HASH_SRT was added to allow the hash handle name to be passed in.
+ *
+ * Only the application-order list (the prev/next links of each handle) is
+ * reordered; nothing in this macro touches the per-bucket chains.
+ * cmpfcn is called with two element pointers (passed through DECLTYPE(head));
+ * when it returns <= 0 the first argument is emitted first, so on a tie the
+ * element from the earlier run stays ahead.
+ * When the sort finishes, head is reassigned to the first element of the
+ * sorted list and tbl->tail to the last handle. A NULL head is left alone. */
+#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
+#define HASH_SRT(hh,head,cmpfcn) \
+do { \
+ unsigned _hs_i; \
+ unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \
+ struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \
+ if (head) { \
+ _hs_insize = 1; /* run length for this pass; doubled after every pass */ \
+ _hs_looping = 1; \
+ _hs_list = &((head)->hh); \
+ while (_hs_looping) { \
+ _hs_p = _hs_list; \
+ _hs_list = NULL; \
+ _hs_tail = NULL; \
+ _hs_nmerges = 0; \
+ while (_hs_p) { /* merge the next pair of runs, p and q */ \
+ _hs_nmerges++; \
+ _hs_q = _hs_p; \
+ _hs_psize = 0; \
+ for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { /* step q past the p run */ \
+ _hs_psize++; \
+ _hs_q = (UT_hash_handle*)((_hs_q->next) ? /* next holds an element pointer; */ \
+ ((void*)((char*)(_hs_q->next) + /* adding hho yields that element's handle */ \
+ (head)->hh.tbl->hho)) : NULL); \
+ if (! (_hs_q) ) break; \
+ } \
+ _hs_qsize = _hs_insize; \
+ while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \
+ if (_hs_psize == 0) { /* p run used up: take from q */ \
+ _hs_e = _hs_q; \
+ _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
+ ((void*)((char*)(_hs_q->next) + \
+ (head)->hh.tbl->hho)) : NULL); \
+ _hs_qsize--; \
+ } else if ( (_hs_qsize == 0) || !(_hs_q) ) { /* q run used up: take from p */ \
+ _hs_e = _hs_p; \
+ if (_hs_p){ \
+ _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
+ ((void*)((char*)(_hs_p->next) + \
+ (head)->hh.tbl->hho)) : NULL); \
+ } \
+ _hs_psize--; \
+ } else if (( \
+ cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
+ DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
+ ) <= 0) { /* p compares first or equal: take from p */ \
+ _hs_e = _hs_p; \
+ if (_hs_p){ \
+ _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
+ ((void*)((char*)(_hs_p->next) + \
+ (head)->hh.tbl->hho)) : NULL); \
+ } \
+ _hs_psize--; \
+ } else { /* q compares first: take from q */ \
+ _hs_e = _hs_q; \
+ _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
+ ((void*)((char*)(_hs_q->next) + \
+ (head)->hh.tbl->hho)) : NULL); \
+ _hs_qsize--; \
+ } \
+ if ( _hs_tail ) { /* append e to the merged list built so far */ \
+ _hs_tail->next = ((_hs_e) ? \
+ ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \
+ } else { \
+ _hs_list = _hs_e; \
+ } \
+ if (_hs_e) { \
+ _hs_e->prev = ((_hs_tail) ? \
+ ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \
+ } \
+ _hs_tail = _hs_e; \
+ } \
+ _hs_p = _hs_q; /* the next pair starts where q stopped */ \
+ } \
+ if (_hs_tail){ \
+ _hs_tail->next = NULL; \
+ } \
+ if ( _hs_nmerges <= 1 ) { /* at most one merge this pass: the list is sorted */ \
+ _hs_looping=0; \
+ (head)->hh.tbl->tail = _hs_tail; \
+ DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \
+ } \
+ _hs_insize *= 2; \
+ } \
+ HASH_FSCK(hh,head); \
+ } \
+} while (0)
+
+/* This function selects items from one hash into another hash.
+ * The end result is that the selected items have dual presence
+ * in both hashes. There is no copy of the items made; rather
+ * they are added into the new hash through a secondary
+ * hash handle that must be present in the structure.
+ *
+ * hh_dst and hh_src name the handle fields used for dst and src; dst and src
+ * are the head pointers of the two hashes. cond is invoked as cond(elt) with
+ * a void* pointer to each source element; a nonzero result selects it.
+ * The key pointer, key length and hash value are copied from the source
+ * handle, so the key is not hashed again.
+ * NOTE(review): the app-order links are built starting from _last_elt=NULL,
+ * which looks like it assumes dst is empty on entry -- confirm with callers. */
+#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
+do { \
+ unsigned _src_bkt, _dst_bkt; \
+ void *_last_elt=NULL, *_elt; \
+ UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \
+ ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); /* byte offset of hh_dst in an element */ \
+ if (src) { \
+ for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { /* visit every source bucket chain */ \
+ for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \
+ _src_hh; \
+ _src_hh = _src_hh->hh_next) { \
+ _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \
+ if (cond(_elt)) { \
+ _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \
+ _dst_hh->key = _src_hh->key; \
+ _dst_hh->keylen = _src_hh->keylen; \
+ _dst_hh->hashv = _src_hh->hashv; \
+ _dst_hh->prev = _last_elt; /* chain after the previously selected element */ \
+ _dst_hh->next = NULL; \
+ if (_last_elt_hh) { _last_elt_hh->next = _elt; } \
+ if (!dst) { /* first selected item becomes the head of dst */ \
+ DECLTYPE_ASSIGN(dst,_elt); \
+ HASH_MAKE_TABLE(hh_dst,dst); \
+ } else { \
+ _dst_hh->tbl = (dst)->hh_dst.tbl; \
+ } \
+ HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \
+ HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \
+ (dst)->hh_dst.tbl->num_items++; \
+ _last_elt = _elt; \
+ _last_elt_hh = _dst_hh; \
+ } \
+ } \
+ } \
+ } \
+ HASH_FSCK(hh_dst,dst); \
+} while (0)
+
+/* Release the bookkeeping of a hash -- the bucket array, whatever
+ * HASH_BLOOM_FREE releases, and the table itself -- then reset head to NULL.
+ * The elements themselves are not freed here. A NULL head is a no-op. */
+#define HASH_CLEAR(hh,head) \
+do { \
+ if (head) { \
+ UT_hash_table *_hc_tbl = (head)->hh.tbl; /* the table is freed last */ \
+ uthash_free(_hc_tbl->buckets, \
+ _hc_tbl->num_buckets*sizeof(struct UT_hash_bucket)); \
+ HASH_BLOOM_FREE(_hc_tbl); \
+ uthash_free(_hc_tbl, sizeof(UT_hash_table)); \
+ (head)=NULL; \
+ } \
+} while(0)
+
+/* Bytes of bookkeeping overhead carried by the hash: one handle per item,
+ * the bucket array, the table structure and HASH_BLOOM_BYTELEN.
+ * An empty hash is a NULL head; its overhead is reported as 0 rather than
+ * dereferencing the NULL pointer. The result has type size_t. */
+#define HASH_OVERHEAD(hh,head) \
+ ((head) ? \
+ (size_t)((((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \
+ ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \
+ (sizeof(UT_hash_table)) + \
+ (HASH_BLOOM_BYTELEN))) : \
+ (size_t)0)
+
+#ifdef NO_DECLTYPE /* HASH_ITER walks the hash via hh.next; tmp always holds el's successor */
+#define HASH_ITER(hh,head,el,tmp) \
+for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); /* tmp is stored through a char** view */ \
+ el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL)) /* el is not read after the body, so the body may remove it */
+#else /* otherwise hh.next is converted with DECLTYPE(el) and assigned to tmp directly */
+#define HASH_ITER(hh,head,el,tmp) \
+for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \
+ el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL)) /* el is not read after the body, so the body may remove it */
+#endif
+
+/* Number of items stored in the hash. A NULL head (empty hash) yields 0.
+ * HASH_COUNT assumes the handle field is named hh; HASH_CNT takes its name. */
+#define HASH_COUNT(head) HASH_CNT(hh,head)
+#define HASH_CNT(hh,head) (!(head) ? 0 : ((head)->hh.tbl->num_items))
+
+typedef struct UT_hash_bucket { /* one bucket: the head of a chain of handles plus its length */
+ struct UT_hash_handle *hh_head; /* first handle in this bucket's chain, NULL when empty */
+ unsigned count; /* number of handles currently in the chain */
+
+ /* expand_mult is normally set to 0. In this situation, the max chain length
+ * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
+ * the bucket's chain exceeds this length, bucket expansion is triggered).
+ * However, setting expand_mult to a non-zero value delays bucket expansion
+ * (that would be triggered by additions to this particular bucket)
+ * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
+ * (The multiplier is simply expand_mult+1). The whole idea of this
+ * multiplier is to reduce bucket expansions, since they are expensive, in
+ * situations where we know that a particular bucket tends to be overused.
+ * It is better to let its chain length grow to a longer yet-still-bounded
+ * value, than to do an O(n) bucket expansion too often.
+ */
+ unsigned expand_mult;
+
+} UT_hash_bucket;
+
+/* random signatures used only to find hash tables in external analysis */
+#define HASH_SIGNATURE 0xa0111fe1
+#define HASH_BLOOM_SIGNATURE 0xb12220f2
+
+typedef struct UT_hash_table { /* per-hash bookkeeping shared by all handles of one hash */
+ UT_hash_bucket *buckets; /* bucket array with num_buckets entries */
+ unsigned num_buckets, log2_num_buckets; /* bucket count and its base-2 log; updated together on expansion */
+ unsigned num_items; /* number of items in the hash */
+ struct UT_hash_handle *tail; /* tail hh in app order, for fast append */
+ ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element) */
+
+ /* in an ideal situation (all buckets used equally), no bucket would have
+ * more than ceil(#items/#buckets) items. that's the ideal chain length. */
+ unsigned ideal_chain_maxlen;
+
+ /* nonideal_items is the number of items in the hash whose chain position
+ * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
+ * hash distribution; reaching them in a chain traversal takes >ideal steps */
+ unsigned nonideal_items;
+
+ /* ineffective expands occur when a bucket doubling was performed, but
+ * afterward, more than half the items in the hash had nonideal chain
+ * positions. If this happens on two consecutive expansions we inhibit any
+ * further expansion, as it's not helping; this happens when the hash
+ * function isn't a good fit for the key domain. When expansion is inhibited
+ * the hash will still work, albeit no longer in constant time. */
+ unsigned ineff_expands, noexpand; /* noexpand is set to 1 once ineff_expands exceeds 1 */
+
+ uint32_t signature; /* used only to find hash tables in external analysis */
+#ifdef HASH_BLOOM
+ uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
+ uint8_t *bloom_bv; /* presumably the bloom filter's bit vector -- confirm against the bloom macros */
+ char bloom_nbits; /* presumably the filter size expressed as a bit count exponent -- confirm */
+#endif
+
+} UT_hash_table;
+
+typedef struct UT_hash_handle { /* bookkeeping embedded in every hashed element */
+ struct UT_hash_table *tbl; /* table of the hash this handle is linked into */
+ void *prev; /* prev element in app order (points at the element, not its handle) */
+ void *next; /* next element in app order (points at the element, not its handle) */
+ struct UT_hash_handle *hh_prev; /* previous hh in bucket order */
+ struct UT_hash_handle *hh_next; /* next hh in bucket order */
+ void *key; /* ptr to enclosing struct's key */
+ unsigned keylen; /* enclosing struct's key len */
+ unsigned hashv; /* result of hash-fcn(key) */
+} UT_hash_handle;
+
+#endif /* UTHASH_H */