From e216094e2192c05ddbd0988458eb8c0012e7baf8 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Tue, 2 Sep 2014 01:10:54 +0200 Subject: lol --- Makefile | 19 +- src/blocks.c | 1352 +++++++++++---------- src/bstrlib.c | 2979 ---------------------------------------------- src/bstrlib.h | 304 ----- src/buffer.c | 313 +++++ src/buffer.h | 119 ++ src/case_fold_switch.c | 2637 ---------------------------------------- src/case_fold_switch.inc | 2637 ++++++++++++++++++++++++++++++++++++++++ src/casefold.c | 2699 ----------------------------------------- src/detab.c | 48 - src/getopt.c | 199 ---- src/inlines.c | 1711 +++++++++++++------------- src/main.c | 2 +- src/scanners.h | 28 +- src/scanners.re | 54 +- src/stmd.h | 76 +- src/utf8.c | 221 ++-- 17 files changed, 4889 insertions(+), 10509 deletions(-) delete mode 100644 src/bstrlib.c delete mode 100644 src/bstrlib.h create mode 100644 src/buffer.c create mode 100644 src/buffer.h delete mode 100644 src/case_fold_switch.c create mode 100644 src/case_fold_switch.inc delete mode 100644 src/casefold.c delete mode 100644 src/detab.c delete mode 100644 src/getopt.c diff --git a/Makefile b/Makefile index 55b6645..cb5938d 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) -LDFLAGS=-g -O3 -Wall -Werror +CFLAGS=-ggdb3 -O0 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) +LDFLAGS=-ggdb3 -O0 -Wall -Werror SRCDIR=src DATADIR=data @@ -41,13 +41,13 @@ testjs: spec.txt benchjs: node js/bench.js ${BENCHINP} -$(PROG): $(SRCDIR)/main.c $(SRCDIR)/inlines.o $(SRCDIR)/blocks.o $(SRCDIR)/detab.o $(SRCDIR)/bstrlib.o $(SRCDIR)/scanners.o $(SRCDIR)/print.o $(SRCDIR)/html.o $(SRCDIR)/utf8.o +$(PROG): $(SRCDIR)/main.c $(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/html.o $(SRCDIR)/utf8.o $(CC) $(LDFLAGS) -o $@ $^ $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re re2c --case-insensitive -bis $< > $@ || (rm $@ && false) -$(SRCDIR)/case_fold_switch.c: 
$(DATADIR)/CaseFolding-3.2.0.txt +$(SRCDIR)/case_fold_switch.inc $(DATADIR)/CaseFolding-3.2.0.txt perl mkcasefold.pl < $< > $@ .PHONY: leakcheck clean fuzztest dingus upload @@ -58,6 +58,9 @@ dingus: leakcheck: $(PROG) cat oldtests/*/*.markdown | valgrind --leak-check=full --dsymutil=yes $(PROG) +operf: $(PROG) + operf $(PROG) /dev/null + fuzztest: for i in `seq 1 10`; do \ time cat /dev/urandom | head -c 100000 | iconv -f latin1 -t utf-8 | $(PROG) >/dev/null; done @@ -69,7 +72,7 @@ update-site: spec.html narrative.html (cd _site ; git pull ; git commit -a -m "Updated site for latest spec, narrative, js" ; git push; cd ..) clean: - -rm test $(SRCDIR)/*.o $(SRCDIR)/scanners.c - -rm -r *.dSYM - -rm README.html - -rm spec.md fuzz.txt spec.html + -rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c + -rm -rf *.dSYM + -rm -f README.html + -rm -f spec.md fuzz.txt spec.html diff --git a/src/blocks.c b/src/blocks.c index 2776231..eabac03 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -10,738 +11,767 @@ static block* make_block(int tag, int start_line, int start_column) { - block* e; - e = (block*) malloc(sizeof(block)); - e->tag = tag; - e->open = true; - e->last_line_blank = false; - e->start_line = start_line; - e->start_column = start_column; - e->end_line = start_line; - e->children = NULL; - e->last_child = NULL; - e->parent = NULL; - e->top = NULL; - e->attributes.refmap = NULL; - e->string_content = bfromcstr(""); - e->inline_content = NULL; - e->next = NULL; - e->prev = NULL; - return e; + block* e; + e = (block*) malloc(sizeof(block)); + e->tag = tag; + e->open = true; + e->last_line_blank = false; + e->start_line = start_line; + e->start_column = start_column; + e->end_line = start_line; + e->children = NULL; + e->last_child = NULL; + e->parent = NULL; + e->top = NULL; + e->attributes.refmap = NULL; + gh_buf_init(&e->string_content, 32); + e->string_pos = 0; + e->inline_content = NULL; + e->next = NULL; + 
e->prev = NULL; + return e; } // Create a root document block. extern block* make_document() { - block * e = make_block(document, 1, 1); - reference * map = NULL; - reference ** refmap; - refmap = (reference**) malloc(sizeof(reference*)); - *refmap = map; - e->attributes.refmap = refmap; - e->top = e; - return e; + block * e = make_block(document, 1, 1); + reference * map = NULL; + reference ** refmap; + refmap = (reference**) malloc(sizeof(reference*)); + *refmap = map; + e->attributes.refmap = refmap; + e->top = e; + return e; } // Returns true if line has only space characters, else false. -bool is_blank(bstring s, int offset) +bool is_blank(gh_buf *s, int offset) { - char c; - while ((c = bchar(s, offset))) { - if (c == '\n') { - return true; - } else if (c == ' ') { - offset++; - } else { - return false; - } - } - return true; + while (offset < s->size) { + switch (s->ptr[offset]) { + case '\n': + return true; + case ' ': + offset++; + default: + return false; + } + } + + return true; } static inline bool can_contain(int parent_type, int child_type) { - return ( parent_type == document || - parent_type == block_quote || - parent_type == list_item || - (parent_type == list && child_type == list_item) ); + return ( parent_type == document || + parent_type == block_quote || + parent_type == list_item || + (parent_type == list && child_type == list_item) ); } static inline bool accepts_lines(int block_type) { - return (block_type == paragraph || - block_type == atx_header || - block_type == indented_code || - block_type == fenced_code); + return (block_type == paragraph || + block_type == atx_header || + block_type == indented_code || + block_type == fenced_code); } -static int add_line(block* block, bstring ln, int offset) +static void add_line(block* block, gh_buf *ln, int offset) { - bstring s = bmidstr(ln, offset, blength(ln) - offset); - check(block->open, "attempted to add line (%s) to closed container (%d)", - ln->data, block->tag); - 
check(bformata(block->string_content, "%s", s->data) == 0, - "could not append line to string_content"); - bdestroy(s); - return 0; - error: - return -1; + assert(block->open); + gh_buf_put(&block->string_content, ln->ptr + offset, ln->size - offset); } -static int remove_trailing_blank_lines(bstring ln) +static void remove_trailing_blank_lines(gh_buf *ln) { - bstring tofind = bfromcstr(" \t\r\n"); - int pos; - // find last nonspace: - pos = bninchrr(ln, blength(ln) - 1, tofind); - if (pos == BSTR_ERR) { // all spaces - bassigncstr(ln, ""); - } else { - // find next newline after it - pos = bstrchrp(ln, '\n', pos); - if (pos != BSTR_ERR) { - check(bdelete(ln, pos, blength(ln) - pos) != BSTR_ERR, - "failed to delete trailing blank lines"); - } - } - bdestroy(tofind); - return 0; - error: - return -1; + int i; + + for (i = ln->size - 1; i >= 0; --i) { + char c = ln->ptr[i]; + + if (c != ' ' && c != '\t' && c != '\r' && c != '\n') + break; + } + + if (i < 0) { + gh_buf_clear(ln); + return; + } + + i = gh_buf_strchr(ln, '\n', i); + if (i >= 0) + gh_buf_truncate(ln, i + 1); } // Check to see if a block ends with a blank line, descending // if needed into lists and sublists. 
static bool ends_with_blank_line(block* block) { - if (block->last_line_blank) { - return true; - } - if ((block->tag == list || block->tag == list_item) && block->last_child) { - return ends_with_blank_line(block->last_child); - } else { - return false; - } + if (block->last_line_blank) { + return true; + } + if ((block->tag == list || block->tag == list_item) && block->last_child) { + return ends_with_blank_line(block->last_child); + } else { + return false; + } } // Break out of all containing lists static int break_out_of_lists(block ** bptr, int line_number) { - block * container = *bptr; - block * b = container->top; - // find first containing list: - while (b && b->tag != list) { - b = b->last_child; - } - if (b) { - while (container && container != b) { - finalize(container, line_number); - container = container->parent; - } - finalize(b, line_number); - *bptr = b->parent; - } - return 0; + block * container = *bptr; + block * b = container->top; + // find first containing list: + while (b && b->tag != list) { + b = b->last_child; + } + if (b) { + while (container && container != b) { + finalize(container, line_number); + container = container->parent; + } + finalize(b, line_number); + *bptr = b->parent; + } + return 0; } -extern int finalize(block* b, int line_number) +extern void finalize(block* b, int line_number) { - int firstlinelen; - int pos; - block* item; - block* subitem; - - check(b != NULL, "finalize called on null block"); - if (!b->open) { - return 0; // don't do anything if the block is already closed - } - b->open = false; - if (line_number > b->start_line) { - b->end_line = line_number - 1; - } else { - b->end_line = line_number; - } - - switch (b->tag) { - - case paragraph: - pos = 0; - while (bchar(b->string_content, 0) == '[' && - (pos = parse_reference(b->string_content, - b->top->attributes.refmap))) { - bdelete(b->string_content, 0, pos); - } - if (is_blank(b->string_content, 0)) { - b->tag = reference_def; - } - break; - - case 
indented_code: - remove_trailing_blank_lines(b->string_content); - bformata(b->string_content, "\n"); - break; - - case fenced_code: - // first line of contents becomes info - firstlinelen = bstrchr(b->string_content, '\n'); - b->attributes.fenced_code_data.info = - bmidstr(b->string_content, 0, firstlinelen); - bdelete(b->string_content, 0, firstlinelen + 1); // +1 for \n - btrimws(b->attributes.fenced_code_data.info); - unescape(b->attributes.fenced_code_data.info); - break; - - case list: // determine tight/loose status - b->attributes.list_data.tight = true; // tight by default - item = b->children; - - while (item) { - // check for non-final non-empty list item ending with blank line: - if (item->last_line_blank && item->next) { - b->attributes.list_data.tight = false; - break; - } - // recurse into children of list item, to see if there are - // spaces between them: - subitem = item->children; - while (subitem) { - if (ends_with_blank_line(subitem) && - (item->next || subitem->next)) { - b->attributes.list_data.tight = false; - break; - } - subitem = subitem->next; - } - if (!(b->attributes.list_data.tight)) { - break; - } - item = item->next; - } - - break; - - default: - break; - } - - return 0; - error: - return -1; + int firstlinelen; + int pos; + block* item; + block* subitem; + + if (!b->open) + return; // don't do anything if the block is already closed + + b->open = false; + if (line_number > b->start_line) { + b->end_line = line_number - 1; + } else { + b->end_line = line_number; + } + + switch (b->tag) { + case paragraph: + pos = 0; + while (gh_buf_at(&b->string_content, b->string_pos) == '[' && + (pos = parse_reference(&b->string_content, b->string_pos, + b->top->attributes.refmap))) { + b->string_pos = pos; + } + if (is_blank(&b->string_content, b->string_pos)) { + b->tag = reference_def; + } + break; + + case indented_code: + remove_trailing_blank_lines(&b->string_content); + gh_buf_putc(&b->string_content, '\n'); + break; + + case fenced_code: + 
// first line of contents becomes info + firstlinelen = gh_buf_strchr(&b->string_content, '\n', b->string_pos); + gh_buf_set( + &b->attributes.fenced_code_data.info, + b->string_content.ptr + b->string_pos, + firstlinelen + ); + + b->string_pos = firstlinelen + 1; + + gh_buf_trim(&b->attributes.fenced_code_data.info); + unescape_buffer(&b->attributes.fenced_code_data.info); + break; + + case list: // determine tight/loose status + b->attributes.list_data.tight = true; // tight by default + item = b->children; + + while (item) { + // check for non-final non-empty list item ending with blank line: + if (item->last_line_blank && item->next) { + b->attributes.list_data.tight = false; + break; + } + // recurse into children of list item, to see if there are + // spaces between them: + subitem = item->children; + while (subitem) { + if (ends_with_blank_line(subitem) && + (item->next || subitem->next)) { + b->attributes.list_data.tight = false; + break; + } + subitem = subitem->next; + } + if (!(b->attributes.list_data.tight)) { + break; + } + item = item->next; + } + + break; + + default: + break; + } } // Add a block as child of another. Return pointer to child. extern block* add_child(block* parent, - int block_type, int start_line, int start_column) + int block_type, int start_line, int start_column) { - // if 'parent' isn't the kind of block that can accept this child, - // then back up til we hit a block that can. 
- while (!can_contain(parent->tag, block_type)) { - finalize(parent, start_line); - parent = parent->parent; - } - - check(parent != NULL, "parent container cannot accept children"); - - block* child = make_block(block_type, start_line, start_column); - child->parent = parent; - child->top = parent->top; - - if (parent->last_child) { - parent->last_child->next = child; - child->prev = parent->last_child; - } else { - parent->children = child; - child->prev = NULL; - } - parent->last_child = child; - return child; - error: - return NULL; + assert(parent); + + // if 'parent' isn't the kind of block that can accept this child, + // then back up til we hit a block that can. + while (!can_contain(parent->tag, block_type)) { + finalize(parent, start_line); + parent = parent->parent; + } + + block* child = make_block(block_type, start_line, start_column); + child->parent = parent; + child->top = parent->top; + + if (parent->last_child) { + parent->last_child->next = child; + child->prev = parent->last_child; + } else { + parent->children = child; + child->prev = NULL; + } + parent->last_child = child; + return child; } // Free a block list and any children. 
extern void free_blocks(block* e) { - block * next; - while (e != NULL) { - next = e->next; - free_inlines(e->inline_content); - bdestroy(e->string_content); - if (e->tag == fenced_code) { - bdestroy(e->attributes.fenced_code_data.info); - } else if (e->tag == document) { - free_reference_map(e->attributes.refmap); - } - free_blocks(e->children); - free(e); - e = next; - } + block * next; + while (e != NULL) { + next = e->next; + free_inlines(e->inline_content); + gh_buf_free(&e->string_content); + if (e->tag == fenced_code) { + gh_buf_free(&e->attributes.fenced_code_data.info); + } else if (e->tag == document) { + free_reference_map(e->attributes.refmap); + } + free_blocks(e->children); + free(e); + e = next; + } } // Walk through block and all children, recursively, parsing // string content into inline content where appropriate. -int process_inlines(block* cur, reference** refmap) +void process_inlines(block* cur, reference** refmap) { - switch (cur->tag) { - - case paragraph: - case atx_header: - case setext_header: - check(cur->string_content != NULL, "string_content is NULL"); - cur->inline_content = parse_inlines(cur->string_content, refmap); - bdestroy(cur->string_content); - cur->string_content = NULL; - break; - - default: - break; - } - - block * child = cur->children; - while (child != NULL) { - process_inlines(child, refmap); - child = child->next; - } - - return 0; - error: - return -1; + switch (cur->tag) { + case paragraph: + case atx_header: + case setext_header: + cur->inline_content = parse_inlines(&cur->string_content, cur->string_pos, refmap); + // MEM + // gh_buf_free(&cur->string_content); + break; + + default: + break; + } + + block *child = cur->children; + while (child != NULL) { + process_inlines(child, refmap); + child = child->next; + } } // Attempts to parse a list item marker (bullet or enumerated). // On success, returns length of the marker, and populates // data with the details. On failure, returns 0. 
-static int parse_list_marker(bstring ln, int pos, - struct ListData ** dataptr) +static int parse_list_marker(gh_buf *ln, int pos, + struct ListData ** dataptr) { - char c; - int startpos; - int start = 1; - struct ListData * data; - - startpos = pos; - c = bchar(ln, pos); - - if ((c == '*' || c == '-' || c == '+') && !scan_hrule(ln, pos)) { - pos++; - if (!isspace(bchar(ln, pos))) { - return 0; - } - data = malloc(sizeof(struct ListData)); - data->marker_offset = 0; // will be adjusted later - data->list_type = bullet; - data->bullet_char = c; - data->start = 1; - data->delimiter = period; - data->tight = false; - - } else if (isdigit(c)) { - - pos++; - while (isdigit(bchar(ln, pos))) { - pos++; - } - - if (!sscanf((char *) ln->data + startpos, "%d", &start)) { - log_err("sscanf failed"); - return 0; - } - - c = bchar(ln, pos); - if (c == '.' || c == ')') { - pos++; - if (!isspace(bchar(ln, pos))) { - return 0; - } - data = malloc(sizeof(struct ListData)); - data->marker_offset = 0; // will be adjusted later - data->list_type = ordered; - data->bullet_char = 0; - data->start = start; - data->delimiter = (c == '.' ? period : parens); - data->tight = false; - } else { - return 0; - } - - } else { - return 0; - } - - *dataptr = data; - return (pos - startpos); + char c; + int startpos; + struct ListData * data; + + startpos = pos; + c = gh_buf_at(ln, pos); + + if ((c == '*' || c == '-' || c == '+') && !scan_hrule(ln, pos)) { + pos++; + if (!isspace(gh_buf_at(ln, pos))) { + return 0; + } + data = malloc(sizeof(struct ListData)); + data->marker_offset = 0; // will be adjusted later + data->list_type = bullet; + data->bullet_char = c; + data->start = 1; + data->delimiter = period; + data->tight = false; + + } else if (isdigit(c)) { + int start = 0; + + do { + start = (10 * start) + (gh_buf_at(ln, pos) - '0'); + pos++; + } while (isdigit(gh_buf_at(ln, pos))); + + c = gh_buf_at(ln, pos); + if (c == '.' 
|| c == ')') { + pos++; + if (!isspace(gh_buf_at(ln, pos))) { + return 0; + } + data = malloc(sizeof(struct ListData)); + data->marker_offset = 0; // will be adjusted later + data->list_type = ordered; + data->bullet_char = 0; + data->start = start; + data->delimiter = (c == '.' ? period : parens); + data->tight = false; + } else { + return 0; + } + + } else { + return 0; + } + + *dataptr = data; + return (pos - startpos); } // Return 1 if list item belongs in list, else 0. static int lists_match(struct ListData list_data, - struct ListData item_data) + struct ListData item_data) +{ + return (list_data.list_type == item_data.list_type && + list_data.delimiter == item_data.delimiter && + // list_data.marker_offset == item_data.marker_offset && + list_data.bullet_char == item_data.bullet_char); +} + +static void expand_tabs(gh_buf *ob, const char *line, size_t size) +{ + size_t i = 0, tab = 0; + + while (i < size) { + size_t org = i; + + while (i < size && line[i] != '\t') { + i++; tab++; + } + + if (i > org) + gh_buf_put(ob, line + org, i - org); + + if (i >= size) + break; + + do { + gh_buf_putc(ob, ' '); tab++; + } while (tab % 4); + + i++; + } +} + +extern block *stmd_parse_document(const char *buffer, size_t len) { - return (list_data.list_type == item_data.list_type && - list_data.delimiter == item_data.delimiter && - // list_data.marker_offset == item_data.marker_offset && - list_data.bullet_char == item_data.bullet_char); + gh_buf line = GH_BUF_INIT; + + block *document = make_document(); + int linenum = 1; + const char *end = buffer + len; + + while (buffer < end) { + const char *eol = memchr(buffer, '\n', end - buffer); + + if (!eol) { + expand_tabs(&line, buffer, end - buffer); + buffer = end; + } else { + expand_tabs(&line, buffer, (eol - buffer) + 1); + buffer += (eol - buffer) + 1; + } + + incorporate_line(&line, linenum, &document); + gh_buf_clear(&line); + linenum++; + } + + gh_buf_free(&line); + + while (document != document->top) { + 
finalize(document, linenum); + document = document->parent; + } + + finalize(document, linenum); + process_inlines(document, document->attributes.refmap); + + return document; } // Process one line at a time, modifying a block. // Returns 0 if successful. curptr is changed to point to // the currently open block. -extern int incorporate_line(bstring ln, int line_number, block** curptr) +extern void incorporate_line(gh_buf *ln, int line_number, block** curptr) { - block* last_matched_container; - int offset = 0; - int matched = 0; - int lev = 0; - int i; - struct ListData * data = NULL; - bool all_matched = true; - block* container; - block* cur = *curptr; - bool blank = false; - int first_nonspace; - int indent; - - // detab input line - check(bdetab(ln, 1) != BSTR_ERR, - "invalid UTF-8 sequence in line %d\n", line_number); - - // container starts at the document root. - container = cur->top; - - // for each containing block, try to parse the associated line start. - // bail out on failure: container will point to the last matching block. 
- - while (container->last_child && container->last_child->open) { - container = container->last_child; - - first_nonspace = offset; - while (bchar(ln, first_nonspace) == ' ') { - first_nonspace++; - } - - indent = first_nonspace - offset; - blank = bchar(ln, first_nonspace) == '\n'; - - if (container->tag == block_quote) { - - matched = indent <= 3 && bchar(ln, first_nonspace) == '>'; - if (matched) { - offset = first_nonspace + 1; - if (bchar(ln, offset) == ' ') { - offset++; - } - } else { - all_matched = false; - } - - } else if (container->tag == list_item) { - - if (indent >= container->attributes.list_data.marker_offset + - container->attributes.list_data.padding) { - offset += container->attributes.list_data.marker_offset + - container->attributes.list_data.padding; - } else if (blank) { - offset = first_nonspace; - } else { - all_matched = false; - } - - } else if (container->tag == indented_code) { - - if (indent >= CODE_INDENT) { - offset += CODE_INDENT; - } else if (blank) { - offset = first_nonspace; - } else { - all_matched = false; - } - - } else if (container->tag == atx_header || - container->tag == setext_header) { - - // a header can never contain more than one line - all_matched = false; - - } else if (container->tag == fenced_code) { - - // skip optional spaces of fence offset - i = container->attributes.fenced_code_data.fence_offset; - while (i > 0 && bchar(ln, offset) == ' ') { - offset++; - i--; - } - - } else if (container->tag == html_block) { - - if (blank) { - all_matched = false; - } - - } else if (container->tag == paragraph) { - - if (blank) { - container->last_line_blank =true; - all_matched = false; - } - - } - - if (!all_matched) { - container = container->parent; // back up to last matching block - break; - } - } - - last_matched_container = container; - - // check to see if we've hit 2nd blank line, break out of list: - if (blank && container->last_line_blank) { - break_out_of_lists(&container, line_number); - } - - // unless 
last matched container is code block, try new container starts: - while (container->tag != fenced_code && container->tag != indented_code && - container->tag != html_block) { - - first_nonspace = offset; - while (bchar(ln, first_nonspace) == ' ') { - first_nonspace++; - } - - indent = first_nonspace - offset; - blank = bchar(ln, first_nonspace) == '\n'; - - if (indent >= CODE_INDENT) { - - if (cur->tag != paragraph && !blank) { - offset += CODE_INDENT; - container = add_child(container, indented_code, line_number, offset + 1); - } else { // indent > 4 in lazy line - break; - } - - } else if (bchar(ln, first_nonspace) == '>') { - - offset = first_nonspace + 1; - // optional following character - if (bchar(ln, offset) == ' ') { - offset++; - } - container = add_child(container, block_quote, line_number, offset + 1); - - } else if ((matched = scan_atx_header_start(ln, first_nonspace))) { - - offset = first_nonspace + matched; - container = add_child(container, atx_header, line_number, offset + 1); - int hashpos = bstrchrp(ln, '#', first_nonspace); - check(hashpos != BSTR_ERR, "no # found in atx header start"); - int level = 0; - while (bchar(ln, hashpos) == '#') { - level++; - hashpos++; - } - container->attributes.header_level = level; - - } else if ((matched = scan_open_code_fence(ln, first_nonspace))) { - - container = add_child(container, fenced_code, line_number, - first_nonspace + 1); - container->attributes.fenced_code_data.fence_char = bchar(ln, - first_nonspace); - container->attributes.fenced_code_data.fence_length = matched; - container->attributes.fenced_code_data.fence_offset = - first_nonspace - offset; - offset = first_nonspace + matched; - - } else if ((matched = scan_html_block_tag(ln, first_nonspace))) { - - container = add_child(container, html_block, line_number, - first_nonspace + 1); - // note, we don't adjust offset because the tag is part of the text - - } else if (container->tag == paragraph && - (lev = scan_setext_header_line(ln, 
first_nonspace)) && - // check that there is only one line in the paragraph: - bstrrchrp(container->string_content, '\n', - blength(container->string_content) - 2) == BSTR_ERR) { - - container->tag = setext_header; - container->attributes.header_level = lev; - offset = blength(ln) - 1; - - } else if (!(container->tag == paragraph && !all_matched) && - (matched = scan_hrule(ln, first_nonspace))) { - - // it's only now that we know the line is not part of a setext header: - container = add_child(container, hrule, line_number, first_nonspace + 1); - finalize(container, line_number); - container = container->parent; - offset = blength(ln) - 1; - - } else if ((matched = parse_list_marker(ln, first_nonspace, &data))) { - - // compute padding: - offset = first_nonspace + matched; - i = 0; - while (i <= 5 && bchar(ln, offset + i) == ' ') { - i++; - } - // i = number of spaces after marker, up to 5 - if (i >= 5 || i < 1 || bchar(ln, offset) == '\n') { - data->padding = matched + 1; - if (i > 0) { - offset += 1; - } - } else { - data->padding = matched + i; - offset += i; - } - - // check container; if it's a list, see if this list item - // can continue the list; otherwise, create a list container. - - data->marker_offset = indent; - - if (container->tag != list || - !lists_match(container->attributes.list_data, *data)) { - container = add_child(container, list, line_number, - first_nonspace + 1); - container->attributes.list_data = *data; - } - - // add the list item - container = add_child(container, list_item, line_number, - first_nonspace + 1); - container->attributes.list_data = *data; - free(data); - - } else { - break; - } - - if (accepts_lines(container->tag)) { - // if it's a line container, it can't contain other containers - break; - } - } - - // what remains at offset is a text line. add the text to the - // appropriate container. 
- - first_nonspace = offset; - while (bchar(ln, first_nonspace) == ' ') { - first_nonspace++; - } - - indent = first_nonspace - offset; - blank = bchar(ln, first_nonspace) == '\n'; - - // block quote lines are never blank as they start with > - // and we don't count blanks in fenced code for purposes of tight/loose - // lists or breaking out of lists. we also don't set last_line_blank - // on an empty list item. - container->last_line_blank = (blank && - container->tag != block_quote && - container->tag != fenced_code && - !(container->tag == list_item && - container->children == NULL && - container->start_line == line_number)); - - block *cont = container; - while (cont->parent) { - cont->parent->last_line_blank = false; - cont = cont->parent; - } - - if (cur != last_matched_container && - container == last_matched_container && - !blank && - cur->tag == paragraph && - blength(cur->string_content) > 0) { - - check(add_line(cur, ln, offset) == 0, "could not add line"); - - } else { // not a lazy continuation - - // finalize any blocks that were not matched and set cur to container: - while (cur != last_matched_container) { - - finalize(cur, line_number); - cur = cur->parent; - check(cur != NULL, "cur is NULL, last_matched_container->tag = %d", - last_matched_container->tag); - - } - - if (container->tag == indented_code) { - - check(add_line(container, ln, offset) == 0, "could not add line"); - - } else if (container->tag == fenced_code) { - - matched = (indent <= 3 - && bchar(ln, first_nonspace) == container->attributes.fenced_code_data.fence_char) - && scan_close_code_fence(ln, first_nonspace, - container->attributes.fenced_code_data.fence_length); - if (matched) { - // if closing fence, don't add line to container; instead, close it: - finalize(container, line_number); - container = container->parent; // back up to parent - } else { - check(add_line(container, ln, offset) == 0, "could not add line"); - } - - } else if (container->tag == html_block) { - - 
check(add_line(container, ln, offset) == 0, "could not add line"); - - } else if (blank) { - - // ??? do nothing - - } else if (container->tag == atx_header) { - - // chop off trailing ###s...use a scanner? - brtrimws(ln); - int p = blength(ln) - 1; - int numhashes = 0; - // if string ends in #s, remove these: - while (bchar(ln, p) == '#') { - p--; - numhashes++; - } - if (bchar(ln, p) == '\\') { - // the last # was escaped, so we include it. - p++; - numhashes--; - } - check(bdelete(ln, p + 1, numhashes) != BSTR_ERR, - "could not delete final hashes"); - check(add_line(container, ln, first_nonspace) == 0, "could not add line"); - finalize(container, line_number); - container = container->parent; - - } else if (accepts_lines(container->tag)) { - - check(add_line(container, ln, first_nonspace) == 0, "could not add line"); + block* last_matched_container; + int offset = 0; + int matched = 0; + int lev = 0; + int i; + struct ListData * data = NULL; + bool all_matched = true; + block* container; + block* cur = *curptr; + bool blank = false; + int first_nonspace; + int indent; + + // container starts at the document root. + container = cur->top; + + // for each containing block, try to parse the associated line start. + // bail out on failure: container will point to the last matching block. 
+ + while (container->last_child && container->last_child->open) { + container = container->last_child; + + first_nonspace = offset; + while (gh_buf_at(ln, first_nonspace) == ' ') { + first_nonspace++; + } + + indent = first_nonspace - offset; + blank = gh_buf_at(ln, first_nonspace) == '\n'; + + if (container->tag == block_quote) { + + matched = indent <= 3 && gh_buf_at(ln, first_nonspace) == '>'; + if (matched) { + offset = first_nonspace + 1; + if (gh_buf_at(ln, offset) == ' ') { + offset++; + } + } else { + all_matched = false; + } + + } else if (container->tag == list_item) { + + if (indent >= container->attributes.list_data.marker_offset + + container->attributes.list_data.padding) { + offset += container->attributes.list_data.marker_offset + + container->attributes.list_data.padding; + } else if (blank) { + offset = first_nonspace; + } else { + all_matched = false; + } + + } else if (container->tag == indented_code) { + + if (indent >= CODE_INDENT) { + offset += CODE_INDENT; + } else if (blank) { + offset = first_nonspace; + } else { + all_matched = false; + } + + } else if (container->tag == atx_header || + container->tag == setext_header) { + + // a header can never contain more than one line + all_matched = false; + + } else if (container->tag == fenced_code) { + + // skip optional spaces of fence offset + i = container->attributes.fenced_code_data.fence_offset; + while (i > 0 && gh_buf_at(ln, offset) == ' ') { + offset++; + i--; + } + + } else if (container->tag == html_block) { + + if (blank) { + all_matched = false; + } + + } else if (container->tag == paragraph) { + + if (blank) { + container->last_line_blank = true; + all_matched = false; + } + + } + + if (!all_matched) { + container = container->parent; // back up to last matching block + break; + } + } + + last_matched_container = container; + + // check to see if we've hit 2nd blank line, break out of list: + if (blank && container->last_line_blank) { + break_out_of_lists(&container, line_number); 
+ } + + // unless last matched container is code block, try new container starts: + while (container->tag != fenced_code && container->tag != indented_code && + container->tag != html_block) { + + first_nonspace = offset; + while (gh_buf_at(ln, first_nonspace) == ' ') { + first_nonspace++; + } + + indent = first_nonspace - offset; + blank = gh_buf_at(ln, first_nonspace) == '\n'; + + if (indent >= CODE_INDENT) { + + if (cur->tag != paragraph && !blank) { + offset += CODE_INDENT; + container = add_child(container, indented_code, line_number, offset + 1); + } else { // indent > 4 in lazy line + break; + } + + } else if (gh_buf_at(ln, first_nonspace) == '>') { + + offset = first_nonspace + 1; + // optional following character + if (gh_buf_at(ln, offset) == ' ') { + offset++; + } + container = add_child(container, block_quote, line_number, offset + 1); + + } else if ((matched = scan_atx_header_start(ln, first_nonspace))) { + + offset = first_nonspace + matched; + container = add_child(container, atx_header, line_number, offset + 1); + + int hashpos = gh_buf_strchr(ln, '#', first_nonspace); + assert(hashpos >= 0); + + int level = 0; + while (gh_buf_at(ln, hashpos) == '#') { + level++; + hashpos++; + } + container->attributes.header_level = level; + + } else if ((matched = scan_open_code_fence(ln, first_nonspace))) { + + container = add_child(container, fenced_code, line_number, + first_nonspace + 1); + container->attributes.fenced_code_data.fence_char = gh_buf_at(ln, + first_nonspace); + container->attributes.fenced_code_data.fence_length = matched; + container->attributes.fenced_code_data.fence_offset = + first_nonspace - offset; + offset = first_nonspace + matched; + + } else if ((matched = scan_html_block_tag(ln, first_nonspace))) { + + container = add_child(container, html_block, line_number, + first_nonspace + 1); + // note, we don't adjust offset because the tag is part of the text + + } else if (container->tag == paragraph && + (lev = scan_setext_header_line(ln, 
first_nonspace)) && + // check that there is only one line in the paragraph: + gh_buf_strrchr(&container->string_content, '\n', + gh_buf_len(&container->string_content) - 2) < 0) { + + container->tag = setext_header; + container->attributes.header_level = lev; + offset = gh_buf_len(ln) - 1; + + } else if (!(container->tag == paragraph && !all_matched) && + (matched = scan_hrule(ln, first_nonspace))) { + + // it's only now that we know the line is not part of a setext header: + container = add_child(container, hrule, line_number, first_nonspace + 1); + finalize(container, line_number); + container = container->parent; + offset = gh_buf_len(ln) - 1; + + } else if ((matched = parse_list_marker(ln, first_nonspace, &data))) { + + // compute padding: + offset = first_nonspace + matched; + i = 0; + while (i <= 5 && gh_buf_at(ln, offset + i) == ' ') { + i++; + } + // i = number of spaces after marker, up to 5 + if (i >= 5 || i < 1 || gh_buf_at(ln, offset) == '\n') { + data->padding = matched + 1; + if (i > 0) { + offset += 1; + } + } else { + data->padding = matched + i; + offset += i; + } + + // check container; if it's a list, see if this list item + // can continue the list; otherwise, create a list container. + + data->marker_offset = indent; + + if (container->tag != list || + !lists_match(container->attributes.list_data, *data)) { + container = add_child(container, list, line_number, + first_nonspace + 1); + container->attributes.list_data = *data; + } + + // add the list item + container = add_child(container, list_item, line_number, + first_nonspace + 1); + container->attributes.list_data = *data; + free(data); + + } else { + break; + } + + if (accepts_lines(container->tag)) { + // if it's a line container, it can't contain other containers + break; + } + } + + // what remains at offset is a text line. add the text to the + // appropriate container. 
+ + first_nonspace = offset; + while (gh_buf_at(ln, first_nonspace) == ' ') { + first_nonspace++; + } + + indent = first_nonspace - offset; + blank = gh_buf_at(ln, first_nonspace) == '\n'; + + // block quote lines are never blank as they start with > + // and we don't count blanks in fenced code for purposes of tight/loose + // lists or breaking out of lists. we also don't set last_line_blank + // on an empty list item. + container->last_line_blank = (blank && + container->tag != block_quote && + container->tag != fenced_code && + !(container->tag == list_item && + container->children == NULL && + container->start_line == line_number)); + + block *cont = container; + while (cont->parent) { + cont->parent->last_line_blank = false; + cont = cont->parent; + } + + if (cur != last_matched_container && + container == last_matched_container && + !blank && + cur->tag == paragraph && + gh_buf_len(&cur->string_content) > 0) { + + add_line(cur, ln, offset); + + } else { // not a lazy continuation + + // finalize any blocks that were not matched and set cur to container: + while (cur != last_matched_container) { + + finalize(cur, line_number); + cur = cur->parent; + assert(cur != NULL); + } + + if (container->tag == indented_code) { + + add_line(container, ln, offset); + + } else if (container->tag == fenced_code) { + + matched = (indent <= 3 + && gh_buf_at(ln, first_nonspace) == container->attributes.fenced_code_data.fence_char) + && scan_close_code_fence(ln, first_nonspace, + container->attributes.fenced_code_data.fence_length); + if (matched) { + // if closing fence, don't add line to container; instead, close it: + finalize(container, line_number); + container = container->parent; // back up to parent + } else { + add_line(container, ln, offset); + } + + } else if (container->tag == html_block) { + + add_line(container, ln, offset); + + } else if (blank) { + + // ??? do nothing + + } else if (container->tag == atx_header) { + // chop off trailing ###s...use a scanner? 
+ gh_buf_trim(ln); + int p = gh_buf_len(ln) - 1; + + // if string ends in #s, remove these: + while (gh_buf_at(ln, p) == '#') { + p--; + } + if (gh_buf_at(ln, p) == '\\') { + // the last # was escaped, so we include it. + p++; + } + + gh_buf_truncate(ln, p + 1); + add_line(container, ln, first_nonspace); + finalize(container, line_number); + container = container->parent; + + } else if (accepts_lines(container->tag)) { + + add_line(container, ln, first_nonspace); + + } else if (container->tag != hrule && container->tag != setext_header) { + + // create paragraph container for line + container = add_child(container, paragraph, line_number, first_nonspace + 1); + add_line(container, ln, first_nonspace); - } else if (container->tag != hrule && container->tag != setext_header) { - - // create paragraph container for line - container = add_child(container, paragraph, line_number, first_nonspace + 1); - check(add_line(container, ln, first_nonspace) == 0, "could not add line"); - - } else { + } else { + assert(false); + } - log_warn("Line %d with container type %d did not match any condition:\n\"%s\"", - line_number, container->tag, ln->data); - - } - *curptr = container; - } - - return 0; - error: - return -1; + *curptr = container; + } } diff --git a/src/bstrlib.c b/src/bstrlib.c deleted file mode 100644 index 1b19dbe..0000000 --- a/src/bstrlib.c +++ /dev/null @@ -1,2979 +0,0 @@ -/* - * This source file is part of the bstring string library. This code was - * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause - * BSD open source license or GPL v2.0. Refer to the accompanying documentation - * for details on usage and license. - */ - -/* - * bstrlib.c - * - * This file is the core module for implementing the bstring functions. - */ - -#if defined (_MSC_VER) -/* These warnings from MSVC++ are totally pointless. 
*/ -# define _CRT_SECURE_NO_WARNINGS -#endif - -#include -#include -#include -#include -#include -#include -#include "bstrlib.h" - -/* Optionally include a mechanism for debugging memory */ - -#if defined(MEMORY_DEBUG) || defined(BSTRLIB_MEMORY_DEBUG) -#include "memdbg.h" -#endif - -#ifndef bstr__alloc -#define bstr__alloc(x) malloc (x) -#endif - -#ifndef bstr__free -#define bstr__free(p) free (p) -#endif - -#ifndef bstr__realloc -#define bstr__realloc(p,x) realloc ((p), (x)) -#endif - -#ifndef bstr__memcpy -#define bstr__memcpy(d,s,l) memcpy ((d), (s), (l)) -#endif - -#ifndef bstr__memmove -#define bstr__memmove(d,s,l) memmove ((d), (s), (l)) -#endif - -#ifndef bstr__memset -#define bstr__memset(d,c,l) memset ((d), (c), (l)) -#endif - -#ifndef bstr__memcmp -#define bstr__memcmp(d,c,l) memcmp ((d), (c), (l)) -#endif - -#ifndef bstr__memchr -#define bstr__memchr(s,c,l) memchr ((s), (c), (l)) -#endif - -/* Just a length safe wrapper for memmove. */ - -#define bBlockCopy(D,S,L) { if ((L) > 0) bstr__memmove ((D),(S),(L)); } - -/* Compute the snapped size for a given requested size. By snapping to powers - of 2 like this, repeated reallocations are avoided. */ -static int snapUpSize (int i) { - if (i < 8) { - i = 8; - } else { - unsigned int j; - j = (unsigned int) i; - - j |= (j >> 1); - j |= (j >> 2); - j |= (j >> 4); - j |= (j >> 8); /* Ok, since int >= 16 bits */ -#if (UINT_MAX != 0xffff) - j |= (j >> 16); /* For 32 bit int systems */ -#if (UINT_MAX > 0xffffffffUL) - j |= (j >> 32); /* For 64 bit int systems */ -#endif -#endif - /* Least power of two greater than i */ - j++; - if ((int) j >= i) i = (int) j; - } - return i; -} - -/* int balloc (bstring b, int len) - * - * Increase the size of the memory backing the bstring b to at least len. 
- */ -int balloc (bstring b, int olen) { - int len; - if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen <= 0 || - b->mlen < b->slen || olen <= 0) { - return BSTR_ERR; - } - - if (olen >= b->mlen) { - unsigned char * x; - - if ((len = snapUpSize (olen)) <= b->mlen) return BSTR_OK; - - /* Assume probability of a non-moving realloc is 0.125 */ - if (7 * b->mlen < 8 * b->slen) { - - /* If slen is close to mlen in size then use realloc to reduce - the memory defragmentation */ - - reallocStrategy:; - - x = (unsigned char *) bstr__realloc (b->data, (size_t) len); - if (x == NULL) { - - /* Since we failed, try allocating the tighest possible - allocation */ - - if (NULL == (x = (unsigned char *) bstr__realloc (b->data, (size_t) (len = olen)))) { - return BSTR_ERR; - } - } - } else { - - /* If slen is not close to mlen then avoid the penalty of copying - the extra bytes that are allocated, but not considered part of - the string */ - - if (NULL == (x = (unsigned char *) bstr__alloc ((size_t) len))) { - - /* Perhaps there is no available memory for the two - allocations to be in memory at once */ - - goto reallocStrategy; - - } else { - if (b->slen) bstr__memcpy ((char *) x, (char *) b->data, (size_t) b->slen); - bstr__free (b->data); - } - } - b->data = x; - b->mlen = len; - b->data[b->slen] = (unsigned char) '\0'; - } - - return BSTR_OK; -} - -/* int ballocmin (bstring b, int len) - * - * Set the size of the memory backing the bstring b to len or b->slen+1, - * whichever is larger. Note that repeated use of this function can degrade - * performance. 
- */ -int ballocmin (bstring b, int len) { - unsigned char * s; - - if (b == NULL || b->data == NULL || (b->slen+1) < 0 || b->mlen <= 0 || - b->mlen < b->slen || len <= 0) { - return BSTR_ERR; - } - - if (len < b->slen + 1) len = b->slen + 1; - - if (len != b->mlen) { - s = (unsigned char *) bstr__realloc (b->data, (size_t) len); - if (NULL == s) return BSTR_ERR; - s[b->slen] = (unsigned char) '\0'; - b->data = s; - b->mlen = len; - } - - return BSTR_OK; -} - -/* bstring bfromcstr (const char * str) - * - * Create a bstring which contains the contents of the '\0' terminated char * - * buffer str. - */ -bstring bfromcstr (const char * str) { -bstring b; -int i; -size_t j; - - if (str == NULL) return NULL; - j = (strlen) (str); - i = snapUpSize ((int) (j + (2 - (j != 0)))); - if (i <= (int) j) return NULL; - - b = (bstring) bstr__alloc (sizeof (struct tagbstring)); - if (NULL == b) return NULL; - b->slen = (int) j; - if (NULL == (b->data = (unsigned char *) bstr__alloc (b->mlen = i))) { - bstr__free (b); - return NULL; - } - - bstr__memcpy (b->data, str, j+1); - return b; -} - -/* bstring bfromcstralloc (int mlen, const char * str) - * - * Create a bstring which contains the contents of the '\0' terminated char * - * buffer str. The memory buffer backing the string is at least len - * characters in length. - */ -bstring bfromcstralloc (int mlen, const char * str) { -bstring b; -int i; -size_t j; - - if (str == NULL) return NULL; - j = (strlen) (str); - i = snapUpSize ((int) (j + (2 - (j != 0)))); - if (i <= (int) j) return NULL; - - b = (bstring) bstr__alloc (sizeof (struct tagbstring)); - if (b == NULL) return NULL; - b->slen = (int) j; - if (i < mlen) i = mlen; - - if (NULL == (b->data = (unsigned char *) bstr__alloc (b->mlen = i))) { - bstr__free (b); - return NULL; - } - - bstr__memcpy (b->data, str, j+1); - return b; -} - -/* bstring blk2bstr (const void * blk, int len) - * - * Create a bstring which contains the content of the block blk of length - * len. 
- */ -bstring blk2bstr (const void * blk, int len) { -bstring b; -int i; - - if (blk == NULL || len < 0) return NULL; - b = (bstring) bstr__alloc (sizeof (struct tagbstring)); - if (b == NULL) return NULL; - b->slen = len; - - i = len + (2 - (len != 0)); - i = snapUpSize (i); - - b->mlen = i; - - b->data = (unsigned char *) bstr__alloc ((size_t) b->mlen); - if (b->data == NULL) { - bstr__free (b); - return NULL; - } - - if (len > 0) bstr__memcpy (b->data, blk, (size_t) len); - b->data[len] = (unsigned char) '\0'; - - return b; -} - -/* char * bstr2cstr (const_bstring s, char z) - * - * Create a '\0' terminated char * buffer which is equal to the contents of - * the bstring s, except that any contained '\0' characters are converted - * to the character in z. This returned value should be freed with a - * bcstrfree () call, by the calling application. - */ -char * bstr2cstr (const_bstring b, char z) { -int i, l; -char * r; - - if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; - l = b->slen; - r = (char *) bstr__alloc ((size_t) (l + 1)); - if (r == NULL) return r; - - for (i=0; i < l; i ++) { - r[i] = (char) ((b->data[i] == '\0') ? z : (char) (b->data[i])); - } - - r[l] = (unsigned char) '\0'; - - return r; -} - -/* int bcstrfree (char * s) - * - * Frees a C-string generated by bstr2cstr (). This is normally unnecessary - * since it just wraps a call to bstr__free (), however, if bstr__alloc () - * and bstr__free () have been redefined as a macros within the bstrlib - * module (via defining them in memdbg.h after defining - * BSTRLIB_MEMORY_DEBUG) with some difference in behaviour from the std - * library functions, then this allows a correct way of freeing the memory - * that allows higher level code to be independent from these macro - * redefinitions. 
- */ -int bcstrfree (char * s) { - if (s) { - bstr__free (s); - return BSTR_OK; - } - return BSTR_ERR; -} - -/* int bconcat (bstring b0, const_bstring b1) - * - * Concatenate the bstring b1 to the bstring b0. - */ -int bconcat (bstring b0, const_bstring b1) { -int len, d; -bstring aux = (bstring) b1; - - if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL) return BSTR_ERR; - - d = b0->slen; - len = b1->slen; - if ((d | (b0->mlen - d) | len | (d + len)) < 0) return BSTR_ERR; - - if (b0->mlen <= d + len + 1) { - ptrdiff_t pd = b1->data - b0->data; - if (0 <= pd && pd < b0->mlen) { - if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR; - } - if (balloc (b0, d + len + 1) != BSTR_OK) { - if (aux != b1) bdestroy (aux); - return BSTR_ERR; - } - } - - bBlockCopy (&b0->data[d], &aux->data[0], (size_t) len); - b0->data[d + len] = (unsigned char) '\0'; - b0->slen = d + len; - if (aux != b1) bdestroy (aux); - return BSTR_OK; -} - -/* int bconchar (bstring b, char c) -/ * - * Concatenate the single character c to the bstring b. - */ -int bconchar (bstring b, char c) { -int d; - - if (b == NULL) return BSTR_ERR; - d = b->slen; - if ((d | (b->mlen - d)) < 0 || balloc (b, d + 2) != BSTR_OK) return BSTR_ERR; - b->data[d] = (unsigned char) c; - b->data[d + 1] = (unsigned char) '\0'; - b->slen++; - return BSTR_OK; -} - -/* int bcatcstr (bstring b, const char * s) - * - * Concatenate a char * string to a bstring. 
- */ -int bcatcstr (bstring b, const char * s) { -char * d; -int i, l; - - if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen - || b->mlen <= 0 || s == NULL) return BSTR_ERR; - - /* Optimistically concatenate directly */ - l = b->mlen - b->slen; - d = (char *) &b->data[b->slen]; - for (i=0; i < l; i++) { - if ((*d++ = *s++) == '\0') { - b->slen += i; - return BSTR_OK; - } - } - b->slen += i; - - /* Need to explicitely resize and concatenate tail */ - return bcatblk (b, (const void *) s, (int) strlen (s)); -} - -/* int bcatblk (bstring b, const void * s, int len) - * - * Concatenate a fixed length buffer to a bstring. - */ -int bcatblk (bstring b, const void * s, int len) { -int nl; - - if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen - || b->mlen <= 0 || s == NULL || len < 0) return BSTR_ERR; - - if (0 > (nl = b->slen + len)) return BSTR_ERR; /* Overflow? */ - if (b->mlen <= nl && 0 > balloc (b, nl + 1)) return BSTR_ERR; - - bBlockCopy (&b->data[b->slen], s, (size_t) len); - b->slen = nl; - b->data[nl] = (unsigned char) '\0'; - return BSTR_OK; -} - -/* bstring bstrcpy (const_bstring b) - * - * Create a copy of the bstring b. - */ -bstring bstrcpy (const_bstring b) { -bstring b0; -int i,j; - - /* Attempted to copy an invalid string? 
*/ - if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; - - b0 = (bstring) bstr__alloc (sizeof (struct tagbstring)); - if (b0 == NULL) { - /* Unable to allocate memory for string header */ - return NULL; - } - - i = b->slen; - j = snapUpSize (i + 1); - - b0->data = (unsigned char *) bstr__alloc (j); - if (b0->data == NULL) { - j = i + 1; - b0->data = (unsigned char *) bstr__alloc (j); - if (b0->data == NULL) { - /* Unable to allocate memory for string data */ - bstr__free (b0); - return NULL; - } - } - - b0->mlen = j; - b0->slen = i; - - if (i) bstr__memcpy ((char *) b0->data, (char *) b->data, i); - b0->data[b0->slen] = (unsigned char) '\0'; - - return b0; -} - -/* int bassign (bstring a, const_bstring b) - * - * Overwrite the string a with the contents of string b. - */ -int bassign (bstring a, const_bstring b) { - if (b == NULL || b->data == NULL || b->slen < 0) - return BSTR_ERR; - if (b->slen != 0) { - if (balloc (a, b->slen) != BSTR_OK) return BSTR_ERR; - bstr__memmove (a->data, b->data, b->slen); - } else { - if (a == NULL || a->data == NULL || a->mlen < a->slen || - a->slen < 0 || a->mlen == 0) - return BSTR_ERR; - } - a->data[b->slen] = (unsigned char) '\0'; - a->slen = b->slen; - return BSTR_OK; -} - -/* int bassignmidstr (bstring a, const_bstring b, int left, int len) - * - * Overwrite the string a with the middle of contents of string b - * starting from position left and running for a length len. left and - * len are clamped to the ends of b as with the function bmidstr. 
- */ -int bassignmidstr (bstring a, const_bstring b, int left, int len) { - if (b == NULL || b->data == NULL || b->slen < 0) - return BSTR_ERR; - - if (left < 0) { - len += left; - left = 0; - } - - if (len > b->slen - left) len = b->slen - left; - - if (a == NULL || a->data == NULL || a->mlen < a->slen || - a->slen < 0 || a->mlen == 0) - return BSTR_ERR; - - if (len > 0) { - if (balloc (a, len) != BSTR_OK) return BSTR_ERR; - bstr__memmove (a->data, b->data + left, len); - a->slen = len; - } else { - a->slen = 0; - } - a->data[a->slen] = (unsigned char) '\0'; - return BSTR_OK; -} - -/* int bassigncstr (bstring a, const char * str) - * - * Overwrite the string a with the contents of char * string str. Note that - * the bstring a must be a well defined and writable bstring. If an error - * occurs BSTR_ERR is returned however a may be partially overwritten. - */ -int bassigncstr (bstring a, const char * str) { -int i; -size_t len; - if (a == NULL || a->data == NULL || a->mlen < a->slen || - a->slen < 0 || a->mlen == 0 || NULL == str) - return BSTR_ERR; - - for (i=0; i < a->mlen; i++) { - if ('\0' == (a->data[i] = str[i])) { - a->slen = i; - return BSTR_OK; - } - } - - a->slen = i; - len = strlen (str + i); - if (len > INT_MAX || i + len + 1 > INT_MAX || - 0 > balloc (a, (int) (i + len + 1))) return BSTR_ERR; - bBlockCopy (a->data + i, str + i, (size_t) len + 1); - a->slen += (int) len; - return BSTR_OK; -} - -/* int bassignblk (bstring a, const void * s, int len) - * - * Overwrite the string a with the contents of the block (s, len). Note that - * the bstring a must be a well defined and writable bstring. If an error - * occurs BSTR_ERR is returned and a is not overwritten. 
- */ -int bassignblk (bstring a, const void * s, int len) { - if (a == NULL || a->data == NULL || a->mlen < a->slen || - a->slen < 0 || a->mlen == 0 || NULL == s || len + 1 < 1) - return BSTR_ERR; - if (len + 1 > a->mlen && 0 > balloc (a, len + 1)) return BSTR_ERR; - bBlockCopy (a->data, s, (size_t) len); - a->data[len] = (unsigned char) '\0'; - a->slen = len; - return BSTR_OK; -} - -/* int btrunc (bstring b, int n) - * - * Truncate the bstring to at most n characters. - */ -int btrunc (bstring b, int n) { - if (n < 0 || b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - if (b->slen > n) { - b->slen = n; - b->data[n] = (unsigned char) '\0'; - } - return BSTR_OK; -} - -#define upcase(c) (toupper ((unsigned char) c)) -#define downcase(c) (tolower ((unsigned char) c)) -#define wspace(c) (isspace ((unsigned char) c)) - -/* int btoupper (bstring b) - * - * Convert contents of bstring to upper case. - */ -int btoupper (bstring b) { -int i, len; - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - for (i=0, len = b->slen; i < len; i++) { - b->data[i] = (unsigned char) upcase (b->data[i]); - } - return BSTR_OK; -} - -/* int btolower (bstring b) - * - * Convert contents of bstring to lower case. - */ -int btolower (bstring b) { -int i, len; - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - for (i=0, len = b->slen; i < len; i++) { - b->data[i] = (unsigned char) downcase (b->data[i]); - } - return BSTR_OK; -} - -/* int bstricmp (const_bstring b0, const_bstring b1) - * - * Compare two strings without differentiating between case. The return - * value is the difference of the values of the characters where the two - * strings first differ after lower case transformation, otherwise 0 is - * returned indicating that the strings are equal. 
If the lengths are - * different, then a difference from 0 is given, but if the first extra - * character is '\0', then it is taken to be the value UCHAR_MAX+1. - */ -int bstricmp (const_bstring b0, const_bstring b1) { -int i, v, n; - - if (bdata (b0) == NULL || b0->slen < 0 || - bdata (b1) == NULL || b1->slen < 0) return SHRT_MIN; - if ((n = b0->slen) > b1->slen) n = b1->slen; - else if (b0->slen == b1->slen && b0->data == b1->data) return BSTR_OK; - - for (i = 0; i < n; i ++) { - v = (char) downcase (b0->data[i]) - - (char) downcase (b1->data[i]); - if (0 != v) return v; - } - - if (b0->slen > n) { - v = (char) downcase (b0->data[n]); - if (v) return v; - return UCHAR_MAX + 1; - } - if (b1->slen > n) { - v = - (char) downcase (b1->data[n]); - if (v) return v; - return - (int) (UCHAR_MAX + 1); - } - return BSTR_OK; -} - -/* int bstrnicmp (const_bstring b0, const_bstring b1, int n) - * - * Compare two strings without differentiating between case for at most n - * characters. If the position where the two strings first differ is - * before the nth position, the return value is the difference of the values - * of the characters, otherwise 0 is returned. If the lengths are different - * and less than n characters, then a difference from 0 is given, but if the - * first extra character is '\0', then it is taken to be the value - * UCHAR_MAX+1. 
- */ -int bstrnicmp (const_bstring b0, const_bstring b1, int n) { -int i, v, m; - - if (bdata (b0) == NULL || b0->slen < 0 || - bdata (b1) == NULL || b1->slen < 0 || n < 0) return SHRT_MIN; - m = n; - if (m > b0->slen) m = b0->slen; - if (m > b1->slen) m = b1->slen; - - if (b0->data != b1->data) { - for (i = 0; i < m; i ++) { - v = (char) downcase (b0->data[i]); - v -= (char) downcase (b1->data[i]); - if (v != 0) return b0->data[i] - b1->data[i]; - } - } - - if (n == m || b0->slen == b1->slen) return BSTR_OK; - - if (b0->slen > m) { - v = (char) downcase (b0->data[m]); - if (v) return v; - return UCHAR_MAX + 1; - } - - v = - (char) downcase (b1->data[m]); - if (v) return v; - return - (int) (UCHAR_MAX + 1); -} - -/* int biseqcaseless (const_bstring b0, const_bstring b1) - * - * Compare two strings for equality without differentiating between case. - * If the strings differ other than in case, 0 is returned, if the strings - * are the same, 1 is returned, if there is an error, -1 is returned. If - * the length of the strings are different, this function is O(1). '\0' - * termination characters are not treated in any special way. - */ -int biseqcaseless (const_bstring b0, const_bstring b1) { -int i, n; - - if (bdata (b0) == NULL || b0->slen < 0 || - bdata (b1) == NULL || b1->slen < 0) return BSTR_ERR; - if (b0->slen != b1->slen) return BSTR_OK; - if (b0->data == b1->data || b0->slen == 0) return 1; - for (i=0, n=b0->slen; i < n; i++) { - if (b0->data[i] != b1->data[i]) { - unsigned char c = (unsigned char) downcase (b0->data[i]); - if (c != (unsigned char) downcase (b1->data[i])) return 0; - } - } - return 1; -} - -/* int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len) - * - * Compare beginning of string b0 with a block of memory of length len - * without differentiating between case for equality. 
If the beginning of b0 - * differs from the memory block other than in case (or if b0 is too short), - * 0 is returned, if the strings are the same, 1 is returned, if there is an - * error, -1 is returned. '\0' characters are not treated in any special - * way. - */ -int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len) { -int i; - - if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0) - return BSTR_ERR; - if (b0->slen < len) return BSTR_OK; - if (b0->data == (const unsigned char *) blk || len == 0) return 1; - - for (i = 0; i < len; i ++) { - if (b0->data[i] != ((const unsigned char *) blk)[i]) { - if (downcase (b0->data[i]) != - downcase (((const unsigned char *) blk)[i])) return 0; - } - } - return 1; -} - -/* - * int bltrimws (bstring b) - * - * Delete whitespace contiguous from the left end of the string. - */ -int bltrimws (bstring b) { -int i, len; - - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - - for (len = b->slen, i = 0; i < len; i++) { - if (!wspace (b->data[i])) { - return bdelete (b, 0, i); - } - } - - b->data[0] = (unsigned char) '\0'; - b->slen = 0; - return BSTR_OK; -} - -/* - * int brtrimws (bstring b) - * - * Delete whitespace contiguous from the right end of the string. - */ -int brtrimws (bstring b) { -int i; - - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - - for (i = b->slen - 1; i >= 0; i--) { - if (!wspace (b->data[i])) { - if (b->mlen > i) b->data[i+1] = (unsigned char) '\0'; - b->slen = i + 1; - return BSTR_OK; - } - } - - b->data[0] = (unsigned char) '\0'; - b->slen = 0; - return BSTR_OK; -} - -/* - * int btrimws (bstring b) - * - * Delete whitespace contiguous from both ends of the string. 
- */ -int btrimws (bstring b) { -int i, j; - - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - - for (i = b->slen - 1; i >= 0; i--) { - if (!wspace (b->data[i])) { - if (b->mlen > i) b->data[i+1] = (unsigned char) '\0'; - b->slen = i + 1; - for (j = 0; wspace (b->data[j]); j++) {} - return bdelete (b, 0, j); - } - } - - b->data[0] = (unsigned char) '\0'; - b->slen = 0; - return BSTR_OK; -} - -/* int biseq (const_bstring b0, const_bstring b1) - * - * Compare the string b0 and b1. If the strings differ, 0 is returned, if - * the strings are the same, 1 is returned, if there is an error, -1 is - * returned. If the length of the strings are different, this function is - * O(1). '\0' termination characters are not treated in any special way. - */ -int biseq (const_bstring b0, const_bstring b1) { - if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || - b0->slen < 0 || b1->slen < 0) return BSTR_ERR; - if (b0->slen != b1->slen) return BSTR_OK; - if (b0->data == b1->data || b0->slen == 0) return 1; - return !bstr__memcmp (b0->data, b1->data, b0->slen); -} - -/* int bisstemeqblk (const_bstring b0, const void * blk, int len) - * - * Compare beginning of string b0 with a block of memory of length len for - * equality. If the beginning of b0 differs from the memory block (or if b0 - * is too short), 0 is returned, if the strings are the same, 1 is returned, - * if there is an error, -1 is returned. '\0' characters are not treated in - * any special way. 
- */ -int bisstemeqblk (const_bstring b0, const void * blk, int len) { -int i; - - if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0) - return BSTR_ERR; - if (b0->slen < len) return BSTR_OK; - if (b0->data == (const unsigned char *) blk || len == 0) return 1; - - for (i = 0; i < len; i ++) { - if (b0->data[i] != ((const unsigned char *) blk)[i]) return BSTR_OK; - } - return 1; -} - -/* int biseqcstr (const_bstring b, const char *s) - * - * Compare the bstring b and char * string s. The C string s must be '\0' - * terminated at exactly the length of the bstring b, and the contents - * between the two must be identical with the bstring b with no '\0' - * characters for the two contents to be considered equal. This is - * equivalent to the condition that their current contents will be always be - * equal when comparing them in the same format after converting one or the - * other. If the strings are equal 1 is returned, if they are unequal 0 is - * returned and if there is a detectable error BSTR_ERR is returned. - */ -int biseqcstr (const_bstring b, const char * s) { -int i; - if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR; - for (i=0; i < b->slen; i++) { - if (s[i] == '\0' || b->data[i] != (unsigned char) s[i]) return BSTR_OK; - } - return s[i] == '\0'; -} - -/* int biseqcstrcaseless (const_bstring b, const char *s) - * - * Compare the bstring b and char * string s. The C string s must be '\0' - * terminated at exactly the length of the bstring b, and the contents - * between the two must be identical except for case with the bstring b with - * no '\0' characters for the two contents to be considered equal. This is - * equivalent to the condition that their current contents will be always be - * equal ignoring case when comparing them in the same format after - * converting one or the other. 
If the strings are equal, except for case, - * 1 is returned, if they are unequal regardless of case 0 is returned and - * if there is a detectable error BSTR_ERR is returned. - */ -int biseqcstrcaseless (const_bstring b, const char * s) { -int i; - if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR; - for (i=0; i < b->slen; i++) { - if (s[i] == '\0' || - (b->data[i] != (unsigned char) s[i] && - downcase (b->data[i]) != (unsigned char) downcase (s[i]))) - return BSTR_OK; - } - return s[i] == '\0'; -} - -/* int bstrcmp (const_bstring b0, const_bstring b1) - * - * Compare the string b0 and b1. If there is an error, SHRT_MIN is returned, - * otherwise a value less than or greater than zero, indicating that the - * string pointed to by b0 is lexicographically less than or greater than - * the string pointed to by b1 is returned. If the the string lengths are - * unequal but the characters up until the length of the shorter are equal - * then a value less than, or greater than zero, indicating that the string - * pointed to by b0 is shorter or longer than the string pointed to by b1 is - * returned. 0 is returned if and only if the two strings are the same. If - * the length of the strings are different, this function is O(n). Like its - * standard C library counter part strcmp, the comparison does not proceed - * past any '\0' termination characters encountered. 
- */ -int bstrcmp (const_bstring b0, const_bstring b1) { -int i, v, n; - - if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || - b0->slen < 0 || b1->slen < 0) return SHRT_MIN; - n = b0->slen; if (n > b1->slen) n = b1->slen; - if (b0->slen == b1->slen && (b0->data == b1->data || b0->slen == 0)) - return BSTR_OK; - - for (i = 0; i < n; i ++) { - v = ((char) b0->data[i]) - ((char) b1->data[i]); - if (v != 0) return v; - if (b0->data[i] == (unsigned char) '\0') return BSTR_OK; - } - - if (b0->slen > n) return 1; - if (b1->slen > n) return -1; - return BSTR_OK; -} - -/* int bstrncmp (const_bstring b0, const_bstring b1, int n) - * - * Compare the string b0 and b1 for at most n characters. If there is an - * error, SHRT_MIN is returned, otherwise a value is returned as if b0 and - * b1 were first truncated to at most n characters then bstrcmp was called - * with these new strings are paremeters. If the length of the strings are - * different, this function is O(n). Like its standard C library counter - * part strcmp, the comparison does not proceed past any '\0' termination - * characters encountered. - */ -int bstrncmp (const_bstring b0, const_bstring b1, int n) { -int i, v, m; - - if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || - b0->slen < 0 || b1->slen < 0) return SHRT_MIN; - m = n; - if (m > b0->slen) m = b0->slen; - if (m > b1->slen) m = b1->slen; - - if (b0->data != b1->data) { - for (i = 0; i < m; i ++) { - v = ((char) b0->data[i]) - ((char) b1->data[i]); - if (v != 0) return v; - if (b0->data[i] == (unsigned char) '\0') return BSTR_OK; - } - } - - if (n == m || b0->slen == b1->slen) return BSTR_OK; - - if (b0->slen > m) return 1; - return -1; -} - -/* bstring bmidstr (const_bstring b, int left, int len) - * - * Create a bstring which is the substring of b starting from position left - * and running for a length len (clamped by the end of the bstring b.) If - * b is detectably invalid, then NULL is returned. 
The section described - * by (left, len) is clamped to the boundaries of b. - */ -bstring bmidstr (const_bstring b, int left, int len) { - - if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; - - if (left < 0) { - len += left; - left = 0; - } - - if (len > b->slen - left) len = b->slen - left; - - if (len <= 0) return bfromcstr (""); - return blk2bstr (b->data + left, len); -} - -/* int bdelete (bstring b, int pos, int len) - * - * Removes characters from pos to pos+len-1 inclusive and shifts the tail of - * the bstring starting from pos+len to pos. len must be positive for this - * call to have any effect. The section of the string described by (pos, - * len) is clamped to boundaries of the bstring b. - */ -int bdelete (bstring b, int pos, int len) { - /* Clamp to left side of bstring */ - if (pos < 0) { - len += pos; - pos = 0; - } - - if (len < 0 || b == NULL || b->data == NULL || b->slen < 0 || - b->mlen < b->slen || b->mlen <= 0) - return BSTR_ERR; - if (len > 0 && pos < b->slen) { - if (pos + len >= b->slen) { - b->slen = pos; - } else { - bBlockCopy ((char *) (b->data + pos), - (char *) (b->data + pos + len), - b->slen - (pos+len)); - b->slen -= len; - } - b->data[b->slen] = (unsigned char) '\0'; - } - return BSTR_OK; -} - -/* int bdestroy (bstring b) - * - * Free up the bstring. Note that if b is detectably invalid or not writable - * then no action is performed and BSTR_ERR is returned. Like a freed memory - * allocation, dereferences, writes or any other action on b after it has - * been bdestroyed is undefined. - */ -int bdestroy (bstring b) { - if (b == NULL || b->slen < 0 || b->mlen <= 0 || b->mlen < b->slen || - b->data == NULL) - return BSTR_ERR; - - bstr__free (b->data); - - /* In case there is any stale usage, there is one more chance to - notice this error. 
*/ - - b->slen = -1; - b->mlen = -__LINE__; - b->data = NULL; - - bstr__free (b); - return BSTR_OK; -} - -/* int binstr (const_bstring b1, int pos, const_bstring b2) - * - * Search for the bstring b2 in b1 starting from position pos, and searching - * forward. If it is found then return with the first position where it is - * found, otherwise return BSTR_ERR. Note that this is just a brute force - * string searcher that does not attempt clever things like the Boyer-Moore - * search algorithm. Because of this there are many degenerate cases where - * this can take much longer than it needs to. - */ -int binstr (const_bstring b1, int pos, const_bstring b2) { -int j, ii, ll, lf; -unsigned char * d0; -unsigned char c0; -register unsigned char * d1; -register unsigned char c1; -register int i; - - if (b1 == NULL || b1->data == NULL || b1->slen < 0 || - b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; - if (b1->slen == pos) return (b2->slen == 0)?pos:BSTR_ERR; - if (b1->slen < pos || pos < 0) return BSTR_ERR; - if (b2->slen == 0) return pos; - - /* No space to find such a string? 
*/ - if ((lf = b1->slen - b2->slen + 1) <= pos) return BSTR_ERR; - - /* An obvious alias case */ - if (b1->data == b2->data && pos == 0) return 0; - - i = pos; - - d0 = b2->data; - d1 = b1->data; - ll = b2->slen; - - /* Peel off the b2->slen == 1 case */ - c0 = d0[0]; - if (1 == ll) { - for (;i < lf; i++) if (c0 == d1[i]) return i; - return BSTR_ERR; - } - - c1 = c0; - j = 0; - lf = b1->slen - 1; - - ii = -1; - if (i < lf) do { - /* Unrolled current character test */ - if (c1 != d1[i]) { - if (c1 != d1[1+i]) { - i += 2; - continue; - } - i++; - } - - /* Take note if this is the start of a potential match */ - if (0 == j) ii = i; - - /* Shift the test character down by one */ - j++; - i++; - - /* If this isn't past the last character continue */ - if (j < ll) { - c1 = d0[j]; - continue; - } - - N0:; - - /* If no characters mismatched, then we matched */ - if (i == ii+j) return ii; - - /* Shift back to the beginning */ - i -= j; - j = 0; - c1 = c0; - } while (i < lf); - - /* Deal with last case if unrolling caused a misalignment */ - if (i == lf && ll == j+1 && c1 == d1[i]) goto N0; - - return BSTR_ERR; -} - -/* int binstrr (const_bstring b1, int pos, const_bstring b2) - * - * Search for the bstring b2 in b1 starting from position pos, and searching - * backward. If it is found then return with the first position where it is - * found, otherwise return BSTR_ERR. Note that this is just a brute force - * string searcher that does not attempt clever things like the Boyer-Moore - * search algorithm. Because of this there are many degenerate cases where - * this can take much longer than it needs to. 
- */ -int binstrr (const_bstring b1, int pos, const_bstring b2) { -int j, i, l; -unsigned char * d0, * d1; - - if (b1 == NULL || b1->data == NULL || b1->slen < 0 || - b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; - if (b1->slen == pos && b2->slen == 0) return pos; - if (b1->slen < pos || pos < 0) return BSTR_ERR; - if (b2->slen == 0) return pos; - - /* Obvious alias case */ - if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return 0; - - i = pos; - if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR; - - /* If no space to find such a string then snap back */ - if (l + 1 <= i) i = l; - j = 0; - - d0 = b2->data; - d1 = b1->data; - l = b2->slen; - - for (;;) { - if (d0[j] == d1[i + j]) { - j ++; - if (j >= l) return i; - } else { - i --; - if (i < 0) break; - j=0; - } - } - - return BSTR_ERR; -} - -/* int binstrcaseless (const_bstring b1, int pos, const_bstring b2) - * - * Search for the bstring b2 in b1 starting from position pos, and searching - * forward but without regard to case. If it is found then return with the - * first position where it is found, otherwise return BSTR_ERR. Note that - * this is just a brute force string searcher that does not attempt clever - * things like the Boyer-Moore search algorithm. Because of this there are - * many degenerate cases where this can take much longer than it needs to. - */ -int binstrcaseless (const_bstring b1, int pos, const_bstring b2) { -int j, i, l, ll; -unsigned char * d0, * d1; - - if (b1 == NULL || b1->data == NULL || b1->slen < 0 || - b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; - if (b1->slen == pos) return (b2->slen == 0)?pos:BSTR_ERR; - if (b1->slen < pos || pos < 0) return BSTR_ERR; - if (b2->slen == 0) return pos; - - l = b1->slen - b2->slen + 1; - - /* No space to find such a string? 
*/ - if (l <= pos) return BSTR_ERR; - - /* An obvious alias case */ - if (b1->data == b2->data && pos == 0) return BSTR_OK; - - i = pos; - j = 0; - - d0 = b2->data; - d1 = b1->data; - ll = b2->slen; - - for (;;) { - if (d0[j] == d1[i + j] || downcase (d0[j]) == downcase (d1[i + j])) { - j ++; - if (j >= ll) return i; - } else { - i ++; - if (i >= l) break; - j=0; - } - } - - return BSTR_ERR; -} - -/* int binstrrcaseless (const_bstring b1, int pos, const_bstring b2) - * - * Search for the bstring b2 in b1 starting from position pos, and searching - * backward but without regard to case. If it is found then return with the - * first position where it is found, otherwise return BSTR_ERR. Note that - * this is just a brute force string searcher that does not attempt clever - * things like the Boyer-Moore search algorithm. Because of this there are - * many degenerate cases where this can take much longer than it needs to. - */ -int binstrrcaseless (const_bstring b1, int pos, const_bstring b2) { -int j, i, l; -unsigned char * d0, * d1; - - if (b1 == NULL || b1->data == NULL || b1->slen < 0 || - b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; - if (b1->slen == pos && b2->slen == 0) return pos; - if (b1->slen < pos || pos < 0) return BSTR_ERR; - if (b2->slen == 0) return pos; - - /* Obvious alias case */ - if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return BSTR_OK; - - i = pos; - if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR; - - /* If no space to find such a string then snap back */ - if (l + 1 <= i) i = l; - j = 0; - - d0 = b2->data; - d1 = b1->data; - l = b2->slen; - - for (;;) { - if (d0[j] == d1[i + j] || downcase (d0[j]) == downcase (d1[i + j])) { - j ++; - if (j >= l) return i; - } else { - i --; - if (i < 0) break; - j=0; - } - } - - return BSTR_ERR; -} - - -/* int bstrchrp (const_bstring b, int c, int pos) - * - * Search for the character c in b forwards from the position pos - * (inclusive). 
- */ -int bstrchrp (const_bstring b, int c, int pos) { -unsigned char * p; - - if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR; - p = (unsigned char *) bstr__memchr ((b->data + pos), (unsigned char) c, (b->slen - pos)); - if (p) return (int) (p - b->data); - return BSTR_ERR; -} - -/* int bstrrchrp (const_bstring b, int c, int pos) - * - * Search for the character c in b backwards from the position pos in string - * (inclusive). - */ -int bstrrchrp (const_bstring b, int c, int pos) { -int i; - - if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR; - for (i=pos; i >= 0; i--) { - if (b->data[i] == (unsigned char) c) return i; - } - return BSTR_ERR; -} - -#if !defined (BSTRLIB_AGGRESSIVE_MEMORY_FOR_SPEED_TRADEOFF) -#define LONG_LOG_BITS_QTY (3) -#define LONG_BITS_QTY (1 << LONG_LOG_BITS_QTY) -#define LONG_TYPE unsigned char - -#define CFCLEN ((1 << CHAR_BIT) / LONG_BITS_QTY) -struct charField { LONG_TYPE content[CFCLEN]; }; -#define testInCharField(cf,c) ((cf)->content[(c) >> LONG_LOG_BITS_QTY] & (((long)1) << ((c) & (LONG_BITS_QTY-1)))) -#define setInCharField(cf,idx) { \ - unsigned int c = (unsigned int) (idx); \ - (cf)->content[c >> LONG_LOG_BITS_QTY] |= (LONG_TYPE) (1ul << (c & (LONG_BITS_QTY-1))); \ -} - -#else - -#define CFCLEN (1 << CHAR_BIT) -struct charField { unsigned char content[CFCLEN]; }; -#define testInCharField(cf,c) ((cf)->content[(unsigned char) (c)]) -#define setInCharField(cf,idx) (cf)->content[(unsigned int) (idx)] = ~0 - -#endif - -/* Convert a bstring to charField */ -static int buildCharField (struct charField * cf, const_bstring b) { -int i; - if (b == NULL || b->data == NULL || b->slen <= 0) return BSTR_ERR; - memset ((void *) cf->content, 0, sizeof (struct charField)); - for (i=0; i < b->slen; i++) { - setInCharField (cf, b->data[i]); - } - return BSTR_OK; -} - -static void invertCharField (struct charField * cf) { -int i; - for (i=0; i < CFCLEN; i++) cf->content[i] = ~cf->content[i]; 
-} - -/* Inner engine for binchr */ -static int binchrCF (const unsigned char * data, int len, int pos, const struct charField * cf) { -int i; - for (i=pos; i < len; i++) { - unsigned char c = (unsigned char) data[i]; - if (testInCharField (cf, c)) return i; - } - return BSTR_ERR; -} - -/* int binchr (const_bstring b0, int pos, const_bstring b1); - * - * Search for the first position in b0 starting from pos or after, in which - * one of the characters in b1 is found and return it. If such a position - * does not exist in b0, then BSTR_ERR is returned. - */ -int binchr (const_bstring b0, int pos, const_bstring b1) { -struct charField chrs; - if (pos < 0 || b0 == NULL || b0->data == NULL || - b0->slen <= pos) return BSTR_ERR; - if (1 == b1->slen) return bstrchrp (b0, b1->data[0], pos); - if (0 > buildCharField (&chrs, b1)) return BSTR_ERR; - return binchrCF (b0->data, b0->slen, pos, &chrs); -} - -/* Inner engine for binchrr */ -static int binchrrCF (const unsigned char * data, int pos, const struct charField * cf) { -int i; - for (i=pos; i >= 0; i--) { - unsigned int c = (unsigned int) data[i]; - if (testInCharField (cf, c)) return i; - } - return BSTR_ERR; -} - -/* int binchrr (const_bstring b0, int pos, const_bstring b1); - * - * Search for the last position in b0 no greater than pos, in which one of - * the characters in b1 is found and return it. If such a position does not - * exist in b0, then BSTR_ERR is returned. 
- */ -int binchrr (const_bstring b0, int pos, const_bstring b1) { -struct charField chrs; - if (pos < 0 || b0 == NULL || b0->data == NULL || b1 == NULL || - b0->slen < pos) return BSTR_ERR; - if (pos == b0->slen) pos--; - if (1 == b1->slen) return bstrrchrp (b0, b1->data[0], pos); - if (0 > buildCharField (&chrs, b1)) return BSTR_ERR; - return binchrrCF (b0->data, pos, &chrs); -} - -/* int bninchr (const_bstring b0, int pos, const_bstring b1); - * - * Search for the first position in b0 starting from pos or after, in which - * none of the characters in b1 is found and return it. If such a position - * does not exist in b0, then BSTR_ERR is returned. - */ -int bninchr (const_bstring b0, int pos, const_bstring b1) { -struct charField chrs; - if (pos < 0 || b0 == NULL || b0->data == NULL || - b0->slen <= pos) return BSTR_ERR; - if (buildCharField (&chrs, b1) < 0) return BSTR_ERR; - invertCharField (&chrs); - return binchrCF (b0->data, b0->slen, pos, &chrs); -} - -/* int bninchrr (const_bstring b0, int pos, const_bstring b1); - * - * Search for the last position in b0 no greater than pos, in which none of - * the characters in b1 is found and return it. If such a position does not - * exist in b0, then BSTR_ERR is returned. - */ -int bninchrr (const_bstring b0, int pos, const_bstring b1) { -struct charField chrs; - if (pos < 0 || b0 == NULL || b0->data == NULL || - b0->slen < pos) return BSTR_ERR; - if (pos == b0->slen) pos--; - if (buildCharField (&chrs, b1) < 0) return BSTR_ERR; - invertCharField (&chrs); - return binchrrCF (b0->data, pos, &chrs); -} - -/* int bsetstr (bstring b0, int pos, bstring b1, unsigned char fill) - * - * Overwrite the string b0 starting at position pos with the string b1. If - * the position pos is past the end of b0, then the character "fill" is - * appended as necessary to make up the gap between the end of b0 and pos. - * If b1 is NULL, it behaves as if it were a 0-length string. 
- */ -int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill) { -int d, newlen; -ptrdiff_t pd; -bstring aux = (bstring) b1; - - if (pos < 0 || b0 == NULL || b0->slen < 0 || NULL == b0->data || - b0->mlen < b0->slen || b0->mlen <= 0) return BSTR_ERR; - if (b1 != NULL && (b1->slen < 0 || b1->data == NULL)) return BSTR_ERR; - - d = pos; - - /* Aliasing case */ - if (NULL != aux) { - if ((pd = (ptrdiff_t) (b1->data - b0->data)) >= 0 && pd < (ptrdiff_t) b0->mlen) { - if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR; - } - d += aux->slen; - } - - /* Increase memory size if necessary */ - if (balloc (b0, d + 1) != BSTR_OK) { - if (aux != b1) bdestroy (aux); - return BSTR_ERR; - } - - newlen = b0->slen; - - /* Fill in "fill" character as necessary */ - if (pos > newlen) { - bstr__memset (b0->data + b0->slen, (int) fill, (size_t) (pos - b0->slen)); - newlen = pos; - } - - /* Copy b1 to position pos in b0. */ - if (aux != NULL) { - bBlockCopy ((char *) (b0->data + pos), (char *) aux->data, aux->slen); - if (aux != b1) bdestroy (aux); - } - - /* Indicate the potentially increased size of b0 */ - if (d > newlen) newlen = d; - - b0->slen = newlen; - b0->data[newlen] = (unsigned char) '\0'; - - return BSTR_OK; -} - -/* int binsert (bstring b1, int pos, bstring b2, unsigned char fill) - * - * Inserts the string b2 into b1 at position pos. If the position pos is - * past the end of b1, then the character "fill" is appended as necessary to - * make up the gap between the end of b1 and pos. Unlike bsetstr, binsert - * does not allow b2 to be NULL. 
- */ -int binsert (bstring b1, int pos, const_bstring b2, unsigned char fill) { -int d, l; -ptrdiff_t pd; -bstring aux = (bstring) b2; - - if (pos < 0 || b1 == NULL || b2 == NULL || b1->slen < 0 || - b2->slen < 0 || b1->mlen < b1->slen || b1->mlen <= 0) return BSTR_ERR; - - /* Aliasing case */ - if ((pd = (ptrdiff_t) (b2->data - b1->data)) >= 0 && pd < (ptrdiff_t) b1->mlen) { - if (NULL == (aux = bstrcpy (b2))) return BSTR_ERR; - } - - /* Compute the two possible end pointers */ - d = b1->slen + aux->slen; - l = pos + aux->slen; - if ((d|l) < 0) return BSTR_ERR; - - if (l > d) { - /* Inserting past the end of the string */ - if (balloc (b1, l + 1) != BSTR_OK) { - if (aux != b2) bdestroy (aux); - return BSTR_ERR; - } - bstr__memset (b1->data + b1->slen, (int) fill, (size_t) (pos - b1->slen)); - b1->slen = l; - } else { - /* Inserting in the middle of the string */ - if (balloc (b1, d + 1) != BSTR_OK) { - if (aux != b2) bdestroy (aux); - return BSTR_ERR; - } - bBlockCopy (b1->data + l, b1->data + pos, d - l); - b1->slen = d; - } - bBlockCopy (b1->data + pos, aux->data, aux->slen); - b1->data[b1->slen] = (unsigned char) '\0'; - if (aux != b2) bdestroy (aux); - return BSTR_OK; -} - -/* int breplace (bstring b1, int pos, int len, bstring b2, - * unsigned char fill) - * - * Replace a section of a string from pos for a length len with the string b2. - * fill is used is pos > b1->slen. - */ -int breplace (bstring b1, int pos, int len, const_bstring b2, - unsigned char fill) { -int pl, ret; -ptrdiff_t pd; -bstring aux = (bstring) b2; - - if (pos < 0 || len < 0 || (pl = pos + len) < 0 || b1 == NULL || - b2 == NULL || b1->data == NULL || b2->data == NULL || - b1->slen < 0 || b2->slen < 0 || b1->mlen < b1->slen || - b1->mlen <= 0) return BSTR_ERR; - - /* Straddles the end? 
*/ - if (pl >= b1->slen) { - if ((ret = bsetstr (b1, pos, b2, fill)) < 0) return ret; - if (pos + b2->slen < b1->slen) { - b1->slen = pos + b2->slen; - b1->data[b1->slen] = (unsigned char) '\0'; - } - return ret; - } - - /* Aliasing case */ - if ((pd = (ptrdiff_t) (b2->data - b1->data)) >= 0 && pd < (ptrdiff_t) b1->slen) { - if (NULL == (aux = bstrcpy (b2))) return BSTR_ERR; - } - - if (aux->slen > len) { - if (balloc (b1, b1->slen + aux->slen - len) != BSTR_OK) { - if (aux != b2) bdestroy (aux); - return BSTR_ERR; - } - } - - if (aux->slen != len) bstr__memmove (b1->data + pos + aux->slen, b1->data + pos + len, b1->slen - (pos + len)); - bstr__memcpy (b1->data + pos, aux->data, aux->slen); - b1->slen += aux->slen - len; - b1->data[b1->slen] = (unsigned char) '\0'; - if (aux != b2) bdestroy (aux); - return BSTR_OK; -} - -/* - * findreplaceengine is used to implement bfindreplace and - * bfindreplacecaseless. It works by breaking the three cases of - * expansion, reduction and replacement, and solving each of these - * in the most efficient way possible. - */ - -typedef int (*instr_fnptr) (const_bstring s1, int pos, const_bstring s2); - -#define INITIAL_STATIC_FIND_INDEX_COUNT 32 - -static int findreplaceengine (bstring b, const_bstring find, const_bstring repl, int pos, instr_fnptr instr) { -int i, ret, slen, mlen, delta, acc; -int * d; -int static_d[INITIAL_STATIC_FIND_INDEX_COUNT+1]; /* This +1 is unnecessary, but it shuts up LINT. 
*/ -ptrdiff_t pd; -bstring auxf = (bstring) find; -bstring auxr = (bstring) repl; - - if (b == NULL || b->data == NULL || find == NULL || - find->data == NULL || repl == NULL || repl->data == NULL || - pos < 0 || find->slen <= 0 || b->mlen < 0 || b->slen > b->mlen || - b->mlen <= 0 || b->slen < 0 || repl->slen < 0) return BSTR_ERR; - if (pos > b->slen - find->slen) return BSTR_OK; - - /* Alias with find string */ - pd = (ptrdiff_t) (find->data - b->data); - if ((ptrdiff_t) (pos - find->slen) < pd && pd < (ptrdiff_t) b->slen) { - if (NULL == (auxf = bstrcpy (find))) return BSTR_ERR; - } - - /* Alias with repl string */ - pd = (ptrdiff_t) (repl->data - b->data); - if ((ptrdiff_t) (pos - repl->slen) < pd && pd < (ptrdiff_t) b->slen) { - if (NULL == (auxr = bstrcpy (repl))) { - if (auxf != find) bdestroy (auxf); - return BSTR_ERR; - } - } - - delta = auxf->slen - auxr->slen; - - /* in-place replacement since find and replace strings are of equal - length */ - if (delta == 0) { - while ((pos = instr (b, pos, auxf)) >= 0) { - bstr__memcpy (b->data + pos, auxr->data, auxr->slen); - pos += auxf->slen; - } - if (auxf != find) bdestroy (auxf); - if (auxr != repl) bdestroy (auxr); - return BSTR_OK; - } - - /* shrinking replacement since auxf->slen > auxr->slen */ - if (delta > 0) { - acc = 0; - - while ((i = instr (b, pos, auxf)) >= 0) { - if (acc && i > pos) - bstr__memmove (b->data + pos - acc, b->data + pos, i - pos); - if (auxr->slen) - bstr__memcpy (b->data + i - acc, auxr->data, auxr->slen); - acc += delta; - pos = i + auxf->slen; - } - - if (acc) { - i = b->slen; - if (i > pos) - bstr__memmove (b->data + pos - acc, b->data + pos, i - pos); - b->slen -= acc; - b->data[b->slen] = (unsigned char) '\0'; - } - - if (auxf != find) bdestroy (auxf); - if (auxr != repl) bdestroy (auxr); - return BSTR_OK; - } - - /* expanding replacement since find->slen < repl->slen. Its a lot - more complicated. 
This works by first finding all the matches and - storing them to a growable array, then doing at most one resize of - the destination bstring and then performing the direct memory transfers - of the string segment pieces to form the final result. The growable - array of matches uses a deferred doubling reallocing strategy. What - this means is that it starts as a reasonably fixed sized auto array in - the hopes that many if not most cases will never need to grow this - array. But it switches as soon as the bounds of the array will be - exceeded. An extra find result is always appended to this array that - corresponds to the end of the destination string, so slen is checked - against mlen - 1 rather than mlen before resizing. - */ - - mlen = INITIAL_STATIC_FIND_INDEX_COUNT; - d = (int *) static_d; /* Avoid malloc for trivial/initial cases */ - acc = slen = 0; - - while ((pos = instr (b, pos, auxf)) >= 0) { - if (slen >= mlen - 1) { - int sl, *t; - - mlen += mlen; - sl = sizeof (int *) * mlen; - if (static_d == d) d = NULL; /* static_d cannot be realloced */ - if (mlen <= 0 || sl < mlen || NULL == (t = (int *) bstr__realloc (d, sl))) { - ret = BSTR_ERR; - goto done; - } - if (NULL == d) bstr__memcpy (t, static_d, sizeof (static_d)); - d = t; - } - d[slen] = pos; - slen++; - acc -= delta; - pos += auxf->slen; - if (pos < 0 || acc < 0) { - ret = BSTR_ERR; - goto done; - } - } - - /* slen <= INITIAL_STATIC_INDEX_COUNT-1 or mlen-1 here. */ - d[slen] = b->slen; - - if (BSTR_OK == (ret = balloc (b, b->slen + acc + 1))) { - b->slen += acc; - for (i = slen-1; i >= 0; i--) { - int s, l; - s = d[i] + auxf->slen; - l = d[i+1] - s; /* d[slen] may be accessed here. 
*/ - if (l) { - bstr__memmove (b->data + s + acc, b->data + s, l); - } - if (auxr->slen) { - bstr__memmove (b->data + s + acc - auxr->slen, - auxr->data, auxr->slen); - } - acc += delta; - } - b->data[b->slen] = (unsigned char) '\0'; - } - - done:; - if (static_d == d) d = NULL; - bstr__free (d); - if (auxf != find) bdestroy (auxf); - if (auxr != repl) bdestroy (auxr); - return ret; -} - -/* int bfindreplace (bstring b, const_bstring find, const_bstring repl, - * int pos) - * - * Replace all occurrences of a find string with a replace string after a - * given point in a bstring. - */ -int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos) { - return findreplaceengine (b, find, repl, pos, binstr); -} - -/* int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, - * int pos) - * - * Replace all occurrences of a find string, ignoring case, with a replace - * string after a given point in a bstring. - */ -int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos) { - return findreplaceengine (b, find, repl, pos, binstrcaseless); -} - -/* int binsertch (bstring b, int pos, int len, unsigned char fill) - * - * Inserts the character fill repeatedly into b at position pos for a - * length len. If the position pos is past the end of b, then the - * character "fill" is appended as necessary to make up the gap between the - * end of b and the position pos + len. 
- */ -int binsertch (bstring b, int pos, int len, unsigned char fill) { -int d, l, i; - - if (pos < 0 || b == NULL || b->slen < 0 || b->mlen < b->slen || - b->mlen <= 0 || len < 0) return BSTR_ERR; - - /* Compute the two possible end pointers */ - d = b->slen + len; - l = pos + len; - if ((d|l) < 0) return BSTR_ERR; - - if (l > d) { - /* Inserting past the end of the string */ - if (balloc (b, l + 1) != BSTR_OK) return BSTR_ERR; - pos = b->slen; - b->slen = l; - } else { - /* Inserting in the middle of the string */ - if (balloc (b, d + 1) != BSTR_OK) return BSTR_ERR; - for (i = d - 1; i >= l; i--) { - b->data[i] = b->data[i - len]; - } - b->slen = d; - } - - for (i=pos; i < l; i++) b->data[i] = fill; - b->data[b->slen] = (unsigned char) '\0'; - return BSTR_OK; -} - -/* int bpattern (bstring b, int len) - * - * Replicate the bstring, b in place, end to end repeatedly until it - * surpasses len characters, then chop the result to exactly len characters. - * This function operates in-place. The function will return with BSTR_ERR - * if b is NULL or of length 0, otherwise BSTR_OK is returned. - */ -int bpattern (bstring b, int len) { -int i, d; - - d = blength (b); - if (d <= 0 || len < 0 || balloc (b, len + 1) != BSTR_OK) return BSTR_ERR; - if (len > 0) { - if (d == 1) return bsetstr (b, len, NULL, b->data[0]); - for (i = d; i < len; i++) b->data[i] = b->data[i - d]; - } - b->data[len] = (unsigned char) '\0'; - b->slen = len; - return BSTR_OK; -} - -#define BS_BUFF_SZ (1024) - -/* int breada (bstring b, bNread readPtr, void * parm) - * - * Use a finite buffer fread-like function readPtr to concatenate to the - * bstring b the entire contents of file-like source data in a roughly - * efficient way. - */ -int breada (bstring b, bNread readPtr, void * parm) { -int i, l, n; - - if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || - b->mlen <= 0 || readPtr == NULL) return BSTR_ERR; - - i = b->slen; - for (n=i+16; ; n += ((n < BS_BUFF_SZ) ? 
n : BS_BUFF_SZ)) { - if (BSTR_OK != balloc (b, n + 1)) return BSTR_ERR; - l = (int) readPtr ((void *) (b->data + i), 1, n - i, parm); - i += l; - b->slen = i; - if (i < n) break; - } - - b->data[i] = (unsigned char) '\0'; - return BSTR_OK; -} - -/* bstring bread (bNread readPtr, void * parm) - * - * Use a finite buffer fread-like function readPtr to create a bstring - * filled with the entire contents of file-like source data in a roughly - * efficient way. - */ -bstring bread (bNread readPtr, void * parm) { -bstring buff; - - if (0 > breada (buff = bfromcstr (""), readPtr, parm)) { - bdestroy (buff); - return NULL; - } - return buff; -} - -/* int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator) - * - * Use an fgetc-like single character stream reading function (getcPtr) to - * obtain a sequence of characters which are concatenated to the end of the - * bstring b. The stream read is terminated by the passed in terminator - * parameter. - * - * If getcPtr returns with a negative number, or the terminator character - * (which is appended) is read, then the stream reading is halted and the - * function returns with a partial result in b. If there is an empty partial - * result, 1 is returned. If no characters are read, or there is some other - * detectable error, BSTR_ERR is returned. 
- */ -int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator) { -int c, d, e; - - if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || - b->mlen <= 0 || getcPtr == NULL) return BSTR_ERR; - d = 0; - e = b->mlen - 2; - - while ((c = getcPtr (parm)) >= 0) { - if (d > e) { - b->slen = d; - if (balloc (b, d + 2) != BSTR_OK) return BSTR_ERR; - e = b->mlen - 2; - } - b->data[d] = (unsigned char) c; - d++; - if (c == terminator) break; - } - - b->data[d] = (unsigned char) '\0'; - b->slen = d; - - return d == 0 && c < 0; -} - -/* int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator) - * - * Use an fgetc-like single character stream reading function (getcPtr) to - * obtain a sequence of characters which are concatenated to the end of the - * bstring b. The stream read is terminated by the passed in terminator - * parameter. - * - * If getcPtr returns with a negative number, or the terminator character - * (which is appended) is read, then the stream reading is halted and the - * function returns with a partial result concatentated to b. If there is - * an empty partial result, 1 is returned. If no characters are read, or - * there is some other detectable error, BSTR_ERR is returned. 
- */ -int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator) { -int c, d, e; - - if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || - b->mlen <= 0 || getcPtr == NULL) return BSTR_ERR; - d = b->slen; - e = b->mlen - 2; - - while ((c = getcPtr (parm)) >= 0) { - if (d > e) { - b->slen = d; - if (balloc (b, d + 2) != BSTR_OK) return BSTR_ERR; - e = b->mlen - 2; - } - b->data[d] = (unsigned char) c; - d++; - if (c == terminator) break; - } - - b->data[d] = (unsigned char) '\0'; - b->slen = d; - - return d == 0 && c < 0; -} - -/* bstring bgets (bNgetc getcPtr, void * parm, char terminator) - * - * Use an fgetc-like single character stream reading function (getcPtr) to - * obtain a sequence of characters which are concatenated into a bstring. - * The stream read is terminated by the passed in terminator function. - * - * If getcPtr returns with a negative number, or the terminator character - * (which is appended) is read, then the stream reading is halted and the - * result obtained thus far is returned. If no characters are read, or - * there is some other detectable error, NULL is returned. - */ -bstring bgets (bNgetc getcPtr, void * parm, char terminator) { -bstring buff; - - if (0 > bgetsa (buff = bfromcstr (""), getcPtr, parm, terminator) || 0 >= buff->slen) { - bdestroy (buff); - buff = NULL; - } - return buff; -} - -struct bStream { - bstring buff; /* Buffer for over-reads */ - void * parm; /* The stream handle for core stream */ - bNread readFnPtr; /* fread compatible fnptr for core stream */ - int isEOF; /* track file's EOF state */ - int maxBuffSz; -}; - -/* struct bStream * bsopen (bNread readPtr, void * parm) - * - * Wrap a given open stream (described by a fread compatible function - * pointer and stream handle) into an open bStream suitable for the bstring - * library streaming functions. 
- */ -struct bStream * bsopen (bNread readPtr, void * parm) { -struct bStream * s; - - if (readPtr == NULL) return NULL; - s = (struct bStream *) bstr__alloc (sizeof (struct bStream)); - if (s == NULL) return NULL; - s->parm = parm; - s->buff = bfromcstr (""); - s->readFnPtr = readPtr; - s->maxBuffSz = BS_BUFF_SZ; - s->isEOF = 0; - return s; -} - -/* int bsbufflength (struct bStream * s, int sz) - * - * Set the length of the buffer used by the bStream. If sz is zero, the - * length is not set. This function returns with the previous length. - */ -int bsbufflength (struct bStream * s, int sz) { -int oldSz; - if (s == NULL || sz < 0) return BSTR_ERR; - oldSz = s->maxBuffSz; - if (sz > 0) s->maxBuffSz = sz; - return oldSz; -} - -int bseof (const struct bStream * s) { - if (s == NULL || s->readFnPtr == NULL) return BSTR_ERR; - return s->isEOF && (s->buff->slen == 0); -} - -/* void * bsclose (struct bStream * s) - * - * Close the bStream, and return the handle to the stream that was originally - * used to open the given stream. - */ -void * bsclose (struct bStream * s) { -void * parm; - if (s == NULL) return NULL; - s->readFnPtr = NULL; - if (s->buff) bdestroy (s->buff); - s->buff = NULL; - parm = s->parm; - s->parm = NULL; - s->isEOF = 1; - bstr__free (s); - return parm; -} - -/* int bsreadlna (bstring r, struct bStream * s, char terminator) - * - * Read a bstring terminated by the terminator character or the end of the - * stream from the bStream (s) and return it into the parameter r. This - * function may read additional characters from the core stream that are not - * returned, but will be retained for subsequent read operations. 
- */ -int bsreadlna (bstring r, struct bStream * s, char terminator) { -int i, l, ret, rlo; -char * b; -struct tagbstring x; - - if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 || - r->slen < 0 || r->mlen < r->slen) return BSTR_ERR; - l = s->buff->slen; - if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - b = (char *) s->buff->data; - x.data = (unsigned char *) b; - - /* First check if the current buffer holds the terminator */ - b[l] = terminator; /* Set sentinel */ - for (i=0; b[i] != terminator; i++) ; - if (i < l) { - x.slen = i + 1; - ret = bconcat (r, &x); - s->buff->slen = l; - if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1); - return BSTR_OK; - } - - rlo = r->slen; - - /* If not then just concatenate the entire buffer to the output */ - x.slen = l; - if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR; - - /* Perform direct in-place reads into the destination to allow for - the minimum of data-copies */ - for (;;) { - if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR; - b = (char *) (r->data + r->slen); - l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm); - if (l <= 0) { - r->data[r->slen] = (unsigned char) '\0'; - s->buff->slen = 0; - s->isEOF = 1; - /* If nothing was read return with an error message */ - return BSTR_ERR & -(r->slen == rlo); - } - b[l] = terminator; /* Set sentinel */ - for (i=0; b[i] != terminator; i++) ; - if (i < l) break; - r->slen += l; - } - - /* Terminator found, push over-read back to buffer */ - i++; - r->slen += i; - s->buff->slen = l - i; - bstr__memcpy (s->buff->data, b + i, l - i); - r->data[r->slen] = (unsigned char) '\0'; - return BSTR_OK; -} - -/* int bsreadlnsa (bstring r, struct bStream * s, bstring term) - * - * Read a bstring terminated by any character in the term string or the end - * of the stream from the bStream (s) and return it into the parameter r. 
- * This function may read additional characters from the core stream that - * are not returned, but will be retained for subsequent read operations. - */ -int bsreadlnsa (bstring r, struct bStream * s, const_bstring term) { -int i, l, ret, rlo; -unsigned char * b; -struct tagbstring x; -struct charField cf; - - if (s == NULL || s->buff == NULL || r == NULL || term == NULL || - term->data == NULL || r->mlen <= 0 || r->slen < 0 || - r->mlen < r->slen) return BSTR_ERR; - if (term->slen == 1) return bsreadlna (r, s, term->data[0]); - if (term->slen < 1 || buildCharField (&cf, term)) return BSTR_ERR; - - l = s->buff->slen; - if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - b = (unsigned char *) s->buff->data; - x.data = b; - - /* First check if the current buffer holds the terminator */ - b[l] = term->data[0]; /* Set sentinel */ - for (i=0; !testInCharField (&cf, b[i]); i++) ; - if (i < l) { - x.slen = i + 1; - ret = bconcat (r, &x); - s->buff->slen = l; - if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1); - return BSTR_OK; - } - - rlo = r->slen; - - /* If not then just concatenate the entire buffer to the output */ - x.slen = l; - if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR; - - /* Perform direct in-place reads into the destination to allow for - the minimum of data-copies */ - for (;;) { - if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR; - b = (unsigned char *) (r->data + r->slen); - l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm); - if (l <= 0) { - r->data[r->slen] = (unsigned char) '\0'; - s->buff->slen = 0; - s->isEOF = 1; - /* If nothing was read return with an error message */ - return BSTR_ERR & -(r->slen == rlo); - } - - b[l] = term->data[0]; /* Set sentinel */ - for (i=0; !testInCharField (&cf, b[i]); i++) ; - if (i < l) break; - r->slen += l; - } - - /* Terminator found, push over-read back to buffer */ - i++; - r->slen += i; - s->buff->slen = l - i; - bstr__memcpy (s->buff->data, b + i, l - i); - 
r->data[r->slen] = (unsigned char) '\0'; - return BSTR_OK; -} - -/* int bsreada (bstring r, struct bStream * s, int n) - * - * Read a bstring of length n (or, if it is fewer, as many bytes as is - * remaining) from the bStream. This function may read additional - * characters from the core stream that are not returned, but will be - * retained for subsequent read operations. This function will not read - * additional characters from the core stream beyond virtual stream pointer. - */ -int bsreada (bstring r, struct bStream * s, int n) { -int l, ret, orslen; -char * b; -struct tagbstring x; - - if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 - || r->slen < 0 || r->mlen < r->slen || n <= 0) return BSTR_ERR; - - n += r->slen; - if (n <= 0) return BSTR_ERR; - - l = s->buff->slen; - - orslen = r->slen; - - if (0 == l) { - if (s->isEOF) return BSTR_ERR; - if (r->mlen > n) { - l = (int) s->readFnPtr (r->data + r->slen, 1, n - r->slen, s->parm); - if (0 >= l || l > n - r->slen) { - s->isEOF = 1; - return BSTR_ERR; - } - r->slen += l; - r->data[r->slen] = (unsigned char) '\0'; - return 0; - } - } - - if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - b = (char *) s->buff->data; - x.data = (unsigned char *) b; - - do { - if (l + r->slen >= n) { - x.slen = n - r->slen; - ret = bconcat (r, &x); - s->buff->slen = l; - if (BSTR_OK == ret) bdelete (s->buff, 0, x.slen); - return BSTR_ERR & -(r->slen == orslen); - } - - x.slen = l; - if (BSTR_OK != bconcat (r, &x)) break; - - l = n - r->slen; - if (l > s->maxBuffSz) l = s->maxBuffSz; - - l = (int) s->readFnPtr (b, 1, l, s->parm); - - } while (l > 0); - if (l < 0) l = 0; - if (l == 0) s->isEOF = 1; - s->buff->slen = l; - return BSTR_ERR & -(r->slen == orslen); -} - -/* int bsreadln (bstring r, struct bStream * s, char terminator) - * - * Read a bstring terminated by the terminator character or the end of the - * stream from the bStream (s) and return it into the parameter r. 
This - * function may read additional characters from the core stream that are not - * returned, but will be retained for subsequent read operations. - */ -int bsreadln (bstring r, struct bStream * s, char terminator) { - if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0) - return BSTR_ERR; - if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - r->slen = 0; - return bsreadlna (r, s, terminator); -} - -/* int bsreadlns (bstring r, struct bStream * s, bstring term) - * - * Read a bstring terminated by any character in the term string or the end - * of the stream from the bStream (s) and return it into the parameter r. - * This function may read additional characters from the core stream that - * are not returned, but will be retained for subsequent read operations. - */ -int bsreadlns (bstring r, struct bStream * s, const_bstring term) { - if (s == NULL || s->buff == NULL || r == NULL || term == NULL - || term->data == NULL || r->mlen <= 0) return BSTR_ERR; - if (term->slen == 1) return bsreadln (r, s, term->data[0]); - if (term->slen < 1) return BSTR_ERR; - if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - r->slen = 0; - return bsreadlnsa (r, s, term); -} - -/* int bsread (bstring r, struct bStream * s, int n) - * - * Read a bstring of length n (or, if it is fewer, as many bytes as is - * remaining) from the bStream. This function may read additional - * characters from the core stream that are not returned, but will be - * retained for subsequent read operations. This function will not read - * additional characters from the core stream beyond virtual stream pointer. 
- */ -int bsread (bstring r, struct bStream * s, int n) { - if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 - || n <= 0) return BSTR_ERR; - if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - r->slen = 0; - return bsreada (r, s, n); -} - -/* int bsunread (struct bStream * s, const_bstring b) - * - * Insert a bstring into the bStream at the current position. These - * characters will be read prior to those that actually come from the core - * stream. - */ -int bsunread (struct bStream * s, const_bstring b) { - if (s == NULL || s->buff == NULL) return BSTR_ERR; - return binsert (s->buff, 0, b, (unsigned char) '?'); -} - -/* int bspeek (bstring r, const struct bStream * s) - * - * Return the currently buffered characters from the bStream that will be - * read prior to reads from the core stream. - */ -int bspeek (bstring r, const struct bStream * s) { - if (s == NULL || s->buff == NULL) return BSTR_ERR; - return bassign (r, s->buff); -} - -/* bstring bjoin (const struct bstrList * bl, const_bstring sep); - * - * Join the entries of a bstrList into one bstring by sequentially - * concatenating them with the sep string in between. If there is an error - * NULL is returned, otherwise a bstring with the correct result is returned. - */ -bstring bjoin (const struct bstrList * bl, const_bstring sep) { -bstring b; -int i, c, v; - - if (bl == NULL || bl->qty < 0) return NULL; - if (sep != NULL && (sep->slen < 0 || sep->data == NULL)) return NULL; - - for (i = 0, c = 1; i < bl->qty; i++) { - v = bl->entry[i]->slen; - if (v < 0) return NULL; /* Invalid input */ - c += v; - if (c < 0) return NULL; /* Wrap around ?? 
*/ - } - - if (sep != NULL) c += (bl->qty - 1) * sep->slen; - - b = (bstring) bstr__alloc (sizeof (struct tagbstring)); - if (NULL == b) return NULL; /* Out of memory */ - b->data = (unsigned char *) bstr__alloc (c); - if (b->data == NULL) { - bstr__free (b); - return NULL; - } - - b->mlen = c; - b->slen = c-1; - - for (i = 0, c = 0; i < bl->qty; i++) { - if (i > 0 && sep != NULL) { - bstr__memcpy (b->data + c, sep->data, sep->slen); - c += sep->slen; - } - v = bl->entry[i]->slen; - bstr__memcpy (b->data + c, bl->entry[i]->data, v); - c += v; - } - b->data[c] = (unsigned char) '\0'; - return b; -} - -#define BSSSC_BUFF_LEN (256) - -/* int bssplitscb (struct bStream * s, const_bstring splitStr, - * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) - * - * Iterate the set of disjoint sequential substrings read from a stream - * divided by any of the characters in splitStr. An empty splitStr causes - * the whole stream to be iterated once. - * - * Note: At the point of calling the cb function, the bStream pointer is - * pointed exactly at the position right after having read the split - * character. The cb function can act on the stream by causing the bStream - * pointer to move, and bssplitscb will continue by starting the next split - * at the position of the pointer after the return from cb. - * - * However, if the cb causes the bStream s to be destroyed then the cb must - * return with a negative value, otherwise bssplitscb will continue in an - * undefined manner. 
- */ -int bssplitscb (struct bStream * s, const_bstring splitStr, - int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) { -struct charField chrs; -bstring buff; -int i, p, ret; - - if (cb == NULL || s == NULL || s->readFnPtr == NULL - || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; - - if (NULL == (buff = bfromcstr (""))) return BSTR_ERR; - - if (splitStr->slen == 0) { - while (bsreada (buff, s, BSSSC_BUFF_LEN) >= 0) ; - if ((ret = cb (parm, 0, buff)) > 0) - ret = 0; - } else { - buildCharField (&chrs, splitStr); - ret = p = i = 0; - for (;;) { - if (i >= buff->slen) { - bsreada (buff, s, BSSSC_BUFF_LEN); - if (i >= buff->slen) { - if (0 < (ret = cb (parm, p, buff))) ret = 0; - break; - } - } - if (testInCharField (&chrs, buff->data[i])) { - struct tagbstring t; - unsigned char c; - - blk2tbstr (t, buff->data + i + 1, buff->slen - (i + 1)); - if ((ret = bsunread (s, &t)) < 0) break; - buff->slen = i; - c = buff->data[i]; - buff->data[i] = (unsigned char) '\0'; - if ((ret = cb (parm, p, buff)) < 0) break; - buff->data[i] = c; - buff->slen = 0; - p += i + 1; - i = -1; - } - i++; - } - } - - bdestroy (buff); - return ret; -} - -/* int bssplitstrcb (struct bStream * s, const_bstring splitStr, - * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) - * - * Iterate the set of disjoint sequential substrings read from a stream - * divided by the entire substring splitStr. An empty splitStr causes - * each character of the stream to be iterated. - * - * Note: At the point of calling the cb function, the bStream pointer is - * pointed exactly at the position right after having read the split - * character. The cb function can act on the stream by causing the bStream - * pointer to move, and bssplitscb will continue by starting the next split - * at the position of the pointer after the return from cb. 
- * - * However, if the cb causes the bStream s to be destroyed then the cb must - * return with a negative value, otherwise bssplitscb will continue in an - * undefined manner. - */ -int bssplitstrcb (struct bStream * s, const_bstring splitStr, - int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) { -bstring buff; -int i, p, ret; - - if (cb == NULL || s == NULL || s->readFnPtr == NULL - || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; - - if (splitStr->slen == 1) return bssplitscb (s, splitStr, cb, parm); - - if (NULL == (buff = bfromcstr (""))) return BSTR_ERR; - - if (splitStr->slen == 0) { - for (i=0; bsreada (buff, s, BSSSC_BUFF_LEN) >= 0; i++) { - if ((ret = cb (parm, 0, buff)) < 0) { - bdestroy (buff); - return ret; - } - buff->slen = 0; - } - return BSTR_OK; - } else { - ret = p = i = 0; - for (i=p=0;;) { - if ((ret = binstr (buff, 0, splitStr)) >= 0) { - struct tagbstring t; - blk2tbstr (t, buff->data, ret); - i = ret + splitStr->slen; - if ((ret = cb (parm, p, &t)) < 0) break; - p += i; - bdelete (buff, 0, i); - } else { - bsreada (buff, s, BSSSC_BUFF_LEN); - if (bseof (s)) { - if ((ret = cb (parm, p, buff)) > 0) ret = 0; - break; - } - } - } - } - - bdestroy (buff); - return ret; -} - -/* int bstrListCreate (void) - * - * Create a bstrList. - */ -struct bstrList * bstrListCreate (void) { -struct bstrList * sl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); - if (sl) { - sl->entry = (bstring *) bstr__alloc (1*sizeof (bstring)); - if (!sl->entry) { - bstr__free (sl); - sl = NULL; - } else { - sl->qty = 0; - sl->mlen = 1; - } - } - return sl; -} - -/* int bstrListDestroy (struct bstrList * sl) - * - * Destroy a bstrList that has been created by bsplit, bsplits or bstrListCreate. 
- */ -int bstrListDestroy (struct bstrList * sl) { -int i; - if (sl == NULL || sl->qty < 0) return BSTR_ERR; - for (i=0; i < sl->qty; i++) { - if (sl->entry[i]) { - bdestroy (sl->entry[i]); - sl->entry[i] = NULL; - } - } - sl->qty = -1; - sl->mlen = -1; - bstr__free (sl->entry); - sl->entry = NULL; - bstr__free (sl); - return BSTR_OK; -} - -/* int bstrListAlloc (struct bstrList * sl, int msz) - * - * Ensure that there is memory for at least msz number of entries for the - * list. - */ -int bstrListAlloc (struct bstrList * sl, int msz) { -bstring * l; -int smsz; -size_t nsz; - if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR; - if (sl->mlen >= msz) return BSTR_OK; - smsz = snapUpSize (msz); - nsz = ((size_t) smsz) * sizeof (bstring); - if (nsz < (size_t) smsz) return BSTR_ERR; - l = (bstring *) bstr__realloc (sl->entry, nsz); - if (!l) { - smsz = msz; - nsz = ((size_t) smsz) * sizeof (bstring); - l = (bstring *) bstr__realloc (sl->entry, nsz); - if (!l) return BSTR_ERR; - } - sl->mlen = smsz; - sl->entry = l; - return BSTR_OK; -} - -/* int bstrListAllocMin (struct bstrList * sl, int msz) - * - * Try to allocate the minimum amount of memory for the list to include at - * least msz entries or sl->qty whichever is greater. 
- */ -int bstrListAllocMin (struct bstrList * sl, int msz) { -bstring * l; -size_t nsz; - if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR; - if (msz < sl->qty) msz = sl->qty; - if (sl->mlen == msz) return BSTR_OK; - nsz = ((size_t) msz) * sizeof (bstring); - if (nsz < (size_t) msz) return BSTR_ERR; - l = (bstring *) bstr__realloc (sl->entry, nsz); - if (!l) return BSTR_ERR; - sl->mlen = msz; - sl->entry = l; - return BSTR_OK; -} - -/* int bsplitcb (const_bstring str, unsigned char splitChar, int pos, - * int (* cb) (void * parm, int ofs, int len), void * parm) - * - * Iterate the set of disjoint sequential substrings over str divided by the - * character in splitChar. - * - * Note: Non-destructive modification of str from within the cb function - * while performing this split is not undefined. bsplitcb behaves in - * sequential lock step with calls to cb. I.e., after returning from a cb - * that return a non-negative integer, bsplitcb continues from the position - * 1 character after the last detected split character and it will halt - * immediately if the length of str falls below this point. However, if the - * cb function destroys str, then it *must* return with a negative value, - * otherwise bsplitcb will continue in an undefined manner. 
- */ -int bsplitcb (const_bstring str, unsigned char splitChar, int pos, - int (* cb) (void * parm, int ofs, int len), void * parm) { -int i, p, ret; - - if (cb == NULL || str == NULL || pos < 0 || pos > str->slen) - return BSTR_ERR; - - p = pos; - do { - for (i=p; i < str->slen; i++) { - if (str->data[i] == splitChar) break; - } - if ((ret = cb (parm, p, i - p)) < 0) return ret; - p = i + 1; - } while (p <= str->slen); - return BSTR_OK; -} - -/* int bsplitscb (const_bstring str, const_bstring splitStr, int pos, - * int (* cb) (void * parm, int ofs, int len), void * parm) - * - * Iterate the set of disjoint sequential substrings over str divided by any - * of the characters in splitStr. An empty splitStr causes the whole str to - * be iterated once. - * - * Note: Non-destructive modification of str from within the cb function - * while performing this split is not undefined. bsplitscb behaves in - * sequential lock step with calls to cb. I.e., after returning from a cb - * that return a non-negative integer, bsplitscb continues from the position - * 1 character after the last detected split character and it will halt - * immediately if the length of str falls below this point. However, if the - * cb function destroys str, then it *must* return with a negative value, - * otherwise bsplitscb will continue in an undefined manner. 
- */ -int bsplitscb (const_bstring str, const_bstring splitStr, int pos, - int (* cb) (void * parm, int ofs, int len), void * parm) { -struct charField chrs; -int i, p, ret; - - if (cb == NULL || str == NULL || pos < 0 || pos > str->slen - || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; - if (splitStr->slen == 0) { - if ((ret = cb (parm, 0, str->slen)) > 0) ret = 0; - return ret; - } - - if (splitStr->slen == 1) - return bsplitcb (str, splitStr->data[0], pos, cb, parm); - - buildCharField (&chrs, splitStr); - - p = pos; - do { - for (i=p; i < str->slen; i++) { - if (testInCharField (&chrs, str->data[i])) break; - } - if ((ret = cb (parm, p, i - p)) < 0) return ret; - p = i + 1; - } while (p <= str->slen); - return BSTR_OK; -} - -/* int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, - * int (* cb) (void * parm, int ofs, int len), void * parm) - * - * Iterate the set of disjoint sequential substrings over str divided by the - * substring splitStr. An empty splitStr causes the whole str to be - * iterated once. - * - * Note: Non-destructive modification of str from within the cb function - * while performing this split is not undefined. bsplitstrcb behaves in - * sequential lock step with calls to cb. I.e., after returning from a cb - * that return a non-negative integer, bsplitscb continues from the position - * 1 character after the last detected split character and it will halt - * immediately if the length of str falls below this point. However, if the - * cb function destroys str, then it *must* return with a negative value, - * otherwise bsplitscb will continue in an undefined manner. 
- */ -int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, - int (* cb) (void * parm, int ofs, int len), void * parm) { -int i, p, ret; - - if (cb == NULL || str == NULL || pos < 0 || pos > str->slen - || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; - - if (0 == splitStr->slen) { - for (i=pos; i < str->slen; i++) { - if ((ret = cb (parm, i, 1)) < 0) return ret; - } - return BSTR_OK; - } - - if (splitStr->slen == 1) - return bsplitcb (str, splitStr->data[0], pos, cb, parm); - - for (i=p=pos; i <= str->slen - splitStr->slen; i++) { - if (0 == bstr__memcmp (splitStr->data, str->data + i, splitStr->slen)) { - if ((ret = cb (parm, p, i - p)) < 0) return ret; - i += splitStr->slen; - p = i; - } - } - if ((ret = cb (parm, p, str->slen - p)) < 0) return ret; - return BSTR_OK; -} - -struct genBstrList { - bstring b; - struct bstrList * bl; -}; - -static int bscb (void * parm, int ofs, int len) { -struct genBstrList * g = (struct genBstrList *) parm; - if (g->bl->qty >= g->bl->mlen) { - int mlen = g->bl->mlen * 2; - bstring * tbl; - - while (g->bl->qty >= mlen) { - if (mlen < g->bl->mlen) return BSTR_ERR; - mlen += mlen; - } - - tbl = (bstring *) bstr__realloc (g->bl->entry, sizeof (bstring) * mlen); - if (tbl == NULL) return BSTR_ERR; - - g->bl->entry = tbl; - g->bl->mlen = mlen; - } - - g->bl->entry[g->bl->qty] = bmidstr (g->b, ofs, len); - g->bl->qty++; - return BSTR_OK; -} - -/* struct bstrList * bsplit (const_bstring str, unsigned char splitChar) - * - * Create an array of sequential substrings from str divided by the character - * splitChar. 
- */ -struct bstrList * bsplit (const_bstring str, unsigned char splitChar) { -struct genBstrList g; - - if (str == NULL || str->data == NULL || str->slen < 0) return NULL; - - g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); - if (g.bl == NULL) return NULL; - g.bl->mlen = 4; - g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring)); - if (NULL == g.bl->entry) { - bstr__free (g.bl); - return NULL; - } - - g.b = (bstring) str; - g.bl->qty = 0; - if (bsplitcb (str, splitChar, 0, bscb, &g) < 0) { - bstrListDestroy (g.bl); - return NULL; - } - return g.bl; -} - -/* struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr) - * - * Create an array of sequential substrings from str divided by the entire - * substring splitStr. - */ -struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr) { -struct genBstrList g; - - if (str == NULL || str->data == NULL || str->slen < 0) return NULL; - - g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); - if (g.bl == NULL) return NULL; - g.bl->mlen = 4; - g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring)); - if (NULL == g.bl->entry) { - bstr__free (g.bl); - return NULL; - } - - g.b = (bstring) str; - g.bl->qty = 0; - if (bsplitstrcb (str, splitStr, 0, bscb, &g) < 0) { - bstrListDestroy (g.bl); - return NULL; - } - return g.bl; -} - -/* struct bstrList * bsplits (const_bstring str, bstring splitStr) - * - * Create an array of sequential substrings from str divided by any of the - * characters in splitStr. An empty splitStr causes a single entry bstrList - * containing a copy of str to be returned. 
- */ -struct bstrList * bsplits (const_bstring str, const_bstring splitStr) { -struct genBstrList g; - - if ( str == NULL || str->slen < 0 || str->data == NULL || - splitStr == NULL || splitStr->slen < 0 || splitStr->data == NULL) - return NULL; - - g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); - if (g.bl == NULL) return NULL; - g.bl->mlen = 4; - g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring)); - if (NULL == g.bl->entry) { - bstr__free (g.bl); - return NULL; - } - g.b = (bstring) str; - g.bl->qty = 0; - - if (bsplitscb (str, splitStr, 0, bscb, &g) < 0) { - bstrListDestroy (g.bl); - return NULL; - } - return g.bl; -} - -#if defined (__TURBOC__) && !defined (__BORLANDC__) -# ifndef BSTRLIB_NOVSNP -# define BSTRLIB_NOVSNP -# endif -#endif - -/* Give WATCOM C/C++, MSVC some latitude for their non-support of vsnprintf */ -#if defined(__WATCOMC__) || defined(_MSC_VER) -#define exvsnprintf(r,b,n,f,a) {r = _vsnprintf (b,n,f,a);} -#else -#ifdef BSTRLIB_NOVSNP -/* This is just a hack. If you are using a system without a vsnprintf, it is - not recommended that bformat be used at all. */ -#define exvsnprintf(r,b,n,f,a) {vsprintf (b,f,a); r = -1;} -#define START_VSNBUFF (256) -#else - -#if defined(__GNUC__) && !defined(__APPLE__) -/* Something is making gcc complain about this prototype not being here, so - I've just gone ahead and put it in. */ -extern int vsnprintf (char *buf, size_t count, const char *format, va_list arg); -#endif - -#define exvsnprintf(r,b,n,f,a) {r = vsnprintf (b,n,f,a);} -#endif -#endif - -#if !defined (BSTRLIB_NOVSNP) - -#ifndef START_VSNBUFF -#define START_VSNBUFF (16) -#endif - -/* On IRIX vsnprintf returns n-1 when the operation would overflow the target - buffer, WATCOM and MSVC both return -1, while C99 requires that the - returned value be exactly what the length would be if the buffer would be - large enough. 
This leads to the idea that if the return value is larger - than n, then changing n to the return value will reduce the number of - iterations required. */ - -/* int bformata (bstring b, const char * fmt, ...) - * - * After the first parameter, it takes the same parameters as printf (), but - * rather than outputting results to stdio, it appends the results to - * a bstring which contains what would have been output. Note that if there - * is an early generation of a '\0' character, the bstring will be truncated - * to this end point. - */ -int bformata (bstring b, const char * fmt, ...) { -va_list arglist; -bstring buff; -int n, r; - - if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 - || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; - - /* Since the length is not determinable beforehand, a search is - performed using the truncating "vsnprintf" call (to avoid buffer - overflows) on increasing potential sizes for the output result. */ - - if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF; - if (NULL == (buff = bfromcstralloc (n + 2, ""))) { - n = 1; - if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR; - } - - for (;;) { - va_start (arglist, fmt); - exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist); - va_end (arglist); - - buff->data[n] = (unsigned char) '\0'; - buff->slen = (int) (strlen) ((char *) buff->data); - - if (buff->slen < n) break; - - if (r > n) n = r; else n += n; - - if (BSTR_OK != balloc (buff, n + 2)) { - bdestroy (buff); - return BSTR_ERR; - } - } - - r = bconcat (b, buff); - bdestroy (buff); - return r; -} - -/* int bassignformat (bstring b, const char * fmt, ...) - * - * After the first parameter, it takes the same parameters as printf (), but - * rather than outputting results to stdio, it outputs the results to - * the bstring parameter b. Note that if there is an early generation of a - * '\0' character, the bstring will be truncated to this end point. 
- */ -int bassignformat (bstring b, const char * fmt, ...) { -va_list arglist; -bstring buff; -int n, r; - - if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 - || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; - - /* Since the length is not determinable beforehand, a search is - performed using the truncating "vsnprintf" call (to avoid buffer - overflows) on increasing potential sizes for the output result. */ - - if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF; - if (NULL == (buff = bfromcstralloc (n + 2, ""))) { - n = 1; - if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR; - } - - for (;;) { - va_start (arglist, fmt); - exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist); - va_end (arglist); - - buff->data[n] = (unsigned char) '\0'; - buff->slen = (int) (strlen) ((char *) buff->data); - - if (buff->slen < n) break; - - if (r > n) n = r; else n += n; - - if (BSTR_OK != balloc (buff, n + 2)) { - bdestroy (buff); - return BSTR_ERR; - } - } - - r = bassign (b, buff); - bdestroy (buff); - return r; -} - -/* bstring bformat (const char * fmt, ...) - * - * Takes the same parameters as printf (), but rather than outputting results - * to stdio, it forms a bstring which contains what would have been output. - * Note that if there is an early generation of a '\0' character, the - * bstring will be truncated to this end point. - */ -bstring bformat (const char * fmt, ...) { -va_list arglist; -bstring buff; -int n, r; - - if (fmt == NULL) return NULL; - - /* Since the length is not determinable beforehand, a search is - performed using the truncating "vsnprintf" call (to avoid buffer - overflows) on increasing potential sizes for the output result. 
*/ - - if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF; - if (NULL == (buff = bfromcstralloc (n + 2, ""))) { - n = 1; - if (NULL == (buff = bfromcstralloc (n + 2, ""))) return NULL; - } - - for (;;) { - va_start (arglist, fmt); - exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist); - va_end (arglist); - - buff->data[n] = (unsigned char) '\0'; - buff->slen = (int) (strlen) ((char *) buff->data); - - if (buff->slen < n) break; - - if (r > n) n = r; else n += n; - - if (BSTR_OK != balloc (buff, n + 2)) { - bdestroy (buff); - return NULL; - } - } - - return buff; -} - -/* int bvcformata (bstring b, int count, const char * fmt, va_list arglist) - * - * The bvcformata function formats data under control of the format control - * string fmt and attempts to append the result to b. The fmt parameter is - * the same as that of the printf function. The variable argument list is - * replaced with arglist, which has been initialized by the va_start macro. - * The size of the appended output is upper bounded by count. If the - * required output exceeds count, the string b is not augmented with any - * contents and a value below BSTR_ERR is returned. If a value below -count - * is returned then it is recommended that the negative of this value be - * used as an update to the count in a subsequent pass. On other errors, - * such as running out of memory, parameter errors or numeric wrap around - * BSTR_ERR is returned. BSTR_OK is returned when the output is successfully - * generated and appended to b. - * - * Note: There is no sanity checking of arglist, and this function is - * destructive of the contents of b from the b->slen point onward. If there - * is an early generation of a '\0' character, the bstring will be truncated - * to this end point. 
- */ -int bvcformata (bstring b, int count, const char * fmt, va_list arg) { -int n, r, l; - - if (b == NULL || fmt == NULL || count <= 0 || b->data == NULL - || b->mlen <= 0 || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; - - if (count > (n = b->slen + count) + 2) return BSTR_ERR; - if (BSTR_OK != balloc (b, n + 2)) return BSTR_ERR; - - exvsnprintf (r, (char *) b->data + b->slen, count + 2, fmt, arg); - - /* Did the operation complete successfully within bounds? */ - for (l = b->slen; l <= n; l++) { - if ('\0' == b->data[l]) { - b->slen = l; - return BSTR_OK; - } - } - - /* Abort, since the buffer was not large enough. The return value - tries to help set what the retry length should be. */ - - b->data[b->slen] = '\0'; - if (r > count + 1) { /* Does r specify a particular target length? */ - n = r; - } else { - n = count + count; /* If not, just double the size of count */ - if (count > n) n = INT_MAX; - } - n = -n; - - if (n > BSTR_ERR-1) n = BSTR_ERR-1; - return n; -} - -#endif diff --git a/src/bstrlib.h b/src/bstrlib.h deleted file mode 100644 index c8fa694..0000000 --- a/src/bstrlib.h +++ /dev/null @@ -1,304 +0,0 @@ -/* - * This source file is part of the bstring string library. This code was - * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause - * BSD open source license or GPL v2.0. Refer to the accompanying documentation - * for details on usage and license. - */ - -/* - * bstrlib.h - * - * This file is the header file for the core module for implementing the - * bstring functions. 
- */ - -#ifndef BSTRLIB_INCLUDE -#define BSTRLIB_INCLUDE - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include -#include - -#if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP) -# if defined (__TURBOC__) && !defined (__BORLANDC__) -# define BSTRLIB_NOVSNP -# endif -#endif - -#define BSTR_ERR (-1) -#define BSTR_OK (0) -#define BSTR_BS_BUFF_LENGTH_GET (0) - -typedef struct tagbstring * bstring; -typedef const struct tagbstring * const_bstring; - -/* Copy functions */ -#define cstr2bstr bfromcstr -extern bstring bfromcstr (const char * str); -extern bstring bfromcstralloc (int mlen, const char * str); -extern bstring blk2bstr (const void * blk, int len); -extern char * bstr2cstr (const_bstring s, char z); -extern int bcstrfree (char * s); -extern bstring bstrcpy (const_bstring b1); -extern int bassign (bstring a, const_bstring b); -extern int bassignmidstr (bstring a, const_bstring b, int left, int len); -extern int bassigncstr (bstring a, const char * str); -extern int bassignblk (bstring a, const void * s, int len); - -/* Destroy function */ -extern int bdestroy (bstring b); - -/* Space allocation hinting functions */ -extern int balloc (bstring s, int len); -extern int ballocmin (bstring b, int len); - -/* Substring extraction */ -extern bstring bmidstr (const_bstring b, int left, int len); - -/* Various standard manipulations */ -extern int bconcat (bstring b0, const_bstring b1); -extern int bconchar (bstring b0, char c); -extern int bcatcstr (bstring b, const char * s); -extern int bcatblk (bstring b, const void * s, int len); -extern int binsert (bstring s1, int pos, const_bstring s2, unsigned char fill); -extern int binsertch (bstring s1, int pos, int len, unsigned char fill); -extern int breplace (bstring b1, int pos, int len, const_bstring b2, unsigned char fill); -extern int bdelete (bstring s1, int pos, int len); -extern int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill); -extern int btrunc (bstring b, int 
n); - -/* Scan/search functions */ -extern int bstricmp (const_bstring b0, const_bstring b1); -extern int bstrnicmp (const_bstring b0, const_bstring b1, int n); -extern int biseqcaseless (const_bstring b0, const_bstring b1); -extern int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len); -extern int biseq (const_bstring b0, const_bstring b1); -extern int bisstemeqblk (const_bstring b0, const void * blk, int len); -extern int biseqcstr (const_bstring b, const char * s); -extern int biseqcstrcaseless (const_bstring b, const char * s); -extern int bstrcmp (const_bstring b0, const_bstring b1); -extern int bstrncmp (const_bstring b0, const_bstring b1, int n); -extern int binstr (const_bstring s1, int pos, const_bstring s2); -extern int binstrr (const_bstring s1, int pos, const_bstring s2); -extern int binstrcaseless (const_bstring s1, int pos, const_bstring s2); -extern int binstrrcaseless (const_bstring s1, int pos, const_bstring s2); -extern int bstrchrp (const_bstring b, int c, int pos); -extern int bstrrchrp (const_bstring b, int c, int pos); -#define bstrchr(b,c) bstrchrp ((b), (c), 0) -#define bstrrchr(b,c) bstrrchrp ((b), (c), blength(b)-1) -extern int binchr (const_bstring b0, int pos, const_bstring b1); -extern int binchrr (const_bstring b0, int pos, const_bstring b1); -extern int bninchr (const_bstring b0, int pos, const_bstring b1); -extern int bninchrr (const_bstring b0, int pos, const_bstring b1); -extern int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos); -extern int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos); - -/* List of string container functions */ -struct bstrList { - int qty, mlen; - bstring * entry; -}; -extern struct bstrList * bstrListCreate (void); -extern int bstrListDestroy (struct bstrList * sl); -extern int bstrListAlloc (struct bstrList * sl, int msz); -extern int bstrListAllocMin (struct bstrList * sl, int msz); - -/* String split and join functions */ 
-extern struct bstrList * bsplit (const_bstring str, unsigned char splitChar); -extern struct bstrList * bsplits (const_bstring str, const_bstring splitStr); -extern struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr); -extern bstring bjoin (const struct bstrList * bl, const_bstring sep); -extern int bsplitcb (const_bstring str, unsigned char splitChar, int pos, - int (* cb) (void * parm, int ofs, int len), void * parm); -extern int bsplitscb (const_bstring str, const_bstring splitStr, int pos, - int (* cb) (void * parm, int ofs, int len), void * parm); -extern int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, - int (* cb) (void * parm, int ofs, int len), void * parm); - -/* Miscellaneous functions */ -extern int bpattern (bstring b, int len); -extern int btoupper (bstring b); -extern int btolower (bstring b); -extern int bltrimws (bstring b); -extern int brtrimws (bstring b); -extern int btrimws (bstring b); - -/* <*>printf format functions */ -#if !defined (BSTRLIB_NOVSNP) -extern bstring bformat (const char * fmt, ...); -extern int bformata (bstring b, const char * fmt, ...); -extern int bassignformat (bstring b, const char * fmt, ...); -extern int bvcformata (bstring b, int count, const char * fmt, va_list arglist); - -#define bvformata(ret, b, fmt, lastarg) { \ -bstring bstrtmp_b = (b); \ -const char * bstrtmp_fmt = (fmt); \ -int bstrtmp_r = BSTR_ERR, bstrtmp_sz = 16; \ - for (;;) { \ - va_list bstrtmp_arglist; \ - va_start (bstrtmp_arglist, lastarg); \ - bstrtmp_r = bvcformata (bstrtmp_b, bstrtmp_sz, bstrtmp_fmt, bstrtmp_arglist); \ - va_end (bstrtmp_arglist); \ - if (bstrtmp_r >= 0) { /* Everything went ok */ \ - bstrtmp_r = BSTR_OK; \ - break; \ - } else if (-bstrtmp_r <= bstrtmp_sz) { /* A real error? 
*/ \ - bstrtmp_r = BSTR_ERR; \ - break; \ - } \ - bstrtmp_sz = -bstrtmp_r; /* Doubled or target size */ \ - } \ - ret = bstrtmp_r; \ -} - -#endif - -typedef int (*bNgetc) (void *parm); -typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, void *parm); - -/* Input functions */ -extern bstring bgets (bNgetc getcPtr, void * parm, char terminator); -extern bstring bread (bNread readPtr, void * parm); -extern int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator); -extern int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator); -extern int breada (bstring b, bNread readPtr, void * parm); - -/* Stream functions */ -extern struct bStream * bsopen (bNread readPtr, void * parm); -extern void * bsclose (struct bStream * s); -extern int bsbufflength (struct bStream * s, int sz); -extern int bsreadln (bstring b, struct bStream * s, char terminator); -extern int bsreadlns (bstring r, struct bStream * s, const_bstring term); -extern int bsread (bstring b, struct bStream * s, int n); -extern int bsreadlna (bstring b, struct bStream * s, char terminator); -extern int bsreadlnsa (bstring r, struct bStream * s, const_bstring term); -extern int bsreada (bstring b, struct bStream * s, int n); -extern int bsunread (struct bStream * s, const_bstring b); -extern int bspeek (bstring r, const struct bStream * s); -extern int bssplitscb (struct bStream * s, const_bstring splitStr, - int (* cb) (void * parm, int ofs, const_bstring entry), void * parm); -extern int bssplitstrcb (struct bStream * s, const_bstring splitStr, - int (* cb) (void * parm, int ofs, const_bstring entry), void * parm); -extern int bseof (const struct bStream * s); - -struct tagbstring { - int mlen; - int slen; - unsigned char * data; -}; - -/* Accessor macros */ -#define blengthe(b, e) (((b) == (void *)0 || (b)->slen < 0) ? (int)(e) : ((b)->slen)) -#define blength(b) (blengthe ((b), 0)) -#define bdataofse(b, o, e) (((b) == (void *)0 || (b)->data == (void*)0) ? 
(char *)(e) : ((char *)(b)->data) + (o)) -#define bdataofs(b, o) (bdataofse ((b), (o), (void *)0)) -#define bdatae(b, e) (bdataofse (b, 0, e)) -#define bdata(b) (bdataofs (b, 0)) -#define bchare(b, p, e) ((((unsigned)(p)) < (unsigned)blength(b)) ? ((b)->data[(p)]) : (e)) -#define bchar(b, p) bchare ((b), (p), '\0') - -/* Static constant string initialization macro */ -#define bsStaticMlen(q,m) {(m), (int) sizeof(q)-1, (unsigned char *) ("" q "")} -#if defined(_MSC_VER) -/* There are many versions of MSVC which emit __LINE__ as a non-constant. */ -# define bsStatic(q) bsStaticMlen(q,-32) -#endif -#ifndef bsStatic -# define bsStatic(q) bsStaticMlen(q,-__LINE__) -#endif - -/* Static constant block parameter pair */ -#define bsStaticBlkParms(q) ((void *)("" q "")), ((int) sizeof(q)-1) - -/* Reference building macros */ -#define cstr2tbstr btfromcstr -#define btfromcstr(t,s) { \ - (t).data = (unsigned char *) (s); \ - (t).slen = ((t).data) ? ((int) (strlen) ((char *)(t).data)) : 0; \ - (t).mlen = -1; \ -} -#define blk2tbstr(t,s,l) { \ - (t).data = (unsigned char *) (s); \ - (t).slen = l; \ - (t).mlen = -1; \ -} -#define btfromblk(t,s,l) blk2tbstr(t,s,l) -#define bmid2tbstr(t,b,p,l) { \ - const_bstring bstrtmp_s = (b); \ - if (bstrtmp_s && bstrtmp_s->data && bstrtmp_s->slen >= 0) { \ - int bstrtmp_left = (p); \ - int bstrtmp_len = (l); \ - if (bstrtmp_left < 0) { \ - bstrtmp_len += bstrtmp_left; \ - bstrtmp_left = 0; \ - } \ - if (bstrtmp_len > bstrtmp_s->slen - bstrtmp_left) \ - bstrtmp_len = bstrtmp_s->slen - bstrtmp_left; \ - if (bstrtmp_len <= 0) { \ - (t).data = (unsigned char *)""; \ - (t).slen = 0; \ - } else { \ - (t).data = bstrtmp_s->data + bstrtmp_left; \ - (t).slen = bstrtmp_len; \ - } \ - } else { \ - (t).data = (unsigned char *)""; \ - (t).slen = 0; \ - } \ - (t).mlen = -__LINE__; \ -} -#define btfromblkltrimws(t,s,l) { \ - int bstrtmp_idx = 0, bstrtmp_len = (l); \ - unsigned char * bstrtmp_s = (s); \ - if (bstrtmp_s && bstrtmp_len >= 0) { \ - for (; 
bstrtmp_idx < bstrtmp_len; bstrtmp_idx++) { \ - if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \ - } \ - } \ - (t).data = bstrtmp_s + bstrtmp_idx; \ - (t).slen = bstrtmp_len - bstrtmp_idx; \ - (t).mlen = -__LINE__; \ -} -#define btfromblkrtrimws(t,s,l) { \ - int bstrtmp_len = (l) - 1; \ - unsigned char * bstrtmp_s = (s); \ - if (bstrtmp_s && bstrtmp_len >= 0) { \ - for (; bstrtmp_len >= 0; bstrtmp_len--) { \ - if (!isspace (bstrtmp_s[bstrtmp_len])) break; \ - } \ - } \ - (t).data = bstrtmp_s; \ - (t).slen = bstrtmp_len + 1; \ - (t).mlen = -__LINE__; \ -} -#define btfromblktrimws(t,s,l) { \ - int bstrtmp_idx = 0, bstrtmp_len = (l) - 1; \ - unsigned char * bstrtmp_s = (s); \ - if (bstrtmp_s && bstrtmp_len >= 0) { \ - for (; bstrtmp_idx <= bstrtmp_len; bstrtmp_idx++) { \ - if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \ - } \ - for (; bstrtmp_len >= bstrtmp_idx; bstrtmp_len--) { \ - if (!isspace (bstrtmp_s[bstrtmp_len])) break; \ - } \ - } \ - (t).data = bstrtmp_s + bstrtmp_idx; \ - (t).slen = bstrtmp_len + 1 - bstrtmp_idx; \ - (t).mlen = -__LINE__; \ -} - -/* Write protection macros */ -#define bwriteprotect(t) { if ((t).mlen >= 0) (t).mlen = -1; } -#define bwriteallow(t) { if ((t).mlen == -1) (t).mlen = (t).slen + ((t).slen == 0); } -#define biswriteprotected(t) ((t).mlen <= 0) - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/buffer.c b/src/buffer.c new file mode 100644 index 0000000..b81e7fa --- /dev/null +++ b/src/buffer.c @@ -0,0 +1,313 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "buffer.h" + +/* Used as default value for gh_buf->ptr so that people can always + * assume ptr is non-NULL and zero terminated even for new gh_bufs. 
+ */ +unsigned char gh_buf__initbuf[1]; +unsigned char gh_buf__oom[1]; + +#define ENSURE_SIZE(b, d) /* grows `b` to `d` bytes; makes the caller return -1 on failure */ \ + if ((d) > (b)->asize && gh_buf_grow(b, (d)) < 0)\ + return -1; + +void gh_buf_init(gh_buf *buf, int initial_size) +{ + buf->asize = 0; + buf->size = 0; + buf->ptr = gh_buf__initbuf; + + if (initial_size) + gh_buf_grow(buf, initial_size); +} + +int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom) +{ + char *new_ptr; + size_t new_size; + + if (buf->ptr == gh_buf__oom || buf->asize < 0) + return -1; + + if (target_size <= buf->asize) + return 0; + + if (buf->asize == 0) { + new_size = target_size; + new_ptr = NULL; + } else { + new_size = buf->asize; + new_ptr = buf->ptr; + } + + /* grow the buffer size by 1.5, until it's big enough + * to fit our target size */ + while (new_size < target_size) + new_size = (new_size << 1) - (new_size >> 1); + + /* round allocation up to multiple of 8 */ + new_size = (new_size + 7) & ~7; + + new_ptr = realloc(new_ptr, new_size); + + if (!new_ptr) { + if (mark_oom) + buf->ptr = gh_buf__oom; + return -1; + } + + buf->asize = new_size; + buf->ptr = new_ptr; + + /* truncate the existing buffer size if necessary */ + if (buf->size >= buf->asize) + buf->size = buf->asize - 1; + buf->ptr[buf->size] = '\0'; + + return 0; +} + +void gh_buf_free(gh_buf *buf) +{ + if (!buf) return; + + if (buf->asize > 0 && buf->ptr != gh_buf__initbuf && buf->ptr != gh_buf__oom) + free(buf->ptr); + + gh_buf_init(buf, 0); +} + +void gh_buf_clear(gh_buf *buf) +{ + buf->size = 0; + + if (buf->asize > 0) + buf->ptr[0] = '\0'; + + if (buf->asize < 0) { + buf->ptr = gh_buf__initbuf; + buf->asize = 0; + } +} + +int gh_buf_set(gh_buf *buf, const char *data, int len) +{ + if (len == 0 || data == NULL) { + gh_buf_clear(buf); + } else { + if (data != buf->ptr) { + ENSURE_SIZE(buf, len + 1); + memmove(buf->ptr, data, len); + } + buf->size = len; + buf->ptr[buf->size] = '\0'; + } + return 0; +} + +int gh_buf_sets(gh_buf *buf, const char *string) +{ + return 
gh_buf_set(buf, string, string ? strlen(string) : 0); +} + +int gh_buf_putc(gh_buf *buf, char c) +{ + ENSURE_SIZE(buf, buf->size + 2); + buf->ptr[buf->size++] = c; + buf->ptr[buf->size] = '\0'; + return 0; +} + +int gh_buf_put(gh_buf *buf, const char *data, int len) +{ + ENSURE_SIZE(buf, buf->size + len + 1); + memmove(buf->ptr + buf->size, data, len); + buf->size += len; + buf->ptr[buf->size] = '\0'; + return 0; +} + +int gh_buf_puts(gh_buf *buf, const char *string) +{ + assert(string); + return gh_buf_put(buf, string, strlen(string)); +} + +int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap) +{ + const int expected_size = buf->size + (strlen(format) * 2); + int len; + + ENSURE_SIZE(buf, expected_size); + + while (1) { + va_list args; + va_copy(args, ap); + + len = vsnprintf( + buf->ptr + buf->size, + buf->asize - buf->size, + format, args + ); + va_end(args); /* pairs with the va_copy above on every iteration and exit path */ + if (len < 0) { + free(buf->ptr); + buf->ptr = gh_buf__oom; + return -1; + } + + if (len + 1 <= buf->asize - buf->size) { + buf->size += len; + break; + } + + ENSURE_SIZE(buf, buf->size + len + 1); + } + + return 0; +} + +int gh_buf_printf(gh_buf *buf, const char *format, ...) 
+{ + int r; + va_list ap; + + va_start(ap, format); + r = gh_buf_vprintf(buf, format, ap); + va_end(ap); + + return r; +} + +void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf) +{ + int copylen; /* int, like gh_buf.size and the prototype in buffer.h */ + + assert(data && datasize > 0 && buf); + + data[0] = '\0'; + + if (buf->size == 0 || buf->asize <= 0) + return; + + copylen = buf->size; + if (copylen > datasize - 1) + copylen = datasize - 1; + memmove(data, buf->ptr, copylen); + data[copylen] = '\0'; +} + +void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b) +{ + gh_buf t = *buf_a; + *buf_a = *buf_b; + *buf_b = t; +} + +char *gh_buf_detach(gh_buf *buf) +{ + char *data = buf->ptr; + + if (buf->asize == 0 || buf->ptr == gh_buf__oom) + return NULL; + + gh_buf_init(buf, 0); + + return data; +} + +void gh_buf_attach(gh_buf *buf, char *ptr, int asize) +{ + gh_buf_free(buf); + + if (ptr) { + buf->ptr = ptr; + buf->size = strlen(ptr); + if (asize) + buf->asize = (asize < buf->size) ? buf->size + 1 : asize; + else /* pass 0 to fall back on strlen + 1 */ + buf->asize = buf->size + 1; + } else { + gh_buf_grow(buf, asize); + } +} + +int gh_buf_cmp(const gh_buf *a, const gh_buf *b) +{ + int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size)); + return (result != 0) ? result : + (a->size < b->size) ? -1 : (a->size > b->size) ? 
1 : 0; +} + +int gh_buf_strchr(const gh_buf *buf, int c, int pos) +{ + const char *p = memchr(buf->ptr + pos, c, buf->size - pos); + if (!p) + return -1; + + return (int)(p - p->ptr); +} + +int gh_buf_strrchr(const gh_buf *buf, int c, int pos) +{ + int i; + + for (i = pos; i >= 0; i--) { + if (buf->ptr[i] == (unsigned char) c) + return i; + } + + return -1; +} + +void gh_buf_truncate(gh_buf *buf, size_t len) +{ + assert(buf->asize >= 0); + + if (len < buf->size) { + buf->size = len; + buf->ptr[buf->size] = '\0'; + } +} + +void gh_buf_ltruncate(gh_buf *buf, size_t len) +{ + assert(buf->asize >= 0); + + if (len && len < buf->size) { + memmove(buf->ptr, buf->ptr + len, buf->size - len); + buf->size -= len; + buf->ptr[buf->size] = '\0'; + } +} + +void gh_buf_trim(gh_buf *buf) +{ + size_t i = 0; + + assert(buf->asize >= 0); + + /* ltrim */ + while (i < buf->size && isspace(buf->ptr[i])) + i++; + + gh_buf_truncate(buf, i); + + /* rtrim */ + while (buf->size > 0) { + if (!isspace(buf->ptr[buf->size - 1])) + break; + + buf->size--; + } + + buf->ptr[buf->size] = '\0'; +} diff --git a/src/buffer.h b/src/buffer.h new file mode 100644 index 0000000..2581ee3 --- /dev/null +++ b/src/buffer.h @@ -0,0 +1,119 @@ +#ifndef INCLUDE_buffer_h__ +#define INCLUDE_buffer_h__ + +#include +#include +#include +#include + +typedef struct { + unsigned char *ptr; + int asize, size; +} gh_buf; + +extern unsigned char gh_buf__initbuf[]; +extern unsigned char gh_buf__oom[]; + +#define GH_BUF_INIT { gh_buf__initbuf, 0, 0 } + +/** + * Initialize a gh_buf structure. + * + * For the cases where GH_BUF_INIT cannot be used to do static + * initialization. + */ +extern void gh_buf_init(gh_buf *buf, int initial_size); + +static inline void gh_buf_static(gh_buf *buf, unsigned char *source) +{ + buf->ptr = source; + buf->size = strlen(source); + buf->asize = -1; +} + +/** + * Attempt to grow the buffer to hold at least `target_size` bytes. + * + * If the allocation fails, this will return an error. 
If mark_oom is true, + * this will mark the buffer as invalid for future operations; if false, + * existing buffer content will be preserved, but calling code must handle + * that the buffer was not expanded. + */ +extern int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom); + +/** + * Grow the buffer to hold at least `target_size` bytes. + * + * If the allocation fails, this will return an error and the buffer will be + * marked as invalid for future operations, invalidating its contents. + * + * @return 0 on success or -1 on failure + */ +static inline int gh_buf_grow(gh_buf *buf, int target_size) +{ + return gh_buf_try_grow(buf, target_size, true); +} + +extern void gh_buf_free(gh_buf *buf); +extern void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b); + +/** + * Test if there have been any reallocation failures with this gh_buf. + * + * Any function that writes to a gh_buf can fail due to memory allocation + * issues. If one fails, the gh_buf will be marked with an OOM error and + * further calls to modify the buffer will fail. Check gh_buf_oom() at the + * end of your sequence and it will be true if you ran out of memory at any + * point with that buffer. + * + * @return false if no error, true if allocation error + */ +static inline bool gh_buf_oom(const gh_buf *buf) +{ + return (buf->ptr == gh_buf__oom); +} + + +static inline size_t gh_buf_len(const gh_buf *buf) +{ + return buf->size; +} + +extern int gh_buf_cmp(const gh_buf *a, const gh_buf *b); + +extern void gh_buf_attach(gh_buf *buf, char *ptr, int asize); +extern char *gh_buf_detach(gh_buf *buf); +extern void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf); + +static inline const char *gh_buf_cstr(const gh_buf *buf) +{ + return buf->ptr; +} + +#define gh_buf_at(buf, n) ((buf)->ptr[n]) + +/* + * Functions below that return int value error codes will return 0 on + * success or -1 on failure (which generally means an allocation failed). 
+ * Using a gh_buf where the allocation has failed will result in -1 from + * all further calls using that buffer. As a result, you can ignore the + * return code of these functions and call them in a series then just call + * gh_buf_oom at the end. + */ +extern int gh_buf_set(gh_buf *buf, const char *data, int len); +extern int gh_buf_sets(gh_buf *buf, const char *string); +extern int gh_buf_putc(gh_buf *buf, char c); +extern int gh_buf_put(gh_buf *buf, const char *data, int len); +extern int gh_buf_puts(gh_buf *buf, const char *string); +extern int gh_buf_printf(gh_buf *buf, const char *format, ...) + __attribute__((format (printf, 2, 3))); +extern int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap); +extern void gh_buf_clear(gh_buf *buf); + +int gh_buf_strchr(const gh_buf *buf, int c, int pos); +int gh_buf_strrchr(const gh_buf *buf, int c, int pos); +void gh_buf_truncate(gh_buf *buf, int len); +void gh_buf_ltruncate(gh_buf *buf, int len); +void gh_buf_trim(gh_buf *buf); + +#endif diff --git a/src/case_fold_switch.c b/src/case_fold_switch.c deleted file mode 100644 index 70fdd75..0000000 --- a/src/case_fold_switch.c +++ /dev/null @@ -1,2637 +0,0 @@ - switch (c) { - case 0x0041: - bufpush(0x0061); - break; - case 0x0042: - bufpush(0x0062); - break; - case 0x0043: - bufpush(0x0063); - break; - case 0x0044: - bufpush(0x0064); - break; - case 0x0045: - bufpush(0x0065); - break; - case 0x0046: - bufpush(0x0066); - break; - case 0x0047: - bufpush(0x0067); - break; - case 0x0048: - bufpush(0x0068); - break; - case 0x0049: - bufpush(0x0069); - break; - case 0x004A: - bufpush(0x006A); - break; - case 0x004B: - bufpush(0x006B); - break; - case 0x004C: - bufpush(0x006C); - break; - case 0x004D: - bufpush(0x006D); - break; - case 0x004E: - bufpush(0x006E); - break; - case 0x004F: - bufpush(0x006F); - break; - case 0x0050: - bufpush(0x0070); - break; - case 0x0051: - bufpush(0x0071); - break; - case 0x0052: - bufpush(0x0072); - break; - case 0x0053: - 
bufpush(0x0073); - break; - case 0x0054: - bufpush(0x0074); - break; - case 0x0055: - bufpush(0x0075); - break; - case 0x0056: - bufpush(0x0076); - break; - case 0x0057: - bufpush(0x0077); - break; - case 0x0058: - bufpush(0x0078); - break; - case 0x0059: - bufpush(0x0079); - break; - case 0x005A: - bufpush(0x007A); - break; - case 0x00B5: - bufpush(0x03BC); - break; - case 0x00C0: - bufpush(0x00E0); - break; - case 0x00C1: - bufpush(0x00E1); - break; - case 0x00C2: - bufpush(0x00E2); - break; - case 0x00C3: - bufpush(0x00E3); - break; - case 0x00C4: - bufpush(0x00E4); - break; - case 0x00C5: - bufpush(0x00E5); - break; - case 0x00C6: - bufpush(0x00E6); - break; - case 0x00C7: - bufpush(0x00E7); - break; - case 0x00C8: - bufpush(0x00E8); - break; - case 0x00C9: - bufpush(0x00E9); - break; - case 0x00CA: - bufpush(0x00EA); - break; - case 0x00CB: - bufpush(0x00EB); - break; - case 0x00CC: - bufpush(0x00EC); - break; - case 0x00CD: - bufpush(0x00ED); - break; - case 0x00CE: - bufpush(0x00EE); - break; - case 0x00CF: - bufpush(0x00EF); - break; - case 0x00D0: - bufpush(0x00F0); - break; - case 0x00D1: - bufpush(0x00F1); - break; - case 0x00D2: - bufpush(0x00F2); - break; - case 0x00D3: - bufpush(0x00F3); - break; - case 0x00D4: - bufpush(0x00F4); - break; - case 0x00D5: - bufpush(0x00F5); - break; - case 0x00D6: - bufpush(0x00F6); - break; - case 0x00D8: - bufpush(0x00F8); - break; - case 0x00D9: - bufpush(0x00F9); - break; - case 0x00DA: - bufpush(0x00FA); - break; - case 0x00DB: - bufpush(0x00FB); - break; - case 0x00DC: - bufpush(0x00FC); - break; - case 0x00DD: - bufpush(0x00FD); - break; - case 0x00DE: - bufpush(0x00FE); - break; - case 0x00DF: - bufpush(0x0073); - bufpush(0x0073); - break; - case 0x0100: - bufpush(0x0101); - break; - case 0x0102: - bufpush(0x0103); - break; - case 0x0104: - bufpush(0x0105); - break; - case 0x0106: - bufpush(0x0107); - break; - case 0x0108: - bufpush(0x0109); - break; - case 0x010A: - bufpush(0x010B); - break; - case 0x010C: - 
bufpush(0x010D); - break; - case 0x010E: - bufpush(0x010F); - break; - case 0x0110: - bufpush(0x0111); - break; - case 0x0112: - bufpush(0x0113); - break; - case 0x0114: - bufpush(0x0115); - break; - case 0x0116: - bufpush(0x0117); - break; - case 0x0118: - bufpush(0x0119); - break; - case 0x011A: - bufpush(0x011B); - break; - case 0x011C: - bufpush(0x011D); - break; - case 0x011E: - bufpush(0x011F); - break; - case 0x0120: - bufpush(0x0121); - break; - case 0x0122: - bufpush(0x0123); - break; - case 0x0124: - bufpush(0x0125); - break; - case 0x0126: - bufpush(0x0127); - break; - case 0x0128: - bufpush(0x0129); - break; - case 0x012A: - bufpush(0x012B); - break; - case 0x012C: - bufpush(0x012D); - break; - case 0x012E: - bufpush(0x012F); - break; - case 0x0130: - bufpush(0x0069); - bufpush(0x0307); - break; - case 0x0132: - bufpush(0x0133); - break; - case 0x0134: - bufpush(0x0135); - break; - case 0x0136: - bufpush(0x0137); - break; - case 0x0139: - bufpush(0x013A); - break; - case 0x013B: - bufpush(0x013C); - break; - case 0x013D: - bufpush(0x013E); - break; - case 0x013F: - bufpush(0x0140); - break; - case 0x0141: - bufpush(0x0142); - break; - case 0x0143: - bufpush(0x0144); - break; - case 0x0145: - bufpush(0x0146); - break; - case 0x0147: - bufpush(0x0148); - break; - case 0x0149: - bufpush(0x02BC); - bufpush(0x006E); - break; - case 0x014A: - bufpush(0x014B); - break; - case 0x014C: - bufpush(0x014D); - break; - case 0x014E: - bufpush(0x014F); - break; - case 0x0150: - bufpush(0x0151); - break; - case 0x0152: - bufpush(0x0153); - break; - case 0x0154: - bufpush(0x0155); - break; - case 0x0156: - bufpush(0x0157); - break; - case 0x0158: - bufpush(0x0159); - break; - case 0x015A: - bufpush(0x015B); - break; - case 0x015C: - bufpush(0x015D); - break; - case 0x015E: - bufpush(0x015F); - break; - case 0x0160: - bufpush(0x0161); - break; - case 0x0162: - bufpush(0x0163); - break; - case 0x0164: - bufpush(0x0165); - break; - case 0x0166: - bufpush(0x0167); - break; 
- case 0x0168: - bufpush(0x0169); - break; - case 0x016A: - bufpush(0x016B); - break; - case 0x016C: - bufpush(0x016D); - break; - case 0x016E: - bufpush(0x016F); - break; - case 0x0170: - bufpush(0x0171); - break; - case 0x0172: - bufpush(0x0173); - break; - case 0x0174: - bufpush(0x0175); - break; - case 0x0176: - bufpush(0x0177); - break; - case 0x0178: - bufpush(0x00FF); - break; - case 0x0179: - bufpush(0x017A); - break; - case 0x017B: - bufpush(0x017C); - break; - case 0x017D: - bufpush(0x017E); - break; - case 0x017F: - bufpush(0x0073); - break; - case 0x0181: - bufpush(0x0253); - break; - case 0x0182: - bufpush(0x0183); - break; - case 0x0184: - bufpush(0x0185); - break; - case 0x0186: - bufpush(0x0254); - break; - case 0x0187: - bufpush(0x0188); - break; - case 0x0189: - bufpush(0x0256); - break; - case 0x018A: - bufpush(0x0257); - break; - case 0x018B: - bufpush(0x018C); - break; - case 0x018E: - bufpush(0x01DD); - break; - case 0x018F: - bufpush(0x0259); - break; - case 0x0190: - bufpush(0x025B); - break; - case 0x0191: - bufpush(0x0192); - break; - case 0x0193: - bufpush(0x0260); - break; - case 0x0194: - bufpush(0x0263); - break; - case 0x0196: - bufpush(0x0269); - break; - case 0x0197: - bufpush(0x0268); - break; - case 0x0198: - bufpush(0x0199); - break; - case 0x019C: - bufpush(0x026F); - break; - case 0x019D: - bufpush(0x0272); - break; - case 0x019F: - bufpush(0x0275); - break; - case 0x01A0: - bufpush(0x01A1); - break; - case 0x01A2: - bufpush(0x01A3); - break; - case 0x01A4: - bufpush(0x01A5); - break; - case 0x01A6: - bufpush(0x0280); - break; - case 0x01A7: - bufpush(0x01A8); - break; - case 0x01A9: - bufpush(0x0283); - break; - case 0x01AC: - bufpush(0x01AD); - break; - case 0x01AE: - bufpush(0x0288); - break; - case 0x01AF: - bufpush(0x01B0); - break; - case 0x01B1: - bufpush(0x028A); - break; - case 0x01B2: - bufpush(0x028B); - break; - case 0x01B3: - bufpush(0x01B4); - break; - case 0x01B5: - bufpush(0x01B6); - break; - case 0x01B7: - 
bufpush(0x0292); - break; - case 0x01B8: - bufpush(0x01B9); - break; - case 0x01BC: - bufpush(0x01BD); - break; - case 0x01C4: - bufpush(0x01C6); - break; - case 0x01C5: - bufpush(0x01C6); - break; - case 0x01C7: - bufpush(0x01C9); - break; - case 0x01C8: - bufpush(0x01C9); - break; - case 0x01CA: - bufpush(0x01CC); - break; - case 0x01CB: - bufpush(0x01CC); - break; - case 0x01CD: - bufpush(0x01CE); - break; - case 0x01CF: - bufpush(0x01D0); - break; - case 0x01D1: - bufpush(0x01D2); - break; - case 0x01D3: - bufpush(0x01D4); - break; - case 0x01D5: - bufpush(0x01D6); - break; - case 0x01D7: - bufpush(0x01D8); - break; - case 0x01D9: - bufpush(0x01DA); - break; - case 0x01DB: - bufpush(0x01DC); - break; - case 0x01DE: - bufpush(0x01DF); - break; - case 0x01E0: - bufpush(0x01E1); - break; - case 0x01E2: - bufpush(0x01E3); - break; - case 0x01E4: - bufpush(0x01E5); - break; - case 0x01E6: - bufpush(0x01E7); - break; - case 0x01E8: - bufpush(0x01E9); - break; - case 0x01EA: - bufpush(0x01EB); - break; - case 0x01EC: - bufpush(0x01ED); - break; - case 0x01EE: - bufpush(0x01EF); - break; - case 0x01F0: - bufpush(0x006A); - bufpush(0x030C); - break; - case 0x01F1: - bufpush(0x01F3); - break; - case 0x01F2: - bufpush(0x01F3); - break; - case 0x01F4: - bufpush(0x01F5); - break; - case 0x01F6: - bufpush(0x0195); - break; - case 0x01F7: - bufpush(0x01BF); - break; - case 0x01F8: - bufpush(0x01F9); - break; - case 0x01FA: - bufpush(0x01FB); - break; - case 0x01FC: - bufpush(0x01FD); - break; - case 0x01FE: - bufpush(0x01FF); - break; - case 0x0200: - bufpush(0x0201); - break; - case 0x0202: - bufpush(0x0203); - break; - case 0x0204: - bufpush(0x0205); - break; - case 0x0206: - bufpush(0x0207); - break; - case 0x0208: - bufpush(0x0209); - break; - case 0x020A: - bufpush(0x020B); - break; - case 0x020C: - bufpush(0x020D); - break; - case 0x020E: - bufpush(0x020F); - break; - case 0x0210: - bufpush(0x0211); - break; - case 0x0212: - bufpush(0x0213); - break; - case 0x0214: - 
bufpush(0x0215); - break; - case 0x0216: - bufpush(0x0217); - break; - case 0x0218: - bufpush(0x0219); - break; - case 0x021A: - bufpush(0x021B); - break; - case 0x021C: - bufpush(0x021D); - break; - case 0x021E: - bufpush(0x021F); - break; - case 0x0220: - bufpush(0x019E); - break; - case 0x0222: - bufpush(0x0223); - break; - case 0x0224: - bufpush(0x0225); - break; - case 0x0226: - bufpush(0x0227); - break; - case 0x0228: - bufpush(0x0229); - break; - case 0x022A: - bufpush(0x022B); - break; - case 0x022C: - bufpush(0x022D); - break; - case 0x022E: - bufpush(0x022F); - break; - case 0x0230: - bufpush(0x0231); - break; - case 0x0232: - bufpush(0x0233); - break; - case 0x0345: - bufpush(0x03B9); - break; - case 0x0386: - bufpush(0x03AC); - break; - case 0x0388: - bufpush(0x03AD); - break; - case 0x0389: - bufpush(0x03AE); - break; - case 0x038A: - bufpush(0x03AF); - break; - case 0x038C: - bufpush(0x03CC); - break; - case 0x038E: - bufpush(0x03CD); - break; - case 0x038F: - bufpush(0x03CE); - break; - case 0x0390: - bufpush(0x03B9); - bufpush(0x0308); - bufpush(0x0301); - break; - case 0x0391: - bufpush(0x03B1); - break; - case 0x0392: - bufpush(0x03B2); - break; - case 0x0393: - bufpush(0x03B3); - break; - case 0x0394: - bufpush(0x03B4); - break; - case 0x0395: - bufpush(0x03B5); - break; - case 0x0396: - bufpush(0x03B6); - break; - case 0x0397: - bufpush(0x03B7); - break; - case 0x0398: - bufpush(0x03B8); - break; - case 0x0399: - bufpush(0x03B9); - break; - case 0x039A: - bufpush(0x03BA); - break; - case 0x039B: - bufpush(0x03BB); - break; - case 0x039C: - bufpush(0x03BC); - break; - case 0x039D: - bufpush(0x03BD); - break; - case 0x039E: - bufpush(0x03BE); - break; - case 0x039F: - bufpush(0x03BF); - break; - case 0x03A0: - bufpush(0x03C0); - break; - case 0x03A1: - bufpush(0x03C1); - break; - case 0x03A3: - bufpush(0x03C3); - break; - case 0x03A4: - bufpush(0x03C4); - break; - case 0x03A5: - bufpush(0x03C5); - break; - case 0x03A6: - bufpush(0x03C6); - break; 
- case 0x03A7: - bufpush(0x03C7); - break; - case 0x03A8: - bufpush(0x03C8); - break; - case 0x03A9: - bufpush(0x03C9); - break; - case 0x03AA: - bufpush(0x03CA); - break; - case 0x03AB: - bufpush(0x03CB); - break; - case 0x03B0: - bufpush(0x03C5); - bufpush(0x0308); - bufpush(0x0301); - break; - case 0x03C2: - bufpush(0x03C3); - break; - case 0x03D0: - bufpush(0x03B2); - break; - case 0x03D1: - bufpush(0x03B8); - break; - case 0x03D5: - bufpush(0x03C6); - break; - case 0x03D6: - bufpush(0x03C0); - break; - case 0x03D8: - bufpush(0x03D9); - break; - case 0x03DA: - bufpush(0x03DB); - break; - case 0x03DC: - bufpush(0x03DD); - break; - case 0x03DE: - bufpush(0x03DF); - break; - case 0x03E0: - bufpush(0x03E1); - break; - case 0x03E2: - bufpush(0x03E3); - break; - case 0x03E4: - bufpush(0x03E5); - break; - case 0x03E6: - bufpush(0x03E7); - break; - case 0x03E8: - bufpush(0x03E9); - break; - case 0x03EA: - bufpush(0x03EB); - break; - case 0x03EC: - bufpush(0x03ED); - break; - case 0x03EE: - bufpush(0x03EF); - break; - case 0x03F0: - bufpush(0x03BA); - break; - case 0x03F1: - bufpush(0x03C1); - break; - case 0x03F2: - bufpush(0x03C3); - break; - case 0x03F4: - bufpush(0x03B8); - break; - case 0x03F5: - bufpush(0x03B5); - break; - case 0x0400: - bufpush(0x0450); - break; - case 0x0401: - bufpush(0x0451); - break; - case 0x0402: - bufpush(0x0452); - break; - case 0x0403: - bufpush(0x0453); - break; - case 0x0404: - bufpush(0x0454); - break; - case 0x0405: - bufpush(0x0455); - break; - case 0x0406: - bufpush(0x0456); - break; - case 0x0407: - bufpush(0x0457); - break; - case 0x0408: - bufpush(0x0458); - break; - case 0x0409: - bufpush(0x0459); - break; - case 0x040A: - bufpush(0x045A); - break; - case 0x040B: - bufpush(0x045B); - break; - case 0x040C: - bufpush(0x045C); - break; - case 0x040D: - bufpush(0x045D); - break; - case 0x040E: - bufpush(0x045E); - break; - case 0x040F: - bufpush(0x045F); - break; - case 0x0410: - bufpush(0x0430); - break; - case 0x0411: - 
bufpush(0x0431); - break; - case 0x0412: - bufpush(0x0432); - break; - case 0x0413: - bufpush(0x0433); - break; - case 0x0414: - bufpush(0x0434); - break; - case 0x0415: - bufpush(0x0435); - break; - case 0x0416: - bufpush(0x0436); - break; - case 0x0417: - bufpush(0x0437); - break; - case 0x0418: - bufpush(0x0438); - break; - case 0x0419: - bufpush(0x0439); - break; - case 0x041A: - bufpush(0x043A); - break; - case 0x041B: - bufpush(0x043B); - break; - case 0x041C: - bufpush(0x043C); - break; - case 0x041D: - bufpush(0x043D); - break; - case 0x041E: - bufpush(0x043E); - break; - case 0x041F: - bufpush(0x043F); - break; - case 0x0420: - bufpush(0x0440); - break; - case 0x0421: - bufpush(0x0441); - break; - case 0x0422: - bufpush(0x0442); - break; - case 0x0423: - bufpush(0x0443); - break; - case 0x0424: - bufpush(0x0444); - break; - case 0x0425: - bufpush(0x0445); - break; - case 0x0426: - bufpush(0x0446); - break; - case 0x0427: - bufpush(0x0447); - break; - case 0x0428: - bufpush(0x0448); - break; - case 0x0429: - bufpush(0x0449); - break; - case 0x042A: - bufpush(0x044A); - break; - case 0x042B: - bufpush(0x044B); - break; - case 0x042C: - bufpush(0x044C); - break; - case 0x042D: - bufpush(0x044D); - break; - case 0x042E: - bufpush(0x044E); - break; - case 0x042F: - bufpush(0x044F); - break; - case 0x0460: - bufpush(0x0461); - break; - case 0x0462: - bufpush(0x0463); - break; - case 0x0464: - bufpush(0x0465); - break; - case 0x0466: - bufpush(0x0467); - break; - case 0x0468: - bufpush(0x0469); - break; - case 0x046A: - bufpush(0x046B); - break; - case 0x046C: - bufpush(0x046D); - break; - case 0x046E: - bufpush(0x046F); - break; - case 0x0470: - bufpush(0x0471); - break; - case 0x0472: - bufpush(0x0473); - break; - case 0x0474: - bufpush(0x0475); - break; - case 0x0476: - bufpush(0x0477); - break; - case 0x0478: - bufpush(0x0479); - break; - case 0x047A: - bufpush(0x047B); - break; - case 0x047C: - bufpush(0x047D); - break; - case 0x047E: - bufpush(0x047F); - 
break; - case 0x0480: - bufpush(0x0481); - break; - case 0x048A: - bufpush(0x048B); - break; - case 0x048C: - bufpush(0x048D); - break; - case 0x048E: - bufpush(0x048F); - break; - case 0x0490: - bufpush(0x0491); - break; - case 0x0492: - bufpush(0x0493); - break; - case 0x0494: - bufpush(0x0495); - break; - case 0x0496: - bufpush(0x0497); - break; - case 0x0498: - bufpush(0x0499); - break; - case 0x049A: - bufpush(0x049B); - break; - case 0x049C: - bufpush(0x049D); - break; - case 0x049E: - bufpush(0x049F); - break; - case 0x04A0: - bufpush(0x04A1); - break; - case 0x04A2: - bufpush(0x04A3); - break; - case 0x04A4: - bufpush(0x04A5); - break; - case 0x04A6: - bufpush(0x04A7); - break; - case 0x04A8: - bufpush(0x04A9); - break; - case 0x04AA: - bufpush(0x04AB); - break; - case 0x04AC: - bufpush(0x04AD); - break; - case 0x04AE: - bufpush(0x04AF); - break; - case 0x04B0: - bufpush(0x04B1); - break; - case 0x04B2: - bufpush(0x04B3); - break; - case 0x04B4: - bufpush(0x04B5); - break; - case 0x04B6: - bufpush(0x04B7); - break; - case 0x04B8: - bufpush(0x04B9); - break; - case 0x04BA: - bufpush(0x04BB); - break; - case 0x04BC: - bufpush(0x04BD); - break; - case 0x04BE: - bufpush(0x04BF); - break; - case 0x04C1: - bufpush(0x04C2); - break; - case 0x04C3: - bufpush(0x04C4); - break; - case 0x04C5: - bufpush(0x04C6); - break; - case 0x04C7: - bufpush(0x04C8); - break; - case 0x04C9: - bufpush(0x04CA); - break; - case 0x04CB: - bufpush(0x04CC); - break; - case 0x04CD: - bufpush(0x04CE); - break; - case 0x04D0: - bufpush(0x04D1); - break; - case 0x04D2: - bufpush(0x04D3); - break; - case 0x04D4: - bufpush(0x04D5); - break; - case 0x04D6: - bufpush(0x04D7); - break; - case 0x04D8: - bufpush(0x04D9); - break; - case 0x04DA: - bufpush(0x04DB); - break; - case 0x04DC: - bufpush(0x04DD); - break; - case 0x04DE: - bufpush(0x04DF); - break; - case 0x04E0: - bufpush(0x04E1); - break; - case 0x04E2: - bufpush(0x04E3); - break; - case 0x04E4: - bufpush(0x04E5); - break; - case 0x04E6: 
- bufpush(0x04E7); - break; - case 0x04E8: - bufpush(0x04E9); - break; - case 0x04EA: - bufpush(0x04EB); - break; - case 0x04EC: - bufpush(0x04ED); - break; - case 0x04EE: - bufpush(0x04EF); - break; - case 0x04F0: - bufpush(0x04F1); - break; - case 0x04F2: - bufpush(0x04F3); - break; - case 0x04F4: - bufpush(0x04F5); - break; - case 0x04F8: - bufpush(0x04F9); - break; - case 0x0500: - bufpush(0x0501); - break; - case 0x0502: - bufpush(0x0503); - break; - case 0x0504: - bufpush(0x0505); - break; - case 0x0506: - bufpush(0x0507); - break; - case 0x0508: - bufpush(0x0509); - break; - case 0x050A: - bufpush(0x050B); - break; - case 0x050C: - bufpush(0x050D); - break; - case 0x050E: - bufpush(0x050F); - break; - case 0x0531: - bufpush(0x0561); - break; - case 0x0532: - bufpush(0x0562); - break; - case 0x0533: - bufpush(0x0563); - break; - case 0x0534: - bufpush(0x0564); - break; - case 0x0535: - bufpush(0x0565); - break; - case 0x0536: - bufpush(0x0566); - break; - case 0x0537: - bufpush(0x0567); - break; - case 0x0538: - bufpush(0x0568); - break; - case 0x0539: - bufpush(0x0569); - break; - case 0x053A: - bufpush(0x056A); - break; - case 0x053B: - bufpush(0x056B); - break; - case 0x053C: - bufpush(0x056C); - break; - case 0x053D: - bufpush(0x056D); - break; - case 0x053E: - bufpush(0x056E); - break; - case 0x053F: - bufpush(0x056F); - break; - case 0x0540: - bufpush(0x0570); - break; - case 0x0541: - bufpush(0x0571); - break; - case 0x0542: - bufpush(0x0572); - break; - case 0x0543: - bufpush(0x0573); - break; - case 0x0544: - bufpush(0x0574); - break; - case 0x0545: - bufpush(0x0575); - break; - case 0x0546: - bufpush(0x0576); - break; - case 0x0547: - bufpush(0x0577); - break; - case 0x0548: - bufpush(0x0578); - break; - case 0x0549: - bufpush(0x0579); - break; - case 0x054A: - bufpush(0x057A); - break; - case 0x054B: - bufpush(0x057B); - break; - case 0x054C: - bufpush(0x057C); - break; - case 0x054D: - bufpush(0x057D); - break; - case 0x054E: - bufpush(0x057E); - 
break; - case 0x054F: - bufpush(0x057F); - break; - case 0x0550: - bufpush(0x0580); - break; - case 0x0551: - bufpush(0x0581); - break; - case 0x0552: - bufpush(0x0582); - break; - case 0x0553: - bufpush(0x0583); - break; - case 0x0554: - bufpush(0x0584); - break; - case 0x0555: - bufpush(0x0585); - break; - case 0x0556: - bufpush(0x0586); - break; - case 0x0587: - bufpush(0x0565); - bufpush(0x0582); - break; - case 0x1E00: - bufpush(0x1E01); - break; - case 0x1E02: - bufpush(0x1E03); - break; - case 0x1E04: - bufpush(0x1E05); - break; - case 0x1E06: - bufpush(0x1E07); - break; - case 0x1E08: - bufpush(0x1E09); - break; - case 0x1E0A: - bufpush(0x1E0B); - break; - case 0x1E0C: - bufpush(0x1E0D); - break; - case 0x1E0E: - bufpush(0x1E0F); - break; - case 0x1E10: - bufpush(0x1E11); - break; - case 0x1E12: - bufpush(0x1E13); - break; - case 0x1E14: - bufpush(0x1E15); - break; - case 0x1E16: - bufpush(0x1E17); - break; - case 0x1E18: - bufpush(0x1E19); - break; - case 0x1E1A: - bufpush(0x1E1B); - break; - case 0x1E1C: - bufpush(0x1E1D); - break; - case 0x1E1E: - bufpush(0x1E1F); - break; - case 0x1E20: - bufpush(0x1E21); - break; - case 0x1E22: - bufpush(0x1E23); - break; - case 0x1E24: - bufpush(0x1E25); - break; - case 0x1E26: - bufpush(0x1E27); - break; - case 0x1E28: - bufpush(0x1E29); - break; - case 0x1E2A: - bufpush(0x1E2B); - break; - case 0x1E2C: - bufpush(0x1E2D); - break; - case 0x1E2E: - bufpush(0x1E2F); - break; - case 0x1E30: - bufpush(0x1E31); - break; - case 0x1E32: - bufpush(0x1E33); - break; - case 0x1E34: - bufpush(0x1E35); - break; - case 0x1E36: - bufpush(0x1E37); - break; - case 0x1E38: - bufpush(0x1E39); - break; - case 0x1E3A: - bufpush(0x1E3B); - break; - case 0x1E3C: - bufpush(0x1E3D); - break; - case 0x1E3E: - bufpush(0x1E3F); - break; - case 0x1E40: - bufpush(0x1E41); - break; - case 0x1E42: - bufpush(0x1E43); - break; - case 0x1E44: - bufpush(0x1E45); - break; - case 0x1E46: - bufpush(0x1E47); - break; - case 0x1E48: - bufpush(0x1E49); - 
break; - case 0x1E4A: - bufpush(0x1E4B); - break; - case 0x1E4C: - bufpush(0x1E4D); - break; - case 0x1E4E: - bufpush(0x1E4F); - break; - case 0x1E50: - bufpush(0x1E51); - break; - case 0x1E52: - bufpush(0x1E53); - break; - case 0x1E54: - bufpush(0x1E55); - break; - case 0x1E56: - bufpush(0x1E57); - break; - case 0x1E58: - bufpush(0x1E59); - break; - case 0x1E5A: - bufpush(0x1E5B); - break; - case 0x1E5C: - bufpush(0x1E5D); - break; - case 0x1E5E: - bufpush(0x1E5F); - break; - case 0x1E60: - bufpush(0x1E61); - break; - case 0x1E62: - bufpush(0x1E63); - break; - case 0x1E64: - bufpush(0x1E65); - break; - case 0x1E66: - bufpush(0x1E67); - break; - case 0x1E68: - bufpush(0x1E69); - break; - case 0x1E6A: - bufpush(0x1E6B); - break; - case 0x1E6C: - bufpush(0x1E6D); - break; - case 0x1E6E: - bufpush(0x1E6F); - break; - case 0x1E70: - bufpush(0x1E71); - break; - case 0x1E72: - bufpush(0x1E73); - break; - case 0x1E74: - bufpush(0x1E75); - break; - case 0x1E76: - bufpush(0x1E77); - break; - case 0x1E78: - bufpush(0x1E79); - break; - case 0x1E7A: - bufpush(0x1E7B); - break; - case 0x1E7C: - bufpush(0x1E7D); - break; - case 0x1E7E: - bufpush(0x1E7F); - break; - case 0x1E80: - bufpush(0x1E81); - break; - case 0x1E82: - bufpush(0x1E83); - break; - case 0x1E84: - bufpush(0x1E85); - break; - case 0x1E86: - bufpush(0x1E87); - break; - case 0x1E88: - bufpush(0x1E89); - break; - case 0x1E8A: - bufpush(0x1E8B); - break; - case 0x1E8C: - bufpush(0x1E8D); - break; - case 0x1E8E: - bufpush(0x1E8F); - break; - case 0x1E90: - bufpush(0x1E91); - break; - case 0x1E92: - bufpush(0x1E93); - break; - case 0x1E94: - bufpush(0x1E95); - break; - case 0x1E96: - bufpush(0x0068); - bufpush(0x0331); - break; - case 0x1E97: - bufpush(0x0074); - bufpush(0x0308); - break; - case 0x1E98: - bufpush(0x0077); - bufpush(0x030A); - break; - case 0x1E99: - bufpush(0x0079); - bufpush(0x030A); - break; - case 0x1E9A: - bufpush(0x0061); - bufpush(0x02BE); - break; - case 0x1E9B: - bufpush(0x1E61); - break; - 
case 0x1EA0: - bufpush(0x1EA1); - break; - case 0x1EA2: - bufpush(0x1EA3); - break; - case 0x1EA4: - bufpush(0x1EA5); - break; - case 0x1EA6: - bufpush(0x1EA7); - break; - case 0x1EA8: - bufpush(0x1EA9); - break; - case 0x1EAA: - bufpush(0x1EAB); - break; - case 0x1EAC: - bufpush(0x1EAD); - break; - case 0x1EAE: - bufpush(0x1EAF); - break; - case 0x1EB0: - bufpush(0x1EB1); - break; - case 0x1EB2: - bufpush(0x1EB3); - break; - case 0x1EB4: - bufpush(0x1EB5); - break; - case 0x1EB6: - bufpush(0x1EB7); - break; - case 0x1EB8: - bufpush(0x1EB9); - break; - case 0x1EBA: - bufpush(0x1EBB); - break; - case 0x1EBC: - bufpush(0x1EBD); - break; - case 0x1EBE: - bufpush(0x1EBF); - break; - case 0x1EC0: - bufpush(0x1EC1); - break; - case 0x1EC2: - bufpush(0x1EC3); - break; - case 0x1EC4: - bufpush(0x1EC5); - break; - case 0x1EC6: - bufpush(0x1EC7); - break; - case 0x1EC8: - bufpush(0x1EC9); - break; - case 0x1ECA: - bufpush(0x1ECB); - break; - case 0x1ECC: - bufpush(0x1ECD); - break; - case 0x1ECE: - bufpush(0x1ECF); - break; - case 0x1ED0: - bufpush(0x1ED1); - break; - case 0x1ED2: - bufpush(0x1ED3); - break; - case 0x1ED4: - bufpush(0x1ED5); - break; - case 0x1ED6: - bufpush(0x1ED7); - break; - case 0x1ED8: - bufpush(0x1ED9); - break; - case 0x1EDA: - bufpush(0x1EDB); - break; - case 0x1EDC: - bufpush(0x1EDD); - break; - case 0x1EDE: - bufpush(0x1EDF); - break; - case 0x1EE0: - bufpush(0x1EE1); - break; - case 0x1EE2: - bufpush(0x1EE3); - break; - case 0x1EE4: - bufpush(0x1EE5); - break; - case 0x1EE6: - bufpush(0x1EE7); - break; - case 0x1EE8: - bufpush(0x1EE9); - break; - case 0x1EEA: - bufpush(0x1EEB); - break; - case 0x1EEC: - bufpush(0x1EED); - break; - case 0x1EEE: - bufpush(0x1EEF); - break; - case 0x1EF0: - bufpush(0x1EF1); - break; - case 0x1EF2: - bufpush(0x1EF3); - break; - case 0x1EF4: - bufpush(0x1EF5); - break; - case 0x1EF6: - bufpush(0x1EF7); - break; - case 0x1EF8: - bufpush(0x1EF9); - break; - case 0x1F08: - bufpush(0x1F00); - break; - case 0x1F09: - 
bufpush(0x1F01); - break; - case 0x1F0A: - bufpush(0x1F02); - break; - case 0x1F0B: - bufpush(0x1F03); - break; - case 0x1F0C: - bufpush(0x1F04); - break; - case 0x1F0D: - bufpush(0x1F05); - break; - case 0x1F0E: - bufpush(0x1F06); - break; - case 0x1F0F: - bufpush(0x1F07); - break; - case 0x1F18: - bufpush(0x1F10); - break; - case 0x1F19: - bufpush(0x1F11); - break; - case 0x1F1A: - bufpush(0x1F12); - break; - case 0x1F1B: - bufpush(0x1F13); - break; - case 0x1F1C: - bufpush(0x1F14); - break; - case 0x1F1D: - bufpush(0x1F15); - break; - case 0x1F28: - bufpush(0x1F20); - break; - case 0x1F29: - bufpush(0x1F21); - break; - case 0x1F2A: - bufpush(0x1F22); - break; - case 0x1F2B: - bufpush(0x1F23); - break; - case 0x1F2C: - bufpush(0x1F24); - break; - case 0x1F2D: - bufpush(0x1F25); - break; - case 0x1F2E: - bufpush(0x1F26); - break; - case 0x1F2F: - bufpush(0x1F27); - break; - case 0x1F38: - bufpush(0x1F30); - break; - case 0x1F39: - bufpush(0x1F31); - break; - case 0x1F3A: - bufpush(0x1F32); - break; - case 0x1F3B: - bufpush(0x1F33); - break; - case 0x1F3C: - bufpush(0x1F34); - break; - case 0x1F3D: - bufpush(0x1F35); - break; - case 0x1F3E: - bufpush(0x1F36); - break; - case 0x1F3F: - bufpush(0x1F37); - break; - case 0x1F48: - bufpush(0x1F40); - break; - case 0x1F49: - bufpush(0x1F41); - break; - case 0x1F4A: - bufpush(0x1F42); - break; - case 0x1F4B: - bufpush(0x1F43); - break; - case 0x1F4C: - bufpush(0x1F44); - break; - case 0x1F4D: - bufpush(0x1F45); - break; - case 0x1F50: - bufpush(0x03C5); - bufpush(0x0313); - break; - case 0x1F52: - bufpush(0x03C5); - bufpush(0x0313); - bufpush(0x0300); - break; - case 0x1F54: - bufpush(0x03C5); - bufpush(0x0313); - bufpush(0x0301); - break; - case 0x1F56: - bufpush(0x03C5); - bufpush(0x0313); - bufpush(0x0342); - break; - case 0x1F59: - bufpush(0x1F51); - break; - case 0x1F5B: - bufpush(0x1F53); - break; - case 0x1F5D: - bufpush(0x1F55); - break; - case 0x1F5F: - bufpush(0x1F57); - break; - case 0x1F68: - bufpush(0x1F60); 
- break; - case 0x1F69: - bufpush(0x1F61); - break; - case 0x1F6A: - bufpush(0x1F62); - break; - case 0x1F6B: - bufpush(0x1F63); - break; - case 0x1F6C: - bufpush(0x1F64); - break; - case 0x1F6D: - bufpush(0x1F65); - break; - case 0x1F6E: - bufpush(0x1F66); - break; - case 0x1F6F: - bufpush(0x1F67); - break; - case 0x1F80: - bufpush(0x1F00); - bufpush(0x03B9); - break; - case 0x1F81: - bufpush(0x1F01); - bufpush(0x03B9); - break; - case 0x1F82: - bufpush(0x1F02); - bufpush(0x03B9); - break; - case 0x1F83: - bufpush(0x1F03); - bufpush(0x03B9); - break; - case 0x1F84: - bufpush(0x1F04); - bufpush(0x03B9); - break; - case 0x1F85: - bufpush(0x1F05); - bufpush(0x03B9); - break; - case 0x1F86: - bufpush(0x1F06); - bufpush(0x03B9); - break; - case 0x1F87: - bufpush(0x1F07); - bufpush(0x03B9); - break; - case 0x1F88: - bufpush(0x1F00); - bufpush(0x03B9); - break; - case 0x1F89: - bufpush(0x1F01); - bufpush(0x03B9); - break; - case 0x1F8A: - bufpush(0x1F02); - bufpush(0x03B9); - break; - case 0x1F8B: - bufpush(0x1F03); - bufpush(0x03B9); - break; - case 0x1F8C: - bufpush(0x1F04); - bufpush(0x03B9); - break; - case 0x1F8D: - bufpush(0x1F05); - bufpush(0x03B9); - break; - case 0x1F8E: - bufpush(0x1F06); - bufpush(0x03B9); - break; - case 0x1F8F: - bufpush(0x1F07); - bufpush(0x03B9); - break; - case 0x1F90: - bufpush(0x1F20); - bufpush(0x03B9); - break; - case 0x1F91: - bufpush(0x1F21); - bufpush(0x03B9); - break; - case 0x1F92: - bufpush(0x1F22); - bufpush(0x03B9); - break; - case 0x1F93: - bufpush(0x1F23); - bufpush(0x03B9); - break; - case 0x1F94: - bufpush(0x1F24); - bufpush(0x03B9); - break; - case 0x1F95: - bufpush(0x1F25); - bufpush(0x03B9); - break; - case 0x1F96: - bufpush(0x1F26); - bufpush(0x03B9); - break; - case 0x1F97: - bufpush(0x1F27); - bufpush(0x03B9); - break; - case 0x1F98: - bufpush(0x1F20); - bufpush(0x03B9); - break; - case 0x1F99: - bufpush(0x1F21); - bufpush(0x03B9); - break; - case 0x1F9A: - bufpush(0x1F22); - bufpush(0x03B9); - break; - case 0x1F9B: 
- bufpush(0x1F23); - bufpush(0x03B9); - break; - case 0x1F9C: - bufpush(0x1F24); - bufpush(0x03B9); - break; - case 0x1F9D: - bufpush(0x1F25); - bufpush(0x03B9); - break; - case 0x1F9E: - bufpush(0x1F26); - bufpush(0x03B9); - break; - case 0x1F9F: - bufpush(0x1F27); - bufpush(0x03B9); - break; - case 0x1FA0: - bufpush(0x1F60); - bufpush(0x03B9); - break; - case 0x1FA1: - bufpush(0x1F61); - bufpush(0x03B9); - break; - case 0x1FA2: - bufpush(0x1F62); - bufpush(0x03B9); - break; - case 0x1FA3: - bufpush(0x1F63); - bufpush(0x03B9); - break; - case 0x1FA4: - bufpush(0x1F64); - bufpush(0x03B9); - break; - case 0x1FA5: - bufpush(0x1F65); - bufpush(0x03B9); - break; - case 0x1FA6: - bufpush(0x1F66); - bufpush(0x03B9); - break; - case 0x1FA7: - bufpush(0x1F67); - bufpush(0x03B9); - break; - case 0x1FA8: - bufpush(0x1F60); - bufpush(0x03B9); - break; - case 0x1FA9: - bufpush(0x1F61); - bufpush(0x03B9); - break; - case 0x1FAA: - bufpush(0x1F62); - bufpush(0x03B9); - break; - case 0x1FAB: - bufpush(0x1F63); - bufpush(0x03B9); - break; - case 0x1FAC: - bufpush(0x1F64); - bufpush(0x03B9); - break; - case 0x1FAD: - bufpush(0x1F65); - bufpush(0x03B9); - break; - case 0x1FAE: - bufpush(0x1F66); - bufpush(0x03B9); - break; - case 0x1FAF: - bufpush(0x1F67); - bufpush(0x03B9); - break; - case 0x1FB2: - bufpush(0x1F70); - bufpush(0x03B9); - break; - case 0x1FB3: - bufpush(0x03B1); - bufpush(0x03B9); - break; - case 0x1FB4: - bufpush(0x03AC); - bufpush(0x03B9); - break; - case 0x1FB6: - bufpush(0x03B1); - bufpush(0x0342); - break; - case 0x1FB7: - bufpush(0x03B1); - bufpush(0x0342); - bufpush(0x03B9); - break; - case 0x1FB8: - bufpush(0x1FB0); - break; - case 0x1FB9: - bufpush(0x1FB1); - break; - case 0x1FBA: - bufpush(0x1F70); - break; - case 0x1FBB: - bufpush(0x1F71); - break; - case 0x1FBC: - bufpush(0x03B1); - bufpush(0x03B9); - break; - case 0x1FBE: - bufpush(0x03B9); - break; - case 0x1FC2: - bufpush(0x1F74); - bufpush(0x03B9); - break; - case 0x1FC3: - bufpush(0x03B7); - 
bufpush(0x03B9); - break; - case 0x1FC4: - bufpush(0x03AE); - bufpush(0x03B9); - break; - case 0x1FC6: - bufpush(0x03B7); - bufpush(0x0342); - break; - case 0x1FC7: - bufpush(0x03B7); - bufpush(0x0342); - bufpush(0x03B9); - break; - case 0x1FC8: - bufpush(0x1F72); - break; - case 0x1FC9: - bufpush(0x1F73); - break; - case 0x1FCA: - bufpush(0x1F74); - break; - case 0x1FCB: - bufpush(0x1F75); - break; - case 0x1FCC: - bufpush(0x03B7); - bufpush(0x03B9); - break; - case 0x1FD2: - bufpush(0x03B9); - bufpush(0x0308); - bufpush(0x0300); - break; - case 0x1FD3: - bufpush(0x03B9); - bufpush(0x0308); - bufpush(0x0301); - break; - case 0x1FD6: - bufpush(0x03B9); - bufpush(0x0342); - break; - case 0x1FD7: - bufpush(0x03B9); - bufpush(0x0308); - bufpush(0x0342); - break; - case 0x1FD8: - bufpush(0x1FD0); - break; - case 0x1FD9: - bufpush(0x1FD1); - break; - case 0x1FDA: - bufpush(0x1F76); - break; - case 0x1FDB: - bufpush(0x1F77); - break; - case 0x1FE2: - bufpush(0x03C5); - bufpush(0x0308); - bufpush(0x0300); - break; - case 0x1FE3: - bufpush(0x03C5); - bufpush(0x0308); - bufpush(0x0301); - break; - case 0x1FE4: - bufpush(0x03C1); - bufpush(0x0313); - break; - case 0x1FE6: - bufpush(0x03C5); - bufpush(0x0342); - break; - case 0x1FE7: - bufpush(0x03C5); - bufpush(0x0308); - bufpush(0x0342); - break; - case 0x1FE8: - bufpush(0x1FE0); - break; - case 0x1FE9: - bufpush(0x1FE1); - break; - case 0x1FEA: - bufpush(0x1F7A); - break; - case 0x1FEB: - bufpush(0x1F7B); - break; - case 0x1FEC: - bufpush(0x1FE5); - break; - case 0x1FF2: - bufpush(0x1F7C); - bufpush(0x03B9); - break; - case 0x1FF3: - bufpush(0x03C9); - bufpush(0x03B9); - break; - case 0x1FF4: - bufpush(0x03CE); - bufpush(0x03B9); - break; - case 0x1FF6: - bufpush(0x03C9); - bufpush(0x0342); - break; - case 0x1FF7: - bufpush(0x03C9); - bufpush(0x0342); - bufpush(0x03B9); - break; - case 0x1FF8: - bufpush(0x1F78); - break; - case 0x1FF9: - bufpush(0x1F79); - break; - case 0x1FFA: - bufpush(0x1F7C); - break; - case 0x1FFB: - 
bufpush(0x1F7D); - break; - case 0x1FFC: - bufpush(0x03C9); - bufpush(0x03B9); - break; - case 0x2126: - bufpush(0x03C9); - break; - case 0x212A: - bufpush(0x006B); - break; - case 0x212B: - bufpush(0x00E5); - break; - case 0x2160: - bufpush(0x2170); - break; - case 0x2161: - bufpush(0x2171); - break; - case 0x2162: - bufpush(0x2172); - break; - case 0x2163: - bufpush(0x2173); - break; - case 0x2164: - bufpush(0x2174); - break; - case 0x2165: - bufpush(0x2175); - break; - case 0x2166: - bufpush(0x2176); - break; - case 0x2167: - bufpush(0x2177); - break; - case 0x2168: - bufpush(0x2178); - break; - case 0x2169: - bufpush(0x2179); - break; - case 0x216A: - bufpush(0x217A); - break; - case 0x216B: - bufpush(0x217B); - break; - case 0x216C: - bufpush(0x217C); - break; - case 0x216D: - bufpush(0x217D); - break; - case 0x216E: - bufpush(0x217E); - break; - case 0x216F: - bufpush(0x217F); - break; - case 0x24B6: - bufpush(0x24D0); - break; - case 0x24B7: - bufpush(0x24D1); - break; - case 0x24B8: - bufpush(0x24D2); - break; - case 0x24B9: - bufpush(0x24D3); - break; - case 0x24BA: - bufpush(0x24D4); - break; - case 0x24BB: - bufpush(0x24D5); - break; - case 0x24BC: - bufpush(0x24D6); - break; - case 0x24BD: - bufpush(0x24D7); - break; - case 0x24BE: - bufpush(0x24D8); - break; - case 0x24BF: - bufpush(0x24D9); - break; - case 0x24C0: - bufpush(0x24DA); - break; - case 0x24C1: - bufpush(0x24DB); - break; - case 0x24C2: - bufpush(0x24DC); - break; - case 0x24C3: - bufpush(0x24DD); - break; - case 0x24C4: - bufpush(0x24DE); - break; - case 0x24C5: - bufpush(0x24DF); - break; - case 0x24C6: - bufpush(0x24E0); - break; - case 0x24C7: - bufpush(0x24E1); - break; - case 0x24C8: - bufpush(0x24E2); - break; - case 0x24C9: - bufpush(0x24E3); - break; - case 0x24CA: - bufpush(0x24E4); - break; - case 0x24CB: - bufpush(0x24E5); - break; - case 0x24CC: - bufpush(0x24E6); - break; - case 0x24CD: - bufpush(0x24E7); - break; - case 0x24CE: - bufpush(0x24E8); - break; - case 0x24CF: - 
bufpush(0x24E9); - break; - case 0xFB00: - bufpush(0x0066); - bufpush(0x0066); - break; - case 0xFB01: - bufpush(0x0066); - bufpush(0x0069); - break; - case 0xFB02: - bufpush(0x0066); - bufpush(0x006C); - break; - case 0xFB03: - bufpush(0x0066); - bufpush(0x0066); - bufpush(0x0069); - break; - case 0xFB04: - bufpush(0x0066); - bufpush(0x0066); - bufpush(0x006C); - break; - case 0xFB05: - bufpush(0x0073); - bufpush(0x0074); - break; - case 0xFB06: - bufpush(0x0073); - bufpush(0x0074); - break; - case 0xFB13: - bufpush(0x0574); - bufpush(0x0576); - break; - case 0xFB14: - bufpush(0x0574); - bufpush(0x0565); - break; - case 0xFB15: - bufpush(0x0574); - bufpush(0x056B); - break; - case 0xFB16: - bufpush(0x057E); - bufpush(0x0576); - break; - case 0xFB17: - bufpush(0x0574); - bufpush(0x056D); - break; - case 0xFF21: - bufpush(0xFF41); - break; - case 0xFF22: - bufpush(0xFF42); - break; - case 0xFF23: - bufpush(0xFF43); - break; - case 0xFF24: - bufpush(0xFF44); - break; - case 0xFF25: - bufpush(0xFF45); - break; - case 0xFF26: - bufpush(0xFF46); - break; - case 0xFF27: - bufpush(0xFF47); - break; - case 0xFF28: - bufpush(0xFF48); - break; - case 0xFF29: - bufpush(0xFF49); - break; - case 0xFF2A: - bufpush(0xFF4A); - break; - case 0xFF2B: - bufpush(0xFF4B); - break; - case 0xFF2C: - bufpush(0xFF4C); - break; - case 0xFF2D: - bufpush(0xFF4D); - break; - case 0xFF2E: - bufpush(0xFF4E); - break; - case 0xFF2F: - bufpush(0xFF4F); - break; - case 0xFF30: - bufpush(0xFF50); - break; - case 0xFF31: - bufpush(0xFF51); - break; - case 0xFF32: - bufpush(0xFF52); - break; - case 0xFF33: - bufpush(0xFF53); - break; - case 0xFF34: - bufpush(0xFF54); - break; - case 0xFF35: - bufpush(0xFF55); - break; - case 0xFF36: - bufpush(0xFF56); - break; - case 0xFF37: - bufpush(0xFF57); - break; - case 0xFF38: - bufpush(0xFF58); - break; - case 0xFF39: - bufpush(0xFF59); - break; - case 0xFF3A: - bufpush(0xFF5A); - break; - case 0x10400: - bufpush(0x10428); - break; - case 0x10401: - 
bufpush(0x10429); - break; - case 0x10402: - bufpush(0x1042A); - break; - case 0x10403: - bufpush(0x1042B); - break; - case 0x10404: - bufpush(0x1042C); - break; - case 0x10405: - bufpush(0x1042D); - break; - case 0x10406: - bufpush(0x1042E); - break; - case 0x10407: - bufpush(0x1042F); - break; - case 0x10408: - bufpush(0x10430); - break; - case 0x10409: - bufpush(0x10431); - break; - case 0x1040A: - bufpush(0x10432); - break; - case 0x1040B: - bufpush(0x10433); - break; - case 0x1040C: - bufpush(0x10434); - break; - case 0x1040D: - bufpush(0x10435); - break; - case 0x1040E: - bufpush(0x10436); - break; - case 0x1040F: - bufpush(0x10437); - break; - case 0x10410: - bufpush(0x10438); - break; - case 0x10411: - bufpush(0x10439); - break; - case 0x10412: - bufpush(0x1043A); - break; - case 0x10413: - bufpush(0x1043B); - break; - case 0x10414: - bufpush(0x1043C); - break; - case 0x10415: - bufpush(0x1043D); - break; - case 0x10416: - bufpush(0x1043E); - break; - case 0x10417: - bufpush(0x1043F); - break; - case 0x10418: - bufpush(0x10440); - break; - case 0x10419: - bufpush(0x10441); - break; - case 0x1041A: - bufpush(0x10442); - break; - case 0x1041B: - bufpush(0x10443); - break; - case 0x1041C: - bufpush(0x10444); - break; - case 0x1041D: - bufpush(0x10445); - break; - case 0x1041E: - bufpush(0x10446); - break; - case 0x1041F: - bufpush(0x10447); - break; - case 0x10420: - bufpush(0x10448); - break; - case 0x10421: - bufpush(0x10449); - break; - case 0x10422: - bufpush(0x1044A); - break; - case 0x10423: - bufpush(0x1044B); - break; - case 0x10424: - bufpush(0x1044C); - break; - case 0x10425: - bufpush(0x1044D); - break; - default: - bufpush(c); - } diff --git a/src/case_fold_switch.inc b/src/case_fold_switch.inc new file mode 100644 index 0000000..70fdd75 --- /dev/null +++ b/src/case_fold_switch.inc @@ -0,0 +1,2637 @@ + switch (c) { + case 0x0041: + bufpush(0x0061); + break; + case 0x0042: + bufpush(0x0062); + break; + case 0x0043: + bufpush(0x0063); + break; + case 
0x0044: + bufpush(0x0064); + break; + case 0x0045: + bufpush(0x0065); + break; + case 0x0046: + bufpush(0x0066); + break; + case 0x0047: + bufpush(0x0067); + break; + case 0x0048: + bufpush(0x0068); + break; + case 0x0049: + bufpush(0x0069); + break; + case 0x004A: + bufpush(0x006A); + break; + case 0x004B: + bufpush(0x006B); + break; + case 0x004C: + bufpush(0x006C); + break; + case 0x004D: + bufpush(0x006D); + break; + case 0x004E: + bufpush(0x006E); + break; + case 0x004F: + bufpush(0x006F); + break; + case 0x0050: + bufpush(0x0070); + break; + case 0x0051: + bufpush(0x0071); + break; + case 0x0052: + bufpush(0x0072); + break; + case 0x0053: + bufpush(0x0073); + break; + case 0x0054: + bufpush(0x0074); + break; + case 0x0055: + bufpush(0x0075); + break; + case 0x0056: + bufpush(0x0076); + break; + case 0x0057: + bufpush(0x0077); + break; + case 0x0058: + bufpush(0x0078); + break; + case 0x0059: + bufpush(0x0079); + break; + case 0x005A: + bufpush(0x007A); + break; + case 0x00B5: + bufpush(0x03BC); + break; + case 0x00C0: + bufpush(0x00E0); + break; + case 0x00C1: + bufpush(0x00E1); + break; + case 0x00C2: + bufpush(0x00E2); + break; + case 0x00C3: + bufpush(0x00E3); + break; + case 0x00C4: + bufpush(0x00E4); + break; + case 0x00C5: + bufpush(0x00E5); + break; + case 0x00C6: + bufpush(0x00E6); + break; + case 0x00C7: + bufpush(0x00E7); + break; + case 0x00C8: + bufpush(0x00E8); + break; + case 0x00C9: + bufpush(0x00E9); + break; + case 0x00CA: + bufpush(0x00EA); + break; + case 0x00CB: + bufpush(0x00EB); + break; + case 0x00CC: + bufpush(0x00EC); + break; + case 0x00CD: + bufpush(0x00ED); + break; + case 0x00CE: + bufpush(0x00EE); + break; + case 0x00CF: + bufpush(0x00EF); + break; + case 0x00D0: + bufpush(0x00F0); + break; + case 0x00D1: + bufpush(0x00F1); + break; + case 0x00D2: + bufpush(0x00F2); + break; + case 0x00D3: + bufpush(0x00F3); + break; + case 0x00D4: + bufpush(0x00F4); + break; + case 0x00D5: + bufpush(0x00F5); + break; + case 0x00D6: + 
bufpush(0x00F6); + break; + case 0x00D8: + bufpush(0x00F8); + break; + case 0x00D9: + bufpush(0x00F9); + break; + case 0x00DA: + bufpush(0x00FA); + break; + case 0x00DB: + bufpush(0x00FB); + break; + case 0x00DC: + bufpush(0x00FC); + break; + case 0x00DD: + bufpush(0x00FD); + break; + case 0x00DE: + bufpush(0x00FE); + break; + case 0x00DF: + bufpush(0x0073); + bufpush(0x0073); + break; + case 0x0100: + bufpush(0x0101); + break; + case 0x0102: + bufpush(0x0103); + break; + case 0x0104: + bufpush(0x0105); + break; + case 0x0106: + bufpush(0x0107); + break; + case 0x0108: + bufpush(0x0109); + break; + case 0x010A: + bufpush(0x010B); + break; + case 0x010C: + bufpush(0x010D); + break; + case 0x010E: + bufpush(0x010F); + break; + case 0x0110: + bufpush(0x0111); + break; + case 0x0112: + bufpush(0x0113); + break; + case 0x0114: + bufpush(0x0115); + break; + case 0x0116: + bufpush(0x0117); + break; + case 0x0118: + bufpush(0x0119); + break; + case 0x011A: + bufpush(0x011B); + break; + case 0x011C: + bufpush(0x011D); + break; + case 0x011E: + bufpush(0x011F); + break; + case 0x0120: + bufpush(0x0121); + break; + case 0x0122: + bufpush(0x0123); + break; + case 0x0124: + bufpush(0x0125); + break; + case 0x0126: + bufpush(0x0127); + break; + case 0x0128: + bufpush(0x0129); + break; + case 0x012A: + bufpush(0x012B); + break; + case 0x012C: + bufpush(0x012D); + break; + case 0x012E: + bufpush(0x012F); + break; + case 0x0130: + bufpush(0x0069); + bufpush(0x0307); + break; + case 0x0132: + bufpush(0x0133); + break; + case 0x0134: + bufpush(0x0135); + break; + case 0x0136: + bufpush(0x0137); + break; + case 0x0139: + bufpush(0x013A); + break; + case 0x013B: + bufpush(0x013C); + break; + case 0x013D: + bufpush(0x013E); + break; + case 0x013F: + bufpush(0x0140); + break; + case 0x0141: + bufpush(0x0142); + break; + case 0x0143: + bufpush(0x0144); + break; + case 0x0145: + bufpush(0x0146); + break; + case 0x0147: + bufpush(0x0148); + break; + case 0x0149: + bufpush(0x02BC); + 
bufpush(0x006E); + break; + case 0x014A: + bufpush(0x014B); + break; + case 0x014C: + bufpush(0x014D); + break; + case 0x014E: + bufpush(0x014F); + break; + case 0x0150: + bufpush(0x0151); + break; + case 0x0152: + bufpush(0x0153); + break; + case 0x0154: + bufpush(0x0155); + break; + case 0x0156: + bufpush(0x0157); + break; + case 0x0158: + bufpush(0x0159); + break; + case 0x015A: + bufpush(0x015B); + break; + case 0x015C: + bufpush(0x015D); + break; + case 0x015E: + bufpush(0x015F); + break; + case 0x0160: + bufpush(0x0161); + break; + case 0x0162: + bufpush(0x0163); + break; + case 0x0164: + bufpush(0x0165); + break; + case 0x0166: + bufpush(0x0167); + break; + case 0x0168: + bufpush(0x0169); + break; + case 0x016A: + bufpush(0x016B); + break; + case 0x016C: + bufpush(0x016D); + break; + case 0x016E: + bufpush(0x016F); + break; + case 0x0170: + bufpush(0x0171); + break; + case 0x0172: + bufpush(0x0173); + break; + case 0x0174: + bufpush(0x0175); + break; + case 0x0176: + bufpush(0x0177); + break; + case 0x0178: + bufpush(0x00FF); + break; + case 0x0179: + bufpush(0x017A); + break; + case 0x017B: + bufpush(0x017C); + break; + case 0x017D: + bufpush(0x017E); + break; + case 0x017F: + bufpush(0x0073); + break; + case 0x0181: + bufpush(0x0253); + break; + case 0x0182: + bufpush(0x0183); + break; + case 0x0184: + bufpush(0x0185); + break; + case 0x0186: + bufpush(0x0254); + break; + case 0x0187: + bufpush(0x0188); + break; + case 0x0189: + bufpush(0x0256); + break; + case 0x018A: + bufpush(0x0257); + break; + case 0x018B: + bufpush(0x018C); + break; + case 0x018E: + bufpush(0x01DD); + break; + case 0x018F: + bufpush(0x0259); + break; + case 0x0190: + bufpush(0x025B); + break; + case 0x0191: + bufpush(0x0192); + break; + case 0x0193: + bufpush(0x0260); + break; + case 0x0194: + bufpush(0x0263); + break; + case 0x0196: + bufpush(0x0269); + break; + case 0x0197: + bufpush(0x0268); + break; + case 0x0198: + bufpush(0x0199); + break; + case 0x019C: + bufpush(0x026F); + 
break; + case 0x019D: + bufpush(0x0272); + break; + case 0x019F: + bufpush(0x0275); + break; + case 0x01A0: + bufpush(0x01A1); + break; + case 0x01A2: + bufpush(0x01A3); + break; + case 0x01A4: + bufpush(0x01A5); + break; + case 0x01A6: + bufpush(0x0280); + break; + case 0x01A7: + bufpush(0x01A8); + break; + case 0x01A9: + bufpush(0x0283); + break; + case 0x01AC: + bufpush(0x01AD); + break; + case 0x01AE: + bufpush(0x0288); + break; + case 0x01AF: + bufpush(0x01B0); + break; + case 0x01B1: + bufpush(0x028A); + break; + case 0x01B2: + bufpush(0x028B); + break; + case 0x01B3: + bufpush(0x01B4); + break; + case 0x01B5: + bufpush(0x01B6); + break; + case 0x01B7: + bufpush(0x0292); + break; + case 0x01B8: + bufpush(0x01B9); + break; + case 0x01BC: + bufpush(0x01BD); + break; + case 0x01C4: + bufpush(0x01C6); + break; + case 0x01C5: + bufpush(0x01C6); + break; + case 0x01C7: + bufpush(0x01C9); + break; + case 0x01C8: + bufpush(0x01C9); + break; + case 0x01CA: + bufpush(0x01CC); + break; + case 0x01CB: + bufpush(0x01CC); + break; + case 0x01CD: + bufpush(0x01CE); + break; + case 0x01CF: + bufpush(0x01D0); + break; + case 0x01D1: + bufpush(0x01D2); + break; + case 0x01D3: + bufpush(0x01D4); + break; + case 0x01D5: + bufpush(0x01D6); + break; + case 0x01D7: + bufpush(0x01D8); + break; + case 0x01D9: + bufpush(0x01DA); + break; + case 0x01DB: + bufpush(0x01DC); + break; + case 0x01DE: + bufpush(0x01DF); + break; + case 0x01E0: + bufpush(0x01E1); + break; + case 0x01E2: + bufpush(0x01E3); + break; + case 0x01E4: + bufpush(0x01E5); + break; + case 0x01E6: + bufpush(0x01E7); + break; + case 0x01E8: + bufpush(0x01E9); + break; + case 0x01EA: + bufpush(0x01EB); + break; + case 0x01EC: + bufpush(0x01ED); + break; + case 0x01EE: + bufpush(0x01EF); + break; + case 0x01F0: + bufpush(0x006A); + bufpush(0x030C); + break; + case 0x01F1: + bufpush(0x01F3); + break; + case 0x01F2: + bufpush(0x01F3); + break; + case 0x01F4: + bufpush(0x01F5); + break; + case 0x01F6: + bufpush(0x0195); + 
break; + case 0x01F7: + bufpush(0x01BF); + break; + case 0x01F8: + bufpush(0x01F9); + break; + case 0x01FA: + bufpush(0x01FB); + break; + case 0x01FC: + bufpush(0x01FD); + break; + case 0x01FE: + bufpush(0x01FF); + break; + case 0x0200: + bufpush(0x0201); + break; + case 0x0202: + bufpush(0x0203); + break; + case 0x0204: + bufpush(0x0205); + break; + case 0x0206: + bufpush(0x0207); + break; + case 0x0208: + bufpush(0x0209); + break; + case 0x020A: + bufpush(0x020B); + break; + case 0x020C: + bufpush(0x020D); + break; + case 0x020E: + bufpush(0x020F); + break; + case 0x0210: + bufpush(0x0211); + break; + case 0x0212: + bufpush(0x0213); + break; + case 0x0214: + bufpush(0x0215); + break; + case 0x0216: + bufpush(0x0217); + break; + case 0x0218: + bufpush(0x0219); + break; + case 0x021A: + bufpush(0x021B); + break; + case 0x021C: + bufpush(0x021D); + break; + case 0x021E: + bufpush(0x021F); + break; + case 0x0220: + bufpush(0x019E); + break; + case 0x0222: + bufpush(0x0223); + break; + case 0x0224: + bufpush(0x0225); + break; + case 0x0226: + bufpush(0x0227); + break; + case 0x0228: + bufpush(0x0229); + break; + case 0x022A: + bufpush(0x022B); + break; + case 0x022C: + bufpush(0x022D); + break; + case 0x022E: + bufpush(0x022F); + break; + case 0x0230: + bufpush(0x0231); + break; + case 0x0232: + bufpush(0x0233); + break; + case 0x0345: + bufpush(0x03B9); + break; + case 0x0386: + bufpush(0x03AC); + break; + case 0x0388: + bufpush(0x03AD); + break; + case 0x0389: + bufpush(0x03AE); + break; + case 0x038A: + bufpush(0x03AF); + break; + case 0x038C: + bufpush(0x03CC); + break; + case 0x038E: + bufpush(0x03CD); + break; + case 0x038F: + bufpush(0x03CE); + break; + case 0x0390: + bufpush(0x03B9); + bufpush(0x0308); + bufpush(0x0301); + break; + case 0x0391: + bufpush(0x03B1); + break; + case 0x0392: + bufpush(0x03B2); + break; + case 0x0393: + bufpush(0x03B3); + break; + case 0x0394: + bufpush(0x03B4); + break; + case 0x0395: + bufpush(0x03B5); + break; + case 0x0396: + 
bufpush(0x03B6); + break; + case 0x0397: + bufpush(0x03B7); + break; + case 0x0398: + bufpush(0x03B8); + break; + case 0x0399: + bufpush(0x03B9); + break; + case 0x039A: + bufpush(0x03BA); + break; + case 0x039B: + bufpush(0x03BB); + break; + case 0x039C: + bufpush(0x03BC); + break; + case 0x039D: + bufpush(0x03BD); + break; + case 0x039E: + bufpush(0x03BE); + break; + case 0x039F: + bufpush(0x03BF); + break; + case 0x03A0: + bufpush(0x03C0); + break; + case 0x03A1: + bufpush(0x03C1); + break; + case 0x03A3: + bufpush(0x03C3); + break; + case 0x03A4: + bufpush(0x03C4); + break; + case 0x03A5: + bufpush(0x03C5); + break; + case 0x03A6: + bufpush(0x03C6); + break; + case 0x03A7: + bufpush(0x03C7); + break; + case 0x03A8: + bufpush(0x03C8); + break; + case 0x03A9: + bufpush(0x03C9); + break; + case 0x03AA: + bufpush(0x03CA); + break; + case 0x03AB: + bufpush(0x03CB); + break; + case 0x03B0: + bufpush(0x03C5); + bufpush(0x0308); + bufpush(0x0301); + break; + case 0x03C2: + bufpush(0x03C3); + break; + case 0x03D0: + bufpush(0x03B2); + break; + case 0x03D1: + bufpush(0x03B8); + break; + case 0x03D5: + bufpush(0x03C6); + break; + case 0x03D6: + bufpush(0x03C0); + break; + case 0x03D8: + bufpush(0x03D9); + break; + case 0x03DA: + bufpush(0x03DB); + break; + case 0x03DC: + bufpush(0x03DD); + break; + case 0x03DE: + bufpush(0x03DF); + break; + case 0x03E0: + bufpush(0x03E1); + break; + case 0x03E2: + bufpush(0x03E3); + break; + case 0x03E4: + bufpush(0x03E5); + break; + case 0x03E6: + bufpush(0x03E7); + break; + case 0x03E8: + bufpush(0x03E9); + break; + case 0x03EA: + bufpush(0x03EB); + break; + case 0x03EC: + bufpush(0x03ED); + break; + case 0x03EE: + bufpush(0x03EF); + break; + case 0x03F0: + bufpush(0x03BA); + break; + case 0x03F1: + bufpush(0x03C1); + break; + case 0x03F2: + bufpush(0x03C3); + break; + case 0x03F4: + bufpush(0x03B8); + break; + case 0x03F5: + bufpush(0x03B5); + break; + case 0x0400: + bufpush(0x0450); + break; + case 0x0401: + bufpush(0x0451); + break; 
+ case 0x0402: + bufpush(0x0452); + break; + case 0x0403: + bufpush(0x0453); + break; + case 0x0404: + bufpush(0x0454); + break; + case 0x0405: + bufpush(0x0455); + break; + case 0x0406: + bufpush(0x0456); + break; + case 0x0407: + bufpush(0x0457); + break; + case 0x0408: + bufpush(0x0458); + break; + case 0x0409: + bufpush(0x0459); + break; + case 0x040A: + bufpush(0x045A); + break; + case 0x040B: + bufpush(0x045B); + break; + case 0x040C: + bufpush(0x045C); + break; + case 0x040D: + bufpush(0x045D); + break; + case 0x040E: + bufpush(0x045E); + break; + case 0x040F: + bufpush(0x045F); + break; + case 0x0410: + bufpush(0x0430); + break; + case 0x0411: + bufpush(0x0431); + break; + case 0x0412: + bufpush(0x0432); + break; + case 0x0413: + bufpush(0x0433); + break; + case 0x0414: + bufpush(0x0434); + break; + case 0x0415: + bufpush(0x0435); + break; + case 0x0416: + bufpush(0x0436); + break; + case 0x0417: + bufpush(0x0437); + break; + case 0x0418: + bufpush(0x0438); + break; + case 0x0419: + bufpush(0x0439); + break; + case 0x041A: + bufpush(0x043A); + break; + case 0x041B: + bufpush(0x043B); + break; + case 0x041C: + bufpush(0x043C); + break; + case 0x041D: + bufpush(0x043D); + break; + case 0x041E: + bufpush(0x043E); + break; + case 0x041F: + bufpush(0x043F); + break; + case 0x0420: + bufpush(0x0440); + break; + case 0x0421: + bufpush(0x0441); + break; + case 0x0422: + bufpush(0x0442); + break; + case 0x0423: + bufpush(0x0443); + break; + case 0x0424: + bufpush(0x0444); + break; + case 0x0425: + bufpush(0x0445); + break; + case 0x0426: + bufpush(0x0446); + break; + case 0x0427: + bufpush(0x0447); + break; + case 0x0428: + bufpush(0x0448); + break; + case 0x0429: + bufpush(0x0449); + break; + case 0x042A: + bufpush(0x044A); + break; + case 0x042B: + bufpush(0x044B); + break; + case 0x042C: + bufpush(0x044C); + break; + case 0x042D: + bufpush(0x044D); + break; + case 0x042E: + bufpush(0x044E); + break; + case 0x042F: + bufpush(0x044F); + break; + case 0x0460: + 
bufpush(0x0461); + break; + case 0x0462: + bufpush(0x0463); + break; + case 0x0464: + bufpush(0x0465); + break; + case 0x0466: + bufpush(0x0467); + break; + case 0x0468: + bufpush(0x0469); + break; + case 0x046A: + bufpush(0x046B); + break; + case 0x046C: + bufpush(0x046D); + break; + case 0x046E: + bufpush(0x046F); + break; + case 0x0470: + bufpush(0x0471); + break; + case 0x0472: + bufpush(0x0473); + break; + case 0x0474: + bufpush(0x0475); + break; + case 0x0476: + bufpush(0x0477); + break; + case 0x0478: + bufpush(0x0479); + break; + case 0x047A: + bufpush(0x047B); + break; + case 0x047C: + bufpush(0x047D); + break; + case 0x047E: + bufpush(0x047F); + break; + case 0x0480: + bufpush(0x0481); + break; + case 0x048A: + bufpush(0x048B); + break; + case 0x048C: + bufpush(0x048D); + break; + case 0x048E: + bufpush(0x048F); + break; + case 0x0490: + bufpush(0x0491); + break; + case 0x0492: + bufpush(0x0493); + break; + case 0x0494: + bufpush(0x0495); + break; + case 0x0496: + bufpush(0x0497); + break; + case 0x0498: + bufpush(0x0499); + break; + case 0x049A: + bufpush(0x049B); + break; + case 0x049C: + bufpush(0x049D); + break; + case 0x049E: + bufpush(0x049F); + break; + case 0x04A0: + bufpush(0x04A1); + break; + case 0x04A2: + bufpush(0x04A3); + break; + case 0x04A4: + bufpush(0x04A5); + break; + case 0x04A6: + bufpush(0x04A7); + break; + case 0x04A8: + bufpush(0x04A9); + break; + case 0x04AA: + bufpush(0x04AB); + break; + case 0x04AC: + bufpush(0x04AD); + break; + case 0x04AE: + bufpush(0x04AF); + break; + case 0x04B0: + bufpush(0x04B1); + break; + case 0x04B2: + bufpush(0x04B3); + break; + case 0x04B4: + bufpush(0x04B5); + break; + case 0x04B6: + bufpush(0x04B7); + break; + case 0x04B8: + bufpush(0x04B9); + break; + case 0x04BA: + bufpush(0x04BB); + break; + case 0x04BC: + bufpush(0x04BD); + break; + case 0x04BE: + bufpush(0x04BF); + break; + case 0x04C1: + bufpush(0x04C2); + break; + case 0x04C3: + bufpush(0x04C4); + break; + case 0x04C5: + bufpush(0x04C6); + 
break; + case 0x04C7: + bufpush(0x04C8); + break; + case 0x04C9: + bufpush(0x04CA); + break; + case 0x04CB: + bufpush(0x04CC); + break; + case 0x04CD: + bufpush(0x04CE); + break; + case 0x04D0: + bufpush(0x04D1); + break; + case 0x04D2: + bufpush(0x04D3); + break; + case 0x04D4: + bufpush(0x04D5); + break; + case 0x04D6: + bufpush(0x04D7); + break; + case 0x04D8: + bufpush(0x04D9); + break; + case 0x04DA: + bufpush(0x04DB); + break; + case 0x04DC: + bufpush(0x04DD); + break; + case 0x04DE: + bufpush(0x04DF); + break; + case 0x04E0: + bufpush(0x04E1); + break; + case 0x04E2: + bufpush(0x04E3); + break; + case 0x04E4: + bufpush(0x04E5); + break; + case 0x04E6: + bufpush(0x04E7); + break; + case 0x04E8: + bufpush(0x04E9); + break; + case 0x04EA: + bufpush(0x04EB); + break; + case 0x04EC: + bufpush(0x04ED); + break; + case 0x04EE: + bufpush(0x04EF); + break; + case 0x04F0: + bufpush(0x04F1); + break; + case 0x04F2: + bufpush(0x04F3); + break; + case 0x04F4: + bufpush(0x04F5); + break; + case 0x04F8: + bufpush(0x04F9); + break; + case 0x0500: + bufpush(0x0501); + break; + case 0x0502: + bufpush(0x0503); + break; + case 0x0504: + bufpush(0x0505); + break; + case 0x0506: + bufpush(0x0507); + break; + case 0x0508: + bufpush(0x0509); + break; + case 0x050A: + bufpush(0x050B); + break; + case 0x050C: + bufpush(0x050D); + break; + case 0x050E: + bufpush(0x050F); + break; + case 0x0531: + bufpush(0x0561); + break; + case 0x0532: + bufpush(0x0562); + break; + case 0x0533: + bufpush(0x0563); + break; + case 0x0534: + bufpush(0x0564); + break; + case 0x0535: + bufpush(0x0565); + break; + case 0x0536: + bufpush(0x0566); + break; + case 0x0537: + bufpush(0x0567); + break; + case 0x0538: + bufpush(0x0568); + break; + case 0x0539: + bufpush(0x0569); + break; + case 0x053A: + bufpush(0x056A); + break; + case 0x053B: + bufpush(0x056B); + break; + case 0x053C: + bufpush(0x056C); + break; + case 0x053D: + bufpush(0x056D); + break; + case 0x053E: + bufpush(0x056E); + break; + case 0x053F: 
+ bufpush(0x056F); + break; + case 0x0540: + bufpush(0x0570); + break; + case 0x0541: + bufpush(0x0571); + break; + case 0x0542: + bufpush(0x0572); + break; + case 0x0543: + bufpush(0x0573); + break; + case 0x0544: + bufpush(0x0574); + break; + case 0x0545: + bufpush(0x0575); + break; + case 0x0546: + bufpush(0x0576); + break; + case 0x0547: + bufpush(0x0577); + break; + case 0x0548: + bufpush(0x0578); + break; + case 0x0549: + bufpush(0x0579); + break; + case 0x054A: + bufpush(0x057A); + break; + case 0x054B: + bufpush(0x057B); + break; + case 0x054C: + bufpush(0x057C); + break; + case 0x054D: + bufpush(0x057D); + break; + case 0x054E: + bufpush(0x057E); + break; + case 0x054F: + bufpush(0x057F); + break; + case 0x0550: + bufpush(0x0580); + break; + case 0x0551: + bufpush(0x0581); + break; + case 0x0552: + bufpush(0x0582); + break; + case 0x0553: + bufpush(0x0583); + break; + case 0x0554: + bufpush(0x0584); + break; + case 0x0555: + bufpush(0x0585); + break; + case 0x0556: + bufpush(0x0586); + break; + case 0x0587: + bufpush(0x0565); + bufpush(0x0582); + break; + case 0x1E00: + bufpush(0x1E01); + break; + case 0x1E02: + bufpush(0x1E03); + break; + case 0x1E04: + bufpush(0x1E05); + break; + case 0x1E06: + bufpush(0x1E07); + break; + case 0x1E08: + bufpush(0x1E09); + break; + case 0x1E0A: + bufpush(0x1E0B); + break; + case 0x1E0C: + bufpush(0x1E0D); + break; + case 0x1E0E: + bufpush(0x1E0F); + break; + case 0x1E10: + bufpush(0x1E11); + break; + case 0x1E12: + bufpush(0x1E13); + break; + case 0x1E14: + bufpush(0x1E15); + break; + case 0x1E16: + bufpush(0x1E17); + break; + case 0x1E18: + bufpush(0x1E19); + break; + case 0x1E1A: + bufpush(0x1E1B); + break; + case 0x1E1C: + bufpush(0x1E1D); + break; + case 0x1E1E: + bufpush(0x1E1F); + break; + case 0x1E20: + bufpush(0x1E21); + break; + case 0x1E22: + bufpush(0x1E23); + break; + case 0x1E24: + bufpush(0x1E25); + break; + case 0x1E26: + bufpush(0x1E27); + break; + case 0x1E28: + bufpush(0x1E29); + break; + case 0x1E2A: + 
bufpush(0x1E2B); + break; + case 0x1E2C: + bufpush(0x1E2D); + break; + case 0x1E2E: + bufpush(0x1E2F); + break; + case 0x1E30: + bufpush(0x1E31); + break; + case 0x1E32: + bufpush(0x1E33); + break; + case 0x1E34: + bufpush(0x1E35); + break; + case 0x1E36: + bufpush(0x1E37); + break; + case 0x1E38: + bufpush(0x1E39); + break; + case 0x1E3A: + bufpush(0x1E3B); + break; + case 0x1E3C: + bufpush(0x1E3D); + break; + case 0x1E3E: + bufpush(0x1E3F); + break; + case 0x1E40: + bufpush(0x1E41); + break; + case 0x1E42: + bufpush(0x1E43); + break; + case 0x1E44: + bufpush(0x1E45); + break; + case 0x1E46: + bufpush(0x1E47); + break; + case 0x1E48: + bufpush(0x1E49); + break; + case 0x1E4A: + bufpush(0x1E4B); + break; + case 0x1E4C: + bufpush(0x1E4D); + break; + case 0x1E4E: + bufpush(0x1E4F); + break; + case 0x1E50: + bufpush(0x1E51); + break; + case 0x1E52: + bufpush(0x1E53); + break; + case 0x1E54: + bufpush(0x1E55); + break; + case 0x1E56: + bufpush(0x1E57); + break; + case 0x1E58: + bufpush(0x1E59); + break; + case 0x1E5A: + bufpush(0x1E5B); + break; + case 0x1E5C: + bufpush(0x1E5D); + break; + case 0x1E5E: + bufpush(0x1E5F); + break; + case 0x1E60: + bufpush(0x1E61); + break; + case 0x1E62: + bufpush(0x1E63); + break; + case 0x1E64: + bufpush(0x1E65); + break; + case 0x1E66: + bufpush(0x1E67); + break; + case 0x1E68: + bufpush(0x1E69); + break; + case 0x1E6A: + bufpush(0x1E6B); + break; + case 0x1E6C: + bufpush(0x1E6D); + break; + case 0x1E6E: + bufpush(0x1E6F); + break; + case 0x1E70: + bufpush(0x1E71); + break; + case 0x1E72: + bufpush(0x1E73); + break; + case 0x1E74: + bufpush(0x1E75); + break; + case 0x1E76: + bufpush(0x1E77); + break; + case 0x1E78: + bufpush(0x1E79); + break; + case 0x1E7A: + bufpush(0x1E7B); + break; + case 0x1E7C: + bufpush(0x1E7D); + break; + case 0x1E7E: + bufpush(0x1E7F); + break; + case 0x1E80: + bufpush(0x1E81); + break; + case 0x1E82: + bufpush(0x1E83); + break; + case 0x1E84: + bufpush(0x1E85); + break; + case 0x1E86: + bufpush(0x1E87); + 
break; + case 0x1E88: + bufpush(0x1E89); + break; + case 0x1E8A: + bufpush(0x1E8B); + break; + case 0x1E8C: + bufpush(0x1E8D); + break; + case 0x1E8E: + bufpush(0x1E8F); + break; + case 0x1E90: + bufpush(0x1E91); + break; + case 0x1E92: + bufpush(0x1E93); + break; + case 0x1E94: + bufpush(0x1E95); + break; + case 0x1E96: + bufpush(0x0068); + bufpush(0x0331); + break; + case 0x1E97: + bufpush(0x0074); + bufpush(0x0308); + break; + case 0x1E98: + bufpush(0x0077); + bufpush(0x030A); + break; + case 0x1E99: + bufpush(0x0079); + bufpush(0x030A); + break; + case 0x1E9A: + bufpush(0x0061); + bufpush(0x02BE); + break; + case 0x1E9B: + bufpush(0x1E61); + break; + case 0x1EA0: + bufpush(0x1EA1); + break; + case 0x1EA2: + bufpush(0x1EA3); + break; + case 0x1EA4: + bufpush(0x1EA5); + break; + case 0x1EA6: + bufpush(0x1EA7); + break; + case 0x1EA8: + bufpush(0x1EA9); + break; + case 0x1EAA: + bufpush(0x1EAB); + break; + case 0x1EAC: + bufpush(0x1EAD); + break; + case 0x1EAE: + bufpush(0x1EAF); + break; + case 0x1EB0: + bufpush(0x1EB1); + break; + case 0x1EB2: + bufpush(0x1EB3); + break; + case 0x1EB4: + bufpush(0x1EB5); + break; + case 0x1EB6: + bufpush(0x1EB7); + break; + case 0x1EB8: + bufpush(0x1EB9); + break; + case 0x1EBA: + bufpush(0x1EBB); + break; + case 0x1EBC: + bufpush(0x1EBD); + break; + case 0x1EBE: + bufpush(0x1EBF); + break; + case 0x1EC0: + bufpush(0x1EC1); + break; + case 0x1EC2: + bufpush(0x1EC3); + break; + case 0x1EC4: + bufpush(0x1EC5); + break; + case 0x1EC6: + bufpush(0x1EC7); + break; + case 0x1EC8: + bufpush(0x1EC9); + break; + case 0x1ECA: + bufpush(0x1ECB); + break; + case 0x1ECC: + bufpush(0x1ECD); + break; + case 0x1ECE: + bufpush(0x1ECF); + break; + case 0x1ED0: + bufpush(0x1ED1); + break; + case 0x1ED2: + bufpush(0x1ED3); + break; + case 0x1ED4: + bufpush(0x1ED5); + break; + case 0x1ED6: + bufpush(0x1ED7); + break; + case 0x1ED8: + bufpush(0x1ED9); + break; + case 0x1EDA: + bufpush(0x1EDB); + break; + case 0x1EDC: + bufpush(0x1EDD); + break; + 
case 0x1EDE: + bufpush(0x1EDF); + break; + case 0x1EE0: + bufpush(0x1EE1); + break; + case 0x1EE2: + bufpush(0x1EE3); + break; + case 0x1EE4: + bufpush(0x1EE5); + break; + case 0x1EE6: + bufpush(0x1EE7); + break; + case 0x1EE8: + bufpush(0x1EE9); + break; + case 0x1EEA: + bufpush(0x1EEB); + break; + case 0x1EEC: + bufpush(0x1EED); + break; + case 0x1EEE: + bufpush(0x1EEF); + break; + case 0x1EF0: + bufpush(0x1EF1); + break; + case 0x1EF2: + bufpush(0x1EF3); + break; + case 0x1EF4: + bufpush(0x1EF5); + break; + case 0x1EF6: + bufpush(0x1EF7); + break; + case 0x1EF8: + bufpush(0x1EF9); + break; + case 0x1F08: + bufpush(0x1F00); + break; + case 0x1F09: + bufpush(0x1F01); + break; + case 0x1F0A: + bufpush(0x1F02); + break; + case 0x1F0B: + bufpush(0x1F03); + break; + case 0x1F0C: + bufpush(0x1F04); + break; + case 0x1F0D: + bufpush(0x1F05); + break; + case 0x1F0E: + bufpush(0x1F06); + break; + case 0x1F0F: + bufpush(0x1F07); + break; + case 0x1F18: + bufpush(0x1F10); + break; + case 0x1F19: + bufpush(0x1F11); + break; + case 0x1F1A: + bufpush(0x1F12); + break; + case 0x1F1B: + bufpush(0x1F13); + break; + case 0x1F1C: + bufpush(0x1F14); + break; + case 0x1F1D: + bufpush(0x1F15); + break; + case 0x1F28: + bufpush(0x1F20); + break; + case 0x1F29: + bufpush(0x1F21); + break; + case 0x1F2A: + bufpush(0x1F22); + break; + case 0x1F2B: + bufpush(0x1F23); + break; + case 0x1F2C: + bufpush(0x1F24); + break; + case 0x1F2D: + bufpush(0x1F25); + break; + case 0x1F2E: + bufpush(0x1F26); + break; + case 0x1F2F: + bufpush(0x1F27); + break; + case 0x1F38: + bufpush(0x1F30); + break; + case 0x1F39: + bufpush(0x1F31); + break; + case 0x1F3A: + bufpush(0x1F32); + break; + case 0x1F3B: + bufpush(0x1F33); + break; + case 0x1F3C: + bufpush(0x1F34); + break; + case 0x1F3D: + bufpush(0x1F35); + break; + case 0x1F3E: + bufpush(0x1F36); + break; + case 0x1F3F: + bufpush(0x1F37); + break; + case 0x1F48: + bufpush(0x1F40); + break; + case 0x1F49: + bufpush(0x1F41); + break; + case 0x1F4A: + 
bufpush(0x1F42); + break; + case 0x1F4B: + bufpush(0x1F43); + break; + case 0x1F4C: + bufpush(0x1F44); + break; + case 0x1F4D: + bufpush(0x1F45); + break; + case 0x1F50: + bufpush(0x03C5); + bufpush(0x0313); + break; + case 0x1F52: + bufpush(0x03C5); + bufpush(0x0313); + bufpush(0x0300); + break; + case 0x1F54: + bufpush(0x03C5); + bufpush(0x0313); + bufpush(0x0301); + break; + case 0x1F56: + bufpush(0x03C5); + bufpush(0x0313); + bufpush(0x0342); + break; + case 0x1F59: + bufpush(0x1F51); + break; + case 0x1F5B: + bufpush(0x1F53); + break; + case 0x1F5D: + bufpush(0x1F55); + break; + case 0x1F5F: + bufpush(0x1F57); + break; + case 0x1F68: + bufpush(0x1F60); + break; + case 0x1F69: + bufpush(0x1F61); + break; + case 0x1F6A: + bufpush(0x1F62); + break; + case 0x1F6B: + bufpush(0x1F63); + break; + case 0x1F6C: + bufpush(0x1F64); + break; + case 0x1F6D: + bufpush(0x1F65); + break; + case 0x1F6E: + bufpush(0x1F66); + break; + case 0x1F6F: + bufpush(0x1F67); + break; + case 0x1F80: + bufpush(0x1F00); + bufpush(0x03B9); + break; + case 0x1F81: + bufpush(0x1F01); + bufpush(0x03B9); + break; + case 0x1F82: + bufpush(0x1F02); + bufpush(0x03B9); + break; + case 0x1F83: + bufpush(0x1F03); + bufpush(0x03B9); + break; + case 0x1F84: + bufpush(0x1F04); + bufpush(0x03B9); + break; + case 0x1F85: + bufpush(0x1F05); + bufpush(0x03B9); + break; + case 0x1F86: + bufpush(0x1F06); + bufpush(0x03B9); + break; + case 0x1F87: + bufpush(0x1F07); + bufpush(0x03B9); + break; + case 0x1F88: + bufpush(0x1F00); + bufpush(0x03B9); + break; + case 0x1F89: + bufpush(0x1F01); + bufpush(0x03B9); + break; + case 0x1F8A: + bufpush(0x1F02); + bufpush(0x03B9); + break; + case 0x1F8B: + bufpush(0x1F03); + bufpush(0x03B9); + break; + case 0x1F8C: + bufpush(0x1F04); + bufpush(0x03B9); + break; + case 0x1F8D: + bufpush(0x1F05); + bufpush(0x03B9); + break; + case 0x1F8E: + bufpush(0x1F06); + bufpush(0x03B9); + break; + case 0x1F8F: + bufpush(0x1F07); + bufpush(0x03B9); + break; + case 0x1F90: + 
bufpush(0x1F20); + bufpush(0x03B9); + break; + case 0x1F91: + bufpush(0x1F21); + bufpush(0x03B9); + break; + case 0x1F92: + bufpush(0x1F22); + bufpush(0x03B9); + break; + case 0x1F93: + bufpush(0x1F23); + bufpush(0x03B9); + break; + case 0x1F94: + bufpush(0x1F24); + bufpush(0x03B9); + break; + case 0x1F95: + bufpush(0x1F25); + bufpush(0x03B9); + break; + case 0x1F96: + bufpush(0x1F26); + bufpush(0x03B9); + break; + case 0x1F97: + bufpush(0x1F27); + bufpush(0x03B9); + break; + case 0x1F98: + bufpush(0x1F20); + bufpush(0x03B9); + break; + case 0x1F99: + bufpush(0x1F21); + bufpush(0x03B9); + break; + case 0x1F9A: + bufpush(0x1F22); + bufpush(0x03B9); + break; + case 0x1F9B: + bufpush(0x1F23); + bufpush(0x03B9); + break; + case 0x1F9C: + bufpush(0x1F24); + bufpush(0x03B9); + break; + case 0x1F9D: + bufpush(0x1F25); + bufpush(0x03B9); + break; + case 0x1F9E: + bufpush(0x1F26); + bufpush(0x03B9); + break; + case 0x1F9F: + bufpush(0x1F27); + bufpush(0x03B9); + break; + case 0x1FA0: + bufpush(0x1F60); + bufpush(0x03B9); + break; + case 0x1FA1: + bufpush(0x1F61); + bufpush(0x03B9); + break; + case 0x1FA2: + bufpush(0x1F62); + bufpush(0x03B9); + break; + case 0x1FA3: + bufpush(0x1F63); + bufpush(0x03B9); + break; + case 0x1FA4: + bufpush(0x1F64); + bufpush(0x03B9); + break; + case 0x1FA5: + bufpush(0x1F65); + bufpush(0x03B9); + break; + case 0x1FA6: + bufpush(0x1F66); + bufpush(0x03B9); + break; + case 0x1FA7: + bufpush(0x1F67); + bufpush(0x03B9); + break; + case 0x1FA8: + bufpush(0x1F60); + bufpush(0x03B9); + break; + case 0x1FA9: + bufpush(0x1F61); + bufpush(0x03B9); + break; + case 0x1FAA: + bufpush(0x1F62); + bufpush(0x03B9); + break; + case 0x1FAB: + bufpush(0x1F63); + bufpush(0x03B9); + break; + case 0x1FAC: + bufpush(0x1F64); + bufpush(0x03B9); + break; + case 0x1FAD: + bufpush(0x1F65); + bufpush(0x03B9); + break; + case 0x1FAE: + bufpush(0x1F66); + bufpush(0x03B9); + break; + case 0x1FAF: + bufpush(0x1F67); + bufpush(0x03B9); + break; + case 0x1FB2: + 
bufpush(0x1F70); + bufpush(0x03B9); + break; + case 0x1FB3: + bufpush(0x03B1); + bufpush(0x03B9); + break; + case 0x1FB4: + bufpush(0x03AC); + bufpush(0x03B9); + break; + case 0x1FB6: + bufpush(0x03B1); + bufpush(0x0342); + break; + case 0x1FB7: + bufpush(0x03B1); + bufpush(0x0342); + bufpush(0x03B9); + break; + case 0x1FB8: + bufpush(0x1FB0); + break; + case 0x1FB9: + bufpush(0x1FB1); + break; + case 0x1FBA: + bufpush(0x1F70); + break; + case 0x1FBB: + bufpush(0x1F71); + break; + case 0x1FBC: + bufpush(0x03B1); + bufpush(0x03B9); + break; + case 0x1FBE: + bufpush(0x03B9); + break; + case 0x1FC2: + bufpush(0x1F74); + bufpush(0x03B9); + break; + case 0x1FC3: + bufpush(0x03B7); + bufpush(0x03B9); + break; + case 0x1FC4: + bufpush(0x03AE); + bufpush(0x03B9); + break; + case 0x1FC6: + bufpush(0x03B7); + bufpush(0x0342); + break; + case 0x1FC7: + bufpush(0x03B7); + bufpush(0x0342); + bufpush(0x03B9); + break; + case 0x1FC8: + bufpush(0x1F72); + break; + case 0x1FC9: + bufpush(0x1F73); + break; + case 0x1FCA: + bufpush(0x1F74); + break; + case 0x1FCB: + bufpush(0x1F75); + break; + case 0x1FCC: + bufpush(0x03B7); + bufpush(0x03B9); + break; + case 0x1FD2: + bufpush(0x03B9); + bufpush(0x0308); + bufpush(0x0300); + break; + case 0x1FD3: + bufpush(0x03B9); + bufpush(0x0308); + bufpush(0x0301); + break; + case 0x1FD6: + bufpush(0x03B9); + bufpush(0x0342); + break; + case 0x1FD7: + bufpush(0x03B9); + bufpush(0x0308); + bufpush(0x0342); + break; + case 0x1FD8: + bufpush(0x1FD0); + break; + case 0x1FD9: + bufpush(0x1FD1); + break; + case 0x1FDA: + bufpush(0x1F76); + break; + case 0x1FDB: + bufpush(0x1F77); + break; + case 0x1FE2: + bufpush(0x03C5); + bufpush(0x0308); + bufpush(0x0300); + break; + case 0x1FE3: + bufpush(0x03C5); + bufpush(0x0308); + bufpush(0x0301); + break; + case 0x1FE4: + bufpush(0x03C1); + bufpush(0x0313); + break; + case 0x1FE6: + bufpush(0x03C5); + bufpush(0x0342); + break; + case 0x1FE7: + bufpush(0x03C5); + bufpush(0x0308); + bufpush(0x0342); + break; + 
case 0x1FE8: + bufpush(0x1FE0); + break; + case 0x1FE9: + bufpush(0x1FE1); + break; + case 0x1FEA: + bufpush(0x1F7A); + break; + case 0x1FEB: + bufpush(0x1F7B); + break; + case 0x1FEC: + bufpush(0x1FE5); + break; + case 0x1FF2: + bufpush(0x1F7C); + bufpush(0x03B9); + break; + case 0x1FF3: + bufpush(0x03C9); + bufpush(0x03B9); + break; + case 0x1FF4: + bufpush(0x03CE); + bufpush(0x03B9); + break; + case 0x1FF6: + bufpush(0x03C9); + bufpush(0x0342); + break; + case 0x1FF7: + bufpush(0x03C9); + bufpush(0x0342); + bufpush(0x03B9); + break; + case 0x1FF8: + bufpush(0x1F78); + break; + case 0x1FF9: + bufpush(0x1F79); + break; + case 0x1FFA: + bufpush(0x1F7C); + break; + case 0x1FFB: + bufpush(0x1F7D); + break; + case 0x1FFC: + bufpush(0x03C9); + bufpush(0x03B9); + break; + case 0x2126: + bufpush(0x03C9); + break; + case 0x212A: + bufpush(0x006B); + break; + case 0x212B: + bufpush(0x00E5); + break; + case 0x2160: + bufpush(0x2170); + break; + case 0x2161: + bufpush(0x2171); + break; + case 0x2162: + bufpush(0x2172); + break; + case 0x2163: + bufpush(0x2173); + break; + case 0x2164: + bufpush(0x2174); + break; + case 0x2165: + bufpush(0x2175); + break; + case 0x2166: + bufpush(0x2176); + break; + case 0x2167: + bufpush(0x2177); + break; + case 0x2168: + bufpush(0x2178); + break; + case 0x2169: + bufpush(0x2179); + break; + case 0x216A: + bufpush(0x217A); + break; + case 0x216B: + bufpush(0x217B); + break; + case 0x216C: + bufpush(0x217C); + break; + case 0x216D: + bufpush(0x217D); + break; + case 0x216E: + bufpush(0x217E); + break; + case 0x216F: + bufpush(0x217F); + break; + case 0x24B6: + bufpush(0x24D0); + break; + case 0x24B7: + bufpush(0x24D1); + break; + case 0x24B8: + bufpush(0x24D2); + break; + case 0x24B9: + bufpush(0x24D3); + break; + case 0x24BA: + bufpush(0x24D4); + break; + case 0x24BB: + bufpush(0x24D5); + break; + case 0x24BC: + bufpush(0x24D6); + break; + case 0x24BD: + bufpush(0x24D7); + break; + case 0x24BE: + bufpush(0x24D8); + break; + case 0x24BF: + 
bufpush(0x24D9); + break; + case 0x24C0: + bufpush(0x24DA); + break; + case 0x24C1: + bufpush(0x24DB); + break; + case 0x24C2: + bufpush(0x24DC); + break; + case 0x24C3: + bufpush(0x24DD); + break; + case 0x24C4: + bufpush(0x24DE); + break; + case 0x24C5: + bufpush(0x24DF); + break; + case 0x24C6: + bufpush(0x24E0); + break; + case 0x24C7: + bufpush(0x24E1); + break; + case 0x24C8: + bufpush(0x24E2); + break; + case 0x24C9: + bufpush(0x24E3); + break; + case 0x24CA: + bufpush(0x24E4); + break; + case 0x24CB: + bufpush(0x24E5); + break; + case 0x24CC: + bufpush(0x24E6); + break; + case 0x24CD: + bufpush(0x24E7); + break; + case 0x24CE: + bufpush(0x24E8); + break; + case 0x24CF: + bufpush(0x24E9); + break; + case 0xFB00: + bufpush(0x0066); + bufpush(0x0066); + break; + case 0xFB01: + bufpush(0x0066); + bufpush(0x0069); + break; + case 0xFB02: + bufpush(0x0066); + bufpush(0x006C); + break; + case 0xFB03: + bufpush(0x0066); + bufpush(0x0066); + bufpush(0x0069); + break; + case 0xFB04: + bufpush(0x0066); + bufpush(0x0066); + bufpush(0x006C); + break; + case 0xFB05: + bufpush(0x0073); + bufpush(0x0074); + break; + case 0xFB06: + bufpush(0x0073); + bufpush(0x0074); + break; + case 0xFB13: + bufpush(0x0574); + bufpush(0x0576); + break; + case 0xFB14: + bufpush(0x0574); + bufpush(0x0565); + break; + case 0xFB15: + bufpush(0x0574); + bufpush(0x056B); + break; + case 0xFB16: + bufpush(0x057E); + bufpush(0x0576); + break; + case 0xFB17: + bufpush(0x0574); + bufpush(0x056D); + break; + case 0xFF21: + bufpush(0xFF41); + break; + case 0xFF22: + bufpush(0xFF42); + break; + case 0xFF23: + bufpush(0xFF43); + break; + case 0xFF24: + bufpush(0xFF44); + break; + case 0xFF25: + bufpush(0xFF45); + break; + case 0xFF26: + bufpush(0xFF46); + break; + case 0xFF27: + bufpush(0xFF47); + break; + case 0xFF28: + bufpush(0xFF48); + break; + case 0xFF29: + bufpush(0xFF49); + break; + case 0xFF2A: + bufpush(0xFF4A); + break; + case 0xFF2B: + bufpush(0xFF4B); + break; + case 0xFF2C: + 
bufpush(0xFF4C); + break; + case 0xFF2D: + bufpush(0xFF4D); + break; + case 0xFF2E: + bufpush(0xFF4E); + break; + case 0xFF2F: + bufpush(0xFF4F); + break; + case 0xFF30: + bufpush(0xFF50); + break; + case 0xFF31: + bufpush(0xFF51); + break; + case 0xFF32: + bufpush(0xFF52); + break; + case 0xFF33: + bufpush(0xFF53); + break; + case 0xFF34: + bufpush(0xFF54); + break; + case 0xFF35: + bufpush(0xFF55); + break; + case 0xFF36: + bufpush(0xFF56); + break; + case 0xFF37: + bufpush(0xFF57); + break; + case 0xFF38: + bufpush(0xFF58); + break; + case 0xFF39: + bufpush(0xFF59); + break; + case 0xFF3A: + bufpush(0xFF5A); + break; + case 0x10400: + bufpush(0x10428); + break; + case 0x10401: + bufpush(0x10429); + break; + case 0x10402: + bufpush(0x1042A); + break; + case 0x10403: + bufpush(0x1042B); + break; + case 0x10404: + bufpush(0x1042C); + break; + case 0x10405: + bufpush(0x1042D); + break; + case 0x10406: + bufpush(0x1042E); + break; + case 0x10407: + bufpush(0x1042F); + break; + case 0x10408: + bufpush(0x10430); + break; + case 0x10409: + bufpush(0x10431); + break; + case 0x1040A: + bufpush(0x10432); + break; + case 0x1040B: + bufpush(0x10433); + break; + case 0x1040C: + bufpush(0x10434); + break; + case 0x1040D: + bufpush(0x10435); + break; + case 0x1040E: + bufpush(0x10436); + break; + case 0x1040F: + bufpush(0x10437); + break; + case 0x10410: + bufpush(0x10438); + break; + case 0x10411: + bufpush(0x10439); + break; + case 0x10412: + bufpush(0x1043A); + break; + case 0x10413: + bufpush(0x1043B); + break; + case 0x10414: + bufpush(0x1043C); + break; + case 0x10415: + bufpush(0x1043D); + break; + case 0x10416: + bufpush(0x1043E); + break; + case 0x10417: + bufpush(0x1043F); + break; + case 0x10418: + bufpush(0x10440); + break; + case 0x10419: + bufpush(0x10441); + break; + case 0x1041A: + bufpush(0x10442); + break; + case 0x1041B: + bufpush(0x10443); + break; + case 0x1041C: + bufpush(0x10444); + break; + case 0x1041D: + bufpush(0x10445); + break; + case 0x1041E: + 
bufpush(0x10446); + break; + case 0x1041F: + bufpush(0x10447); + break; + case 0x10420: + bufpush(0x10448); + break; + case 0x10421: + bufpush(0x10449); + break; + case 0x10422: + bufpush(0x1044A); + break; + case 0x10423: + bufpush(0x1044B); + break; + case 0x10424: + bufpush(0x1044C); + break; + case 0x10425: + bufpush(0x1044D); + break; + default: + bufpush(c); + } diff --git a/src/casefold.c b/src/casefold.c deleted file mode 100644 index 33f18aa..0000000 --- a/src/casefold.c +++ /dev/null @@ -1,2699 +0,0 @@ -#include -#include - - - switch c { - case 0x0041: - bufpush(0x0061); - break; - case 0x0042: - bufpush(0x0062); - break; - case 0x0043: - bufpush(0x0063); - break; - case 0x0044: - bufpush(0x0064); - break; - case 0x0045: - bufpush(0x0065); - break; - case 0x0046: - bufpush(0x0066); - break; - case 0x0047: - bufpush(0x0067); - break; - case 0x0048: - bufpush(0x0068); - break; - case 0x0049: - bufpush(0x0069); - break; - case 0x0049: - bufpush(0x0131); - break; - case 0x004A: - bufpush(0x006A); - break; - case 0x004B: - bufpush(0x006B); - break; - case 0x004C: - bufpush(0x006C); - break; - case 0x004D: - bufpush(0x006D); - break; - case 0x004E: - bufpush(0x006E); - break; - case 0x004F: - bufpush(0x006F); - break; - case 0x0050: - bufpush(0x0070); - break; - case 0x0051: - bufpush(0x0071); - break; - case 0x0052: - bufpush(0x0072); - break; - case 0x0053: - bufpush(0x0073); - break; - case 0x0054: - bufpush(0x0074); - break; - case 0x0055: - bufpush(0x0075); - break; - case 0x0056: - bufpush(0x0076); - break; - case 0x0057: - bufpush(0x0077); - break; - case 0x0058: - bufpush(0x0078); - break; - case 0x0059: - bufpush(0x0079); - break; - case 0x005A: - bufpush(0x007A); - break; - case 0x00B5: - bufpush(0x03BC); - break; - case 0x00C0: - bufpush(0x00E0); - break; - case 0x00C1: - bufpush(0x00E1); - break; - case 0x00C2: - bufpush(0x00E2); - break; - case 0x00C3: - bufpush(0x00E3); - break; - case 0x00C4: - bufpush(0x00E4); - break; - case 0x00C5: - 
bufpush(0x00E5); - break; - case 0x00C6: - bufpush(0x00E6); - break; - case 0x00C7: - bufpush(0x00E7); - break; - case 0x00C8: - bufpush(0x00E8); - break; - case 0x00C9: - bufpush(0x00E9); - break; - case 0x00CA: - bufpush(0x00EA); - break; - case 0x00CB: - bufpush(0x00EB); - break; - case 0x00CC: - bufpush(0x00EC); - break; - case 0x00CD: - bufpush(0x00ED); - break; - case 0x00CE: - bufpush(0x00EE); - break; - case 0x00CF: - bufpush(0x00EF); - break; - case 0x00D0: - bufpush(0x00F0); - break; - case 0x00D1: - bufpush(0x00F1); - break; - case 0x00D2: - bufpush(0x00F2); - break; - case 0x00D3: - bufpush(0x00F3); - break; - case 0x00D4: - bufpush(0x00F4); - break; - case 0x00D5: - bufpush(0x00F5); - break; - case 0x00D6: - bufpush(0x00F6); - break; - case 0x00D8: - bufpush(0x00F8); - break; - case 0x00D9: - bufpush(0x00F9); - break; - case 0x00DA: - bufpush(0x00FA); - break; - case 0x00DB: - bufpush(0x00FB); - break; - case 0x00DC: - bufpush(0x00FC); - break; - case 0x00DD: - bufpush(0x00FD); - break; - case 0x00DE: - bufpush(0x00FE); - break; - case 0x00DF: - bufpush(0x0073); - bufpush(0x0073); - break; - case 0x0100: - bufpush(0x0101); - break; - case 0x0102: - bufpush(0x0103); - break; - case 0x0104: - bufpush(0x0105); - break; - case 0x0106: - bufpush(0x0107); - break; - case 0x0108: - bufpush(0x0109); - break; - case 0x010A: - bufpush(0x010B); - break; - case 0x010C: - bufpush(0x010D); - break; - case 0x010E: - bufpush(0x010F); - break; - case 0x0110: - bufpush(0x0111); - break; - case 0x0112: - bufpush(0x0113); - break; - case 0x0114: - bufpush(0x0115); - break; - case 0x0116: - bufpush(0x0117); - break; - case 0x0118: - bufpush(0x0119); - break; - case 0x011A: - bufpush(0x011B); - break; - case 0x011C: - bufpush(0x011D); - break; - case 0x011E: - bufpush(0x011F); - break; - case 0x0120: - bufpush(0x0121); - break; - case 0x0122: - bufpush(0x0123); - break; - case 0x0124: - bufpush(0x0125); - break; - case 0x0126: - bufpush(0x0127); - break; - case 0x0128: - 
bufpush(0x0129); - break; - case 0x012A: - bufpush(0x012B); - break; - case 0x012C: - bufpush(0x012D); - break; - case 0x012E: - bufpush(0x012F); - break; - case 0x0130: - bufpush(0x0069); - bufpush(0x0307); - break; - case 0x0130: - bufpush(0x0069); - break; - case 0x0132: - bufpush(0x0133); - break; - case 0x0134: - bufpush(0x0135); - break; - case 0x0136: - bufpush(0x0137); - break; - case 0x0139: - bufpush(0x013A); - break; - case 0x013B: - bufpush(0x013C); - break; - case 0x013D: - bufpush(0x013E); - break; - case 0x013F: - bufpush(0x0140); - break; - case 0x0141: - bufpush(0x0142); - break; - case 0x0143: - bufpush(0x0144); - break; - case 0x0145: - bufpush(0x0146); - break; - case 0x0147: - bufpush(0x0148); - break; - case 0x0149: - bufpush(0x02BC); - bufpush(0x006E); - break; - case 0x014A: - bufpush(0x014B); - break; - case 0x014C: - bufpush(0x014D); - break; - case 0x014E: - bufpush(0x014F); - break; - case 0x0150: - bufpush(0x0151); - break; - case 0x0152: - bufpush(0x0153); - break; - case 0x0154: - bufpush(0x0155); - break; - case 0x0156: - bufpush(0x0157); - break; - case 0x0158: - bufpush(0x0159); - break; - case 0x015A: - bufpush(0x015B); - break; - case 0x015C: - bufpush(0x015D); - break; - case 0x015E: - bufpush(0x015F); - break; - case 0x0160: - bufpush(0x0161); - break; - case 0x0162: - bufpush(0x0163); - break; - case 0x0164: - bufpush(0x0165); - break; - case 0x0166: - bufpush(0x0167); - break; - case 0x0168: - bufpush(0x0169); - break; - case 0x016A: - bufpush(0x016B); - break; - case 0x016C: - bufpush(0x016D); - break; - case 0x016E: - bufpush(0x016F); - break; - case 0x0170: - bufpush(0x0171); - break; - case 0x0172: - bufpush(0x0173); - break; - case 0x0174: - bufpush(0x0175); - break; - case 0x0176: - bufpush(0x0177); - break; - case 0x0178: - bufpush(0x00FF); - break; - case 0x0179: - bufpush(0x017A); - break; - case 0x017B: - bufpush(0x017C); - break; - case 0x017D: - bufpush(0x017E); - break; - case 0x017F: - bufpush(0x0073); - break; 
- case 0x0181: - bufpush(0x0253); - break; - case 0x0182: - bufpush(0x0183); - break; - case 0x0184: - bufpush(0x0185); - break; - case 0x0186: - bufpush(0x0254); - break; - case 0x0187: - bufpush(0x0188); - break; - case 0x0189: - bufpush(0x0256); - break; - case 0x018A: - bufpush(0x0257); - break; - case 0x018B: - bufpush(0x018C); - break; - case 0x018E: - bufpush(0x01DD); - break; - case 0x018F: - bufpush(0x0259); - break; - case 0x0190: - bufpush(0x025B); - break; - case 0x0191: - bufpush(0x0192); - break; - case 0x0193: - bufpush(0x0260); - break; - case 0x0194: - bufpush(0x0263); - break; - case 0x0196: - bufpush(0x0269); - break; - case 0x0197: - bufpush(0x0268); - break; - case 0x0198: - bufpush(0x0199); - break; - case 0x019C: - bufpush(0x026F); - break; - case 0x019D: - bufpush(0x0272); - break; - case 0x019F: - bufpush(0x0275); - break; - case 0x01A0: - bufpush(0x01A1); - break; - case 0x01A2: - bufpush(0x01A3); - break; - case 0x01A4: - bufpush(0x01A5); - break; - case 0x01A6: - bufpush(0x0280); - break; - case 0x01A7: - bufpush(0x01A8); - break; - case 0x01A9: - bufpush(0x0283); - break; - case 0x01AC: - bufpush(0x01AD); - break; - case 0x01AE: - bufpush(0x0288); - break; - case 0x01AF: - bufpush(0x01B0); - break; - case 0x01B1: - bufpush(0x028A); - break; - case 0x01B2: - bufpush(0x028B); - break; - case 0x01B3: - bufpush(0x01B4); - break; - case 0x01B5: - bufpush(0x01B6); - break; - case 0x01B7: - bufpush(0x0292); - break; - case 0x01B8: - bufpush(0x01B9); - break; - case 0x01BC: - bufpush(0x01BD); - break; - case 0x01C4: - bufpush(0x01C6); - break; - case 0x01C5: - bufpush(0x01C6); - break; - case 0x01C7: - bufpush(0x01C9); - break; - case 0x01C8: - bufpush(0x01C9); - break; - case 0x01CA: - bufpush(0x01CC); - break; - case 0x01CB: - bufpush(0x01CC); - break; - case 0x01CD: - bufpush(0x01CE); - break; - case 0x01CF: - bufpush(0x01D0); - break; - case 0x01D1: - bufpush(0x01D2); - break; - case 0x01D3: - bufpush(0x01D4); - break; - case 0x01D5: - 
bufpush(0x01D6); - break; - case 0x01D7: - bufpush(0x01D8); - break; - case 0x01D9: - bufpush(0x01DA); - break; - case 0x01DB: - bufpush(0x01DC); - break; - case 0x01DE: - bufpush(0x01DF); - break; - case 0x01E0: - bufpush(0x01E1); - break; - case 0x01E2: - bufpush(0x01E3); - break; - case 0x01E4: - bufpush(0x01E5); - break; - case 0x01E6: - bufpush(0x01E7); - break; - case 0x01E8: - bufpush(0x01E9); - break; - case 0x01EA: - bufpush(0x01EB); - break; - case 0x01EC: - bufpush(0x01ED); - break; - case 0x01EE: - bufpush(0x01EF); - break; - case 0x01F0: - bufpush(0x006A); - bufpush(0x030C); - break; - case 0x01F1: - bufpush(0x01F3); - break; - case 0x01F2: - bufpush(0x01F3); - break; - case 0x01F4: - bufpush(0x01F5); - break; - case 0x01F6: - bufpush(0x0195); - break; - case 0x01F7: - bufpush(0x01BF); - break; - case 0x01F8: - bufpush(0x01F9); - break; - case 0x01FA: - bufpush(0x01FB); - break; - case 0x01FC: - bufpush(0x01FD); - break; - case 0x01FE: - bufpush(0x01FF); - break; - case 0x0200: - bufpush(0x0201); - break; - case 0x0202: - bufpush(0x0203); - break; - case 0x0204: - bufpush(0x0205); - break; - case 0x0206: - bufpush(0x0207); - break; - case 0x0208: - bufpush(0x0209); - break; - case 0x020A: - bufpush(0x020B); - break; - case 0x020C: - bufpush(0x020D); - break; - case 0x020E: - bufpush(0x020F); - break; - case 0x0210: - bufpush(0x0211); - break; - case 0x0212: - bufpush(0x0213); - break; - case 0x0214: - bufpush(0x0215); - break; - case 0x0216: - bufpush(0x0217); - break; - case 0x0218: - bufpush(0x0219); - break; - case 0x021A: - bufpush(0x021B); - break; - case 0x021C: - bufpush(0x021D); - break; - case 0x021E: - bufpush(0x021F); - break; - case 0x0220: - bufpush(0x019E); - break; - case 0x0222: - bufpush(0x0223); - break; - case 0x0224: - bufpush(0x0225); - break; - case 0x0226: - bufpush(0x0227); - break; - case 0x0228: - bufpush(0x0229); - break; - case 0x022A: - bufpush(0x022B); - break; - case 0x022C: - bufpush(0x022D); - break; - case 0x022E: - 
bufpush(0x022F); - break; - case 0x0230: - bufpush(0x0231); - break; - case 0x0232: - bufpush(0x0233); - break; - case 0x0345: - bufpush(0x03B9); - break; - case 0x0386: - bufpush(0x03AC); - break; - case 0x0388: - bufpush(0x03AD); - break; - case 0x0389: - bufpush(0x03AE); - break; - case 0x038A: - bufpush(0x03AF); - break; - case 0x038C: - bufpush(0x03CC); - break; - case 0x038E: - bufpush(0x03CD); - break; - case 0x038F: - bufpush(0x03CE); - break; - case 0x0390: - bufpush(0x03B9); - bufpush(0x0308); - bufpush(0x0301); - break; - case 0x0391: - bufpush(0x03B1); - break; - case 0x0392: - bufpush(0x03B2); - break; - case 0x0393: - bufpush(0x03B3); - break; - case 0x0394: - bufpush(0x03B4); - break; - case 0x0395: - bufpush(0x03B5); - break; - case 0x0396: - bufpush(0x03B6); - break; - case 0x0397: - bufpush(0x03B7); - break; - case 0x0398: - bufpush(0x03B8); - break; - case 0x0399: - bufpush(0x03B9); - break; - case 0x039A: - bufpush(0x03BA); - break; - case 0x039B: - bufpush(0x03BB); - break; - case 0x039C: - bufpush(0x03BC); - break; - case 0x039D: - bufpush(0x03BD); - break; - case 0x039E: - bufpush(0x03BE); - break; - case 0x039F: - bufpush(0x03BF); - break; - case 0x03A0: - bufpush(0x03C0); - break; - case 0x03A1: - bufpush(0x03C1); - break; - case 0x03A3: - bufpush(0x03C3); - break; - case 0x03A4: - bufpush(0x03C4); - break; - case 0x03A5: - bufpush(0x03C5); - break; - case 0x03A6: - bufpush(0x03C6); - break; - case 0x03A7: - bufpush(0x03C7); - break; - case 0x03A8: - bufpush(0x03C8); - break; - case 0x03A9: - bufpush(0x03C9); - break; - case 0x03AA: - bufpush(0x03CA); - break; - case 0x03AB: - bufpush(0x03CB); - break; - case 0x03B0: - bufpush(0x03C5); - bufpush(0x0308); - bufpush(0x0301); - break; - case 0x03C2: - bufpush(0x03C3); - break; - case 0x03D0: - bufpush(0x03B2); - break; - case 0x03D1: - bufpush(0x03B8); - break; - case 0x03D5: - bufpush(0x03C6); - break; - case 0x03D6: - bufpush(0x03C0); - break; - case 0x03D8: - bufpush(0x03D9); - break; - 
case 0x03DA: - bufpush(0x03DB); - break; - case 0x03DC: - bufpush(0x03DD); - break; - case 0x03DE: - bufpush(0x03DF); - break; - case 0x03E0: - bufpush(0x03E1); - break; - case 0x03E2: - bufpush(0x03E3); - break; - case 0x03E4: - bufpush(0x03E5); - break; - case 0x03E6: - bufpush(0x03E7); - break; - case 0x03E8: - bufpush(0x03E9); - break; - case 0x03EA: - bufpush(0x03EB); - break; - case 0x03EC: - bufpush(0x03ED); - break; - case 0x03EE: - bufpush(0x03EF); - break; - case 0x03F0: - bufpush(0x03BA); - break; - case 0x03F1: - bufpush(0x03C1); - break; - case 0x03F2: - bufpush(0x03C3); - break; - case 0x03F4: - bufpush(0x03B8); - break; - case 0x03F5: - bufpush(0x03B5); - break; - case 0x0400: - bufpush(0x0450); - break; - case 0x0401: - bufpush(0x0451); - break; - case 0x0402: - bufpush(0x0452); - break; - case 0x0403: - bufpush(0x0453); - break; - case 0x0404: - bufpush(0x0454); - break; - case 0x0405: - bufpush(0x0455); - break; - case 0x0406: - bufpush(0x0456); - break; - case 0x0407: - bufpush(0x0457); - break; - case 0x0408: - bufpush(0x0458); - break; - case 0x0409: - bufpush(0x0459); - break; - case 0x040A: - bufpush(0x045A); - break; - case 0x040B: - bufpush(0x045B); - break; - case 0x040C: - bufpush(0x045C); - break; - case 0x040D: - bufpush(0x045D); - break; - case 0x040E: - bufpush(0x045E); - break; - case 0x040F: - bufpush(0x045F); - break; - case 0x0410: - bufpush(0x0430); - break; - case 0x0411: - bufpush(0x0431); - break; - case 0x0412: - bufpush(0x0432); - break; - case 0x0413: - bufpush(0x0433); - break; - case 0x0414: - bufpush(0x0434); - break; - case 0x0415: - bufpush(0x0435); - break; - case 0x0416: - bufpush(0x0436); - break; - case 0x0417: - bufpush(0x0437); - break; - case 0x0418: - bufpush(0x0438); - break; - case 0x0419: - bufpush(0x0439); - break; - case 0x041A: - bufpush(0x043A); - break; - case 0x041B: - bufpush(0x043B); - break; - case 0x041C: - bufpush(0x043C); - break; - case 0x041D: - bufpush(0x043D); - break; - case 0x041E: - 
bufpush(0x043E); - break; - case 0x041F: - bufpush(0x043F); - break; - case 0x0420: - bufpush(0x0440); - break; - case 0x0421: - bufpush(0x0441); - break; - case 0x0422: - bufpush(0x0442); - break; - case 0x0423: - bufpush(0x0443); - break; - case 0x0424: - bufpush(0x0444); - break; - case 0x0425: - bufpush(0x0445); - break; - case 0x0426: - bufpush(0x0446); - break; - case 0x0427: - bufpush(0x0447); - break; - case 0x0428: - bufpush(0x0448); - break; - case 0x0429: - bufpush(0x0449); - break; - case 0x042A: - bufpush(0x044A); - break; - case 0x042B: - bufpush(0x044B); - break; - case 0x042C: - bufpush(0x044C); - break; - case 0x042D: - bufpush(0x044D); - break; - case 0x042E: - bufpush(0x044E); - break; - case 0x042F: - bufpush(0x044F); - break; - case 0x0460: - bufpush(0x0461); - break; - case 0x0462: - bufpush(0x0463); - break; - case 0x0464: - bufpush(0x0465); - break; - case 0x0466: - bufpush(0x0467); - break; - case 0x0468: - bufpush(0x0469); - break; - case 0x046A: - bufpush(0x046B); - break; - case 0x046C: - bufpush(0x046D); - break; - case 0x046E: - bufpush(0x046F); - break; - case 0x0470: - bufpush(0x0471); - break; - case 0x0472: - bufpush(0x0473); - break; - case 0x0474: - bufpush(0x0475); - break; - case 0x0476: - bufpush(0x0477); - break; - case 0x0478: - bufpush(0x0479); - break; - case 0x047A: - bufpush(0x047B); - break; - case 0x047C: - bufpush(0x047D); - break; - case 0x047E: - bufpush(0x047F); - break; - case 0x0480: - bufpush(0x0481); - break; - case 0x048A: - bufpush(0x048B); - break; - case 0x048C: - bufpush(0x048D); - break; - case 0x048E: - bufpush(0x048F); - break; - case 0x0490: - bufpush(0x0491); - break; - case 0x0492: - bufpush(0x0493); - break; - case 0x0494: - bufpush(0x0495); - break; - case 0x0496: - bufpush(0x0497); - break; - case 0x0498: - bufpush(0x0499); - break; - case 0x049A: - bufpush(0x049B); - break; - case 0x049C: - bufpush(0x049D); - break; - case 0x049E: - bufpush(0x049F); - break; - case 0x04A0: - bufpush(0x04A1); - 
break; - case 0x04A2: - bufpush(0x04A3); - break; - case 0x04A4: - bufpush(0x04A5); - break; - case 0x04A6: - bufpush(0x04A7); - break; - case 0x04A8: - bufpush(0x04A9); - break; - case 0x04AA: - bufpush(0x04AB); - break; - case 0x04AC: - bufpush(0x04AD); - break; - case 0x04AE: - bufpush(0x04AF); - break; - case 0x04B0: - bufpush(0x04B1); - break; - case 0x04B2: - bufpush(0x04B3); - break; - case 0x04B4: - bufpush(0x04B5); - break; - case 0x04B6: - bufpush(0x04B7); - break; - case 0x04B8: - bufpush(0x04B9); - break; - case 0x04BA: - bufpush(0x04BB); - break; - case 0x04BC: - bufpush(0x04BD); - break; - case 0x04BE: - bufpush(0x04BF); - break; - case 0x04C1: - bufpush(0x04C2); - break; - case 0x04C3: - bufpush(0x04C4); - break; - case 0x04C5: - bufpush(0x04C6); - break; - case 0x04C7: - bufpush(0x04C8); - break; - case 0x04C9: - bufpush(0x04CA); - break; - case 0x04CB: - bufpush(0x04CC); - break; - case 0x04CD: - bufpush(0x04CE); - break; - case 0x04D0: - bufpush(0x04D1); - break; - case 0x04D2: - bufpush(0x04D3); - break; - case 0x04D4: - bufpush(0x04D5); - break; - case 0x04D6: - bufpush(0x04D7); - break; - case 0x04D8: - bufpush(0x04D9); - break; - case 0x04DA: - bufpush(0x04DB); - break; - case 0x04DC: - bufpush(0x04DD); - break; - case 0x04DE: - bufpush(0x04DF); - break; - case 0x04E0: - bufpush(0x04E1); - break; - case 0x04E2: - bufpush(0x04E3); - break; - case 0x04E4: - bufpush(0x04E5); - break; - case 0x04E6: - bufpush(0x04E7); - break; - case 0x04E8: - bufpush(0x04E9); - break; - case 0x04EA: - bufpush(0x04EB); - break; - case 0x04EC: - bufpush(0x04ED); - break; - case 0x04EE: - bufpush(0x04EF); - break; - case 0x04F0: - bufpush(0x04F1); - break; - case 0x04F2: - bufpush(0x04F3); - break; - case 0x04F4: - bufpush(0x04F5); - break; - case 0x04F8: - bufpush(0x04F9); - break; - case 0x0500: - bufpush(0x0501); - break; - case 0x0502: - bufpush(0x0503); - break; - case 0x0504: - bufpush(0x0505); - break; - case 0x0506: - bufpush(0x0507); - break; - case 0x0508: 
- bufpush(0x0509); - break; - case 0x050A: - bufpush(0x050B); - break; - case 0x050C: - bufpush(0x050D); - break; - case 0x050E: - bufpush(0x050F); - break; - case 0x0531: - bufpush(0x0561); - break; - case 0x0532: - bufpush(0x0562); - break; - case 0x0533: - bufpush(0x0563); - break; - case 0x0534: - bufpush(0x0564); - break; - case 0x0535: - bufpush(0x0565); - break; - case 0x0536: - bufpush(0x0566); - break; - case 0x0537: - bufpush(0x0567); - break; - case 0x0538: - bufpush(0x0568); - break; - case 0x0539: - bufpush(0x0569); - break; - case 0x053A: - bufpush(0x056A); - break; - case 0x053B: - bufpush(0x056B); - break; - case 0x053C: - bufpush(0x056C); - break; - case 0x053D: - bufpush(0x056D); - break; - case 0x053E: - bufpush(0x056E); - break; - case 0x053F: - bufpush(0x056F); - break; - case 0x0540: - bufpush(0x0570); - break; - case 0x0541: - bufpush(0x0571); - break; - case 0x0542: - bufpush(0x0572); - break; - case 0x0543: - bufpush(0x0573); - break; - case 0x0544: - bufpush(0x0574); - break; - case 0x0545: - bufpush(0x0575); - break; - case 0x0546: - bufpush(0x0576); - break; - case 0x0547: - bufpush(0x0577); - break; - case 0x0548: - bufpush(0x0578); - break; - case 0x0549: - bufpush(0x0579); - break; - case 0x054A: - bufpush(0x057A); - break; - case 0x054B: - bufpush(0x057B); - break; - case 0x054C: - bufpush(0x057C); - break; - case 0x054D: - bufpush(0x057D); - break; - case 0x054E: - bufpush(0x057E); - break; - case 0x054F: - bufpush(0x057F); - break; - case 0x0550: - bufpush(0x0580); - break; - case 0x0551: - bufpush(0x0581); - break; - case 0x0552: - bufpush(0x0582); - break; - case 0x0553: - bufpush(0x0583); - break; - case 0x0554: - bufpush(0x0584); - break; - case 0x0555: - bufpush(0x0585); - break; - case 0x0556: - bufpush(0x0586); - break; - case 0x0587: - bufpush(0x0565); - bufpush(0x0582); - break; - case 0x1E00: - bufpush(0x1E01); - break; - case 0x1E02: - bufpush(0x1E03); - break; - case 0x1E04: - bufpush(0x1E05); - break; - case 0x1E06: - 
bufpush(0x1E07); - break; - case 0x1E08: - bufpush(0x1E09); - break; - case 0x1E0A: - bufpush(0x1E0B); - break; - case 0x1E0C: - bufpush(0x1E0D); - break; - case 0x1E0E: - bufpush(0x1E0F); - break; - case 0x1E10: - bufpush(0x1E11); - break; - case 0x1E12: - bufpush(0x1E13); - break; - case 0x1E14: - bufpush(0x1E15); - break; - case 0x1E16: - bufpush(0x1E17); - break; - case 0x1E18: - bufpush(0x1E19); - break; - case 0x1E1A: - bufpush(0x1E1B); - break; - case 0x1E1C: - bufpush(0x1E1D); - break; - case 0x1E1E: - bufpush(0x1E1F); - break; - case 0x1E20: - bufpush(0x1E21); - break; - case 0x1E22: - bufpush(0x1E23); - break; - case 0x1E24: - bufpush(0x1E25); - break; - case 0x1E26: - bufpush(0x1E27); - break; - case 0x1E28: - bufpush(0x1E29); - break; - case 0x1E2A: - bufpush(0x1E2B); - break; - case 0x1E2C: - bufpush(0x1E2D); - break; - case 0x1E2E: - bufpush(0x1E2F); - break; - case 0x1E30: - bufpush(0x1E31); - break; - case 0x1E32: - bufpush(0x1E33); - break; - case 0x1E34: - bufpush(0x1E35); - break; - case 0x1E36: - bufpush(0x1E37); - break; - case 0x1E38: - bufpush(0x1E39); - break; - case 0x1E3A: - bufpush(0x1E3B); - break; - case 0x1E3C: - bufpush(0x1E3D); - break; - case 0x1E3E: - bufpush(0x1E3F); - break; - case 0x1E40: - bufpush(0x1E41); - break; - case 0x1E42: - bufpush(0x1E43); - break; - case 0x1E44: - bufpush(0x1E45); - break; - case 0x1E46: - bufpush(0x1E47); - break; - case 0x1E48: - bufpush(0x1E49); - break; - case 0x1E4A: - bufpush(0x1E4B); - break; - case 0x1E4C: - bufpush(0x1E4D); - break; - case 0x1E4E: - bufpush(0x1E4F); - break; - case 0x1E50: - bufpush(0x1E51); - break; - case 0x1E52: - bufpush(0x1E53); - break; - case 0x1E54: - bufpush(0x1E55); - break; - case 0x1E56: - bufpush(0x1E57); - break; - case 0x1E58: - bufpush(0x1E59); - break; - case 0x1E5A: - bufpush(0x1E5B); - break; - case 0x1E5C: - bufpush(0x1E5D); - break; - case 0x1E5E: - bufpush(0x1E5F); - break; - case 0x1E60: - bufpush(0x1E61); - break; - case 0x1E62: - bufpush(0x1E63); - 
break; - case 0x1E64: - bufpush(0x1E65); - break; - case 0x1E66: - bufpush(0x1E67); - break; - case 0x1E68: - bufpush(0x1E69); - break; - case 0x1E6A: - bufpush(0x1E6B); - break; - case 0x1E6C: - bufpush(0x1E6D); - break; - case 0x1E6E: - bufpush(0x1E6F); - break; - case 0x1E70: - bufpush(0x1E71); - break; - case 0x1E72: - bufpush(0x1E73); - break; - case 0x1E74: - bufpush(0x1E75); - break; - case 0x1E76: - bufpush(0x1E77); - break; - case 0x1E78: - bufpush(0x1E79); - break; - case 0x1E7A: - bufpush(0x1E7B); - break; - case 0x1E7C: - bufpush(0x1E7D); - break; - case 0x1E7E: - bufpush(0x1E7F); - break; - case 0x1E80: - bufpush(0x1E81); - break; - case 0x1E82: - bufpush(0x1E83); - break; - case 0x1E84: - bufpush(0x1E85); - break; - case 0x1E86: - bufpush(0x1E87); - break; - case 0x1E88: - bufpush(0x1E89); - break; - case 0x1E8A: - bufpush(0x1E8B); - break; - case 0x1E8C: - bufpush(0x1E8D); - break; - case 0x1E8E: - bufpush(0x1E8F); - break; - case 0x1E90: - bufpush(0x1E91); - break; - case 0x1E92: - bufpush(0x1E93); - break; - case 0x1E94: - bufpush(0x1E95); - break; - case 0x1E96: - bufpush(0x0068); - bufpush(0x0331); - break; - case 0x1E97: - bufpush(0x0074); - bufpush(0x0308); - break; - case 0x1E98: - bufpush(0x0077); - bufpush(0x030A); - break; - case 0x1E99: - bufpush(0x0079); - bufpush(0x030A); - break; - case 0x1E9A: - bufpush(0x0061); - bufpush(0x02BE); - break; - case 0x1E9B: - bufpush(0x1E61); - break; - case 0x1EA0: - bufpush(0x1EA1); - break; - case 0x1EA2: - bufpush(0x1EA3); - break; - case 0x1EA4: - bufpush(0x1EA5); - break; - case 0x1EA6: - bufpush(0x1EA7); - break; - case 0x1EA8: - bufpush(0x1EA9); - break; - case 0x1EAA: - bufpush(0x1EAB); - break; - case 0x1EAC: - bufpush(0x1EAD); - break; - case 0x1EAE: - bufpush(0x1EAF); - break; - case 0x1EB0: - bufpush(0x1EB1); - break; - case 0x1EB2: - bufpush(0x1EB3); - break; - case 0x1EB4: - bufpush(0x1EB5); - break; - case 0x1EB6: - bufpush(0x1EB7); - break; - case 0x1EB8: - bufpush(0x1EB9); - break; - 
case 0x1EBA: - bufpush(0x1EBB); - break; - case 0x1EBC: - bufpush(0x1EBD); - break; - case 0x1EBE: - bufpush(0x1EBF); - break; - case 0x1EC0: - bufpush(0x1EC1); - break; - case 0x1EC2: - bufpush(0x1EC3); - break; - case 0x1EC4: - bufpush(0x1EC5); - break; - case 0x1EC6: - bufpush(0x1EC7); - break; - case 0x1EC8: - bufpush(0x1EC9); - break; - case 0x1ECA: - bufpush(0x1ECB); - break; - case 0x1ECC: - bufpush(0x1ECD); - break; - case 0x1ECE: - bufpush(0x1ECF); - break; - case 0x1ED0: - bufpush(0x1ED1); - break; - case 0x1ED2: - bufpush(0x1ED3); - break; - case 0x1ED4: - bufpush(0x1ED5); - break; - case 0x1ED6: - bufpush(0x1ED7); - break; - case 0x1ED8: - bufpush(0x1ED9); - break; - case 0x1EDA: - bufpush(0x1EDB); - break; - case 0x1EDC: - bufpush(0x1EDD); - break; - case 0x1EDE: - bufpush(0x1EDF); - break; - case 0x1EE0: - bufpush(0x1EE1); - break; - case 0x1EE2: - bufpush(0x1EE3); - break; - case 0x1EE4: - bufpush(0x1EE5); - break; - case 0x1EE6: - bufpush(0x1EE7); - break; - case 0x1EE8: - bufpush(0x1EE9); - break; - case 0x1EEA: - bufpush(0x1EEB); - break; - case 0x1EEC: - bufpush(0x1EED); - break; - case 0x1EEE: - bufpush(0x1EEF); - break; - case 0x1EF0: - bufpush(0x1EF1); - break; - case 0x1EF2: - bufpush(0x1EF3); - break; - case 0x1EF4: - bufpush(0x1EF5); - break; - case 0x1EF6: - bufpush(0x1EF7); - break; - case 0x1EF8: - bufpush(0x1EF9); - break; - case 0x1F08: - bufpush(0x1F00); - break; - case 0x1F09: - bufpush(0x1F01); - break; - case 0x1F0A: - bufpush(0x1F02); - break; - case 0x1F0B: - bufpush(0x1F03); - break; - case 0x1F0C: - bufpush(0x1F04); - break; - case 0x1F0D: - bufpush(0x1F05); - break; - case 0x1F0E: - bufpush(0x1F06); - break; - case 0x1F0F: - bufpush(0x1F07); - break; - case 0x1F18: - bufpush(0x1F10); - break; - case 0x1F19: - bufpush(0x1F11); - break; - case 0x1F1A: - bufpush(0x1F12); - break; - case 0x1F1B: - bufpush(0x1F13); - break; - case 0x1F1C: - bufpush(0x1F14); - break; - case 0x1F1D: - bufpush(0x1F15); - break; - case 0x1F28: - 
bufpush(0x1F20); - break; - case 0x1F29: - bufpush(0x1F21); - break; - case 0x1F2A: - bufpush(0x1F22); - break; - case 0x1F2B: - bufpush(0x1F23); - break; - case 0x1F2C: - bufpush(0x1F24); - break; - case 0x1F2D: - bufpush(0x1F25); - break; - case 0x1F2E: - bufpush(0x1F26); - break; - case 0x1F2F: - bufpush(0x1F27); - break; - case 0x1F38: - bufpush(0x1F30); - break; - case 0x1F39: - bufpush(0x1F31); - break; - case 0x1F3A: - bufpush(0x1F32); - break; - case 0x1F3B: - bufpush(0x1F33); - break; - case 0x1F3C: - bufpush(0x1F34); - break; - case 0x1F3D: - bufpush(0x1F35); - break; - case 0x1F3E: - bufpush(0x1F36); - break; - case 0x1F3F: - bufpush(0x1F37); - break; - case 0x1F48: - bufpush(0x1F40); - break; - case 0x1F49: - bufpush(0x1F41); - break; - case 0x1F4A: - bufpush(0x1F42); - break; - case 0x1F4B: - bufpush(0x1F43); - break; - case 0x1F4C: - bufpush(0x1F44); - break; - case 0x1F4D: - bufpush(0x1F45); - break; - case 0x1F50: - bufpush(0x03C5); - bufpush(0x0313); - break; - case 0x1F52: - bufpush(0x03C5); - bufpush(0x0313); - bufpush(0x0300); - break; - case 0x1F54: - bufpush(0x03C5); - bufpush(0x0313); - bufpush(0x0301); - break; - case 0x1F56: - bufpush(0x03C5); - bufpush(0x0313); - bufpush(0x0342); - break; - case 0x1F59: - bufpush(0x1F51); - break; - case 0x1F5B: - bufpush(0x1F53); - break; - case 0x1F5D: - bufpush(0x1F55); - break; - case 0x1F5F: - bufpush(0x1F57); - break; - case 0x1F68: - bufpush(0x1F60); - break; - case 0x1F69: - bufpush(0x1F61); - break; - case 0x1F6A: - bufpush(0x1F62); - break; - case 0x1F6B: - bufpush(0x1F63); - break; - case 0x1F6C: - bufpush(0x1F64); - break; - case 0x1F6D: - bufpush(0x1F65); - break; - case 0x1F6E: - bufpush(0x1F66); - break; - case 0x1F6F: - bufpush(0x1F67); - break; - case 0x1F80: - bufpush(0x1F00); - bufpush(0x03B9); - break; - case 0x1F81: - bufpush(0x1F01); - bufpush(0x03B9); - break; - case 0x1F82: - bufpush(0x1F02); - bufpush(0x03B9); - break; - case 0x1F83: - bufpush(0x1F03); - bufpush(0x03B9); - break; - 
case 0x1F84: - bufpush(0x1F04); - bufpush(0x03B9); - break; - case 0x1F85: - bufpush(0x1F05); - bufpush(0x03B9); - break; - case 0x1F86: - bufpush(0x1F06); - bufpush(0x03B9); - break; - case 0x1F87: - bufpush(0x1F07); - bufpush(0x03B9); - break; - case 0x1F88: - bufpush(0x1F00); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1F89: - bufpush(0x1F01); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1F8A: - bufpush(0x1F02); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1F8B: - bufpush(0x1F03); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1F8C: - bufpush(0x1F04); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1F8D: - bufpush(0x1F05); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1F8E: - bufpush(0x1F06); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1F8F: - bufpush(0x1F07); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1F90: - bufpush(0x1F20); - bufpush(0x03B9); - break; - case 0x1F91: - bufpush(0x1F21); - bufpush(0x03B9); - break; - case 0x1F92: - bufpush(0x1F22); - bufpush(0x03B9); - break; - case 0x1F93: - bufpush(0x1F23); - bufpush(0x03B9); - break; - case 0x1F94: - bufpush(0x1F24); - bufpush(0x03B9); - break; - case 0x1F95: - bufpush(0x1F25); - bufpush(0x03B9); - break; - case 0x1F96: - bufpush(0x1F26); - bufpush(0x03B9); - break; - case 0x1F97: - bufpush(0x1F27); - bufpush(0x03B9); - break; - case 0x1F98: - bufpush(0x1F20); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1F99: - bufpush(0x1F21); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1F9A: - bufpush(0x1F22); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1F9B: - bufpush(0x1F23); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1F9C: - bufpush(0x1F24); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1F9D: - bufpush(0x1F25); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1F9E: - bufpush(0x1F26); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1F9F: - 
bufpush(0x1F27); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1FA0: - bufpush(0x1F60); - bufpush(0x03B9); - break; - case 0x1FA1: - bufpush(0x1F61); - bufpush(0x03B9); - break; - case 0x1FA2: - bufpush(0x1F62); - bufpush(0x03B9); - break; - case 0x1FA3: - bufpush(0x1F63); - bufpush(0x03B9); - break; - case 0x1FA4: - bufpush(0x1F64); - bufpush(0x03B9); - break; - case 0x1FA5: - bufpush(0x1F65); - bufpush(0x03B9); - break; - case 0x1FA6: - bufpush(0x1F66); - bufpush(0x03B9); - break; - case 0x1FA7: - bufpush(0x1F67); - bufpush(0x03B9); - break; - case 0x1FA8: - bufpush(0x1F60); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1FA9: - bufpush(0x1F61); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1FAA: - bufpush(0x1F62); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1FAB: - bufpush(0x1F63); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1FAC: - bufpush(0x1F64); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1FAD: - bufpush(0x1F65); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1FAE: - bufpush(0x1F66); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1FAF: - bufpush(0x1F67); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1FB2: - bufpush(0x1F70); - bufpush(0x03B9); - break; - case 0x1FB3: - bufpush(0x03B1); - bufpush(0x03B9); - break; - case 0x1FB4: - bufpush(0x03AC); - bufpush(0x03B9); - break; - case 0x1FB6: - bufpush(0x03B1); - bufpush(0x0342); - break; - case 0x1FB7: - bufpush(0x03B1); - bufpush(0x0342); - bufpush(0x03B9); - break; - case 0x1FB8: - bufpush(0x1FB0); - break; - case 0x1FB9: - bufpush(0x1FB1); - break; - case 0x1FBA: - bufpush(0x1F70); - break; - case 0x1FBB: - bufpush(0x1F71); - break; - case 0x1FBC: - bufpush(0x03B1); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1FBE: - bufpush(0x03B9); - break; - case 0x1FC2: - bufpush(0x1F74); - bufpush(0x03B9); - break; - case 0x1FC3: - bufpush(0x03B7); - bufpush(0x03B9); - break; - case 0x1FC4: - 
bufpush(0x03AE); - bufpush(0x03B9); - break; - case 0x1FC6: - bufpush(0x03B7); - bufpush(0x0342); - break; - case 0x1FC7: - bufpush(0x03B7); - bufpush(0x0342); - bufpush(0x03B9); - break; - case 0x1FC8: - bufpush(0x1F72); - break; - case 0x1FC9: - bufpush(0x1F73); - break; - case 0x1FCA: - bufpush(0x1F74); - break; - case 0x1FCB: - bufpush(0x1F75); - break; - case 0x1FCC: - bufpush(0x03B7); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x1FD2: - bufpush(0x03B9); - bufpush(0x0308); - bufpush(0x0300); - break; - case 0x1FD3: - bufpush(0x03B9); - bufpush(0x0308); - bufpush(0x0301); - break; - case 0x1FD6: - bufpush(0x03B9); - bufpush(0x0342); - break; - case 0x1FD7: - bufpush(0x03B9); - bufpush(0x0308); - bufpush(0x0342); - break; - case 0x1FD8: - bufpush(0x1FD0); - break; - case 0x1FD9: - bufpush(0x1FD1); - break; - case 0x1FDA: - bufpush(0x1F76); - break; - case 0x1FDB: - bufpush(0x1F77); - break; - case 0x1FE2: - bufpush(0x03C5); - bufpush(0x0308); - bufpush(0x0300); - break; - case 0x1FE3: - bufpush(0x03C5); - bufpush(0x0308); - bufpush(0x0301); - break; - case 0x1FE4: - bufpush(0x03C1); - bufpush(0x0313); - break; - case 0x1FE6: - bufpush(0x03C5); - bufpush(0x0342); - break; - case 0x1FE7: - bufpush(0x03C5); - bufpush(0x0308); - bufpush(0x0342); - break; - case 0x1FE8: - bufpush(0x1FE0); - break; - case 0x1FE9: - bufpush(0x1FE1); - break; - case 0x1FEA: - bufpush(0x1F7A); - break; - case 0x1FEB: - bufpush(0x1F7B); - break; - case 0x1FEC: - bufpush(0x1FE5); - break; - case 0x1FF2: - bufpush(0x1F7C); - bufpush(0x03B9); - break; - case 0x1FF3: - bufpush(0x03C9); - bufpush(0x03B9); - break; - case 0x1FF4: - bufpush(0x03CE); - bufpush(0x03B9); - break; - case 0x1FF6: - bufpush(0x03C9); - bufpush(0x0342); - break; - case 0x1FF7: - bufpush(0x03C9); - bufpush(0x0342); - bufpush(0x03B9); - break; - case 0x1FF8: - bufpush(0x1F78); - break; - case 0x1FF9: - bufpush(0x1F79); - break; - case 0x1FFA: - bufpush(0x1F7C); - break; - case 0x1FFB: - bufpush(0x1F7D); - 
break; - case 0x1FFC: - bufpush(0x03C9); - bufpush(0x03B9); - break; - case 0x: - break; - case 0x2126: - bufpush(0x03C9); - break; - case 0x212A: - bufpush(0x006B); - break; - case 0x212B: - bufpush(0x00E5); - break; - case 0x2160: - bufpush(0x2170); - break; - case 0x2161: - bufpush(0x2171); - break; - case 0x2162: - bufpush(0x2172); - break; - case 0x2163: - bufpush(0x2173); - break; - case 0x2164: - bufpush(0x2174); - break; - case 0x2165: - bufpush(0x2175); - break; - case 0x2166: - bufpush(0x2176); - break; - case 0x2167: - bufpush(0x2177); - break; - case 0x2168: - bufpush(0x2178); - break; - case 0x2169: - bufpush(0x2179); - break; - case 0x216A: - bufpush(0x217A); - break; - case 0x216B: - bufpush(0x217B); - break; - case 0x216C: - bufpush(0x217C); - break; - case 0x216D: - bufpush(0x217D); - break; - case 0x216E: - bufpush(0x217E); - break; - case 0x216F: - bufpush(0x217F); - break; - case 0x24B6: - bufpush(0x24D0); - break; - case 0x24B7: - bufpush(0x24D1); - break; - case 0x24B8: - bufpush(0x24D2); - break; - case 0x24B9: - bufpush(0x24D3); - break; - case 0x24BA: - bufpush(0x24D4); - break; - case 0x24BB: - bufpush(0x24D5); - break; - case 0x24BC: - bufpush(0x24D6); - break; - case 0x24BD: - bufpush(0x24D7); - break; - case 0x24BE: - bufpush(0x24D8); - break; - case 0x24BF: - bufpush(0x24D9); - break; - case 0x24C0: - bufpush(0x24DA); - break; - case 0x24C1: - bufpush(0x24DB); - break; - case 0x24C2: - bufpush(0x24DC); - break; - case 0x24C3: - bufpush(0x24DD); - break; - case 0x24C4: - bufpush(0x24DE); - break; - case 0x24C5: - bufpush(0x24DF); - break; - case 0x24C6: - bufpush(0x24E0); - break; - case 0x24C7: - bufpush(0x24E1); - break; - case 0x24C8: - bufpush(0x24E2); - break; - case 0x24C9: - bufpush(0x24E3); - break; - case 0x24CA: - bufpush(0x24E4); - break; - case 0x24CB: - bufpush(0x24E5); - break; - case 0x24CC: - bufpush(0x24E6); - break; - case 0x24CD: - bufpush(0x24E7); - break; - case 0x24CE: - bufpush(0x24E8); - break; - case 0x24CF: - 
bufpush(0x24E9); - break; - case 0xFB00: - bufpush(0x0066); - bufpush(0x0066); - break; - case 0xFB01: - bufpush(0x0066); - bufpush(0x0069); - break; - case 0xFB02: - bufpush(0x0066); - bufpush(0x006C); - break; - case 0xFB03: - bufpush(0x0066); - bufpush(0x0066); - bufpush(0x0069); - break; - case 0xFB04: - bufpush(0x0066); - bufpush(0x0066); - bufpush(0x006C); - break; - case 0xFB05: - bufpush(0x0073); - bufpush(0x0074); - break; - case 0xFB06: - bufpush(0x0073); - bufpush(0x0074); - break; - case 0xFB13: - bufpush(0x0574); - bufpush(0x0576); - break; - case 0xFB14: - bufpush(0x0574); - bufpush(0x0565); - break; - case 0xFB15: - bufpush(0x0574); - bufpush(0x056B); - break; - case 0xFB16: - bufpush(0x057E); - bufpush(0x0576); - break; - case 0xFB17: - bufpush(0x0574); - bufpush(0x056D); - break; - case 0xFF21: - bufpush(0xFF41); - break; - case 0xFF22: - bufpush(0xFF42); - break; - case 0xFF23: - bufpush(0xFF43); - break; - case 0xFF24: - bufpush(0xFF44); - break; - case 0xFF25: - bufpush(0xFF45); - break; - case 0xFF26: - bufpush(0xFF46); - break; - case 0xFF27: - bufpush(0xFF47); - break; - case 0xFF28: - bufpush(0xFF48); - break; - case 0xFF29: - bufpush(0xFF49); - break; - case 0xFF2A: - bufpush(0xFF4A); - break; - case 0xFF2B: - bufpush(0xFF4B); - break; - case 0xFF2C: - bufpush(0xFF4C); - break; - case 0xFF2D: - bufpush(0xFF4D); - break; - case 0xFF2E: - bufpush(0xFF4E); - break; - case 0xFF2F: - bufpush(0xFF4F); - break; - case 0xFF30: - bufpush(0xFF50); - break; - case 0xFF31: - bufpush(0xFF51); - break; - case 0xFF32: - bufpush(0xFF52); - break; - case 0xFF33: - bufpush(0xFF53); - break; - case 0xFF34: - bufpush(0xFF54); - break; - case 0xFF35: - bufpush(0xFF55); - break; - case 0xFF36: - bufpush(0xFF56); - break; - case 0xFF37: - bufpush(0xFF57); - break; - case 0xFF38: - bufpush(0xFF58); - break; - case 0xFF39: - bufpush(0xFF59); - break; - case 0xFF3A: - bufpush(0xFF5A); - break; - case 0x10400: - bufpush(0x10428); - break; - case 0x10401: - 
bufpush(0x10429); - break; - case 0x10402: - bufpush(0x1042A); - break; - case 0x10403: - bufpush(0x1042B); - break; - case 0x10404: - bufpush(0x1042C); - break; - case 0x10405: - bufpush(0x1042D); - break; - case 0x10406: - bufpush(0x1042E); - break; - case 0x10407: - bufpush(0x1042F); - break; - case 0x10408: - bufpush(0x10430); - break; - case 0x10409: - bufpush(0x10431); - break; - case 0x1040A: - bufpush(0x10432); - break; - case 0x1040B: - bufpush(0x10433); - break; - case 0x1040C: - bufpush(0x10434); - break; - case 0x1040D: - bufpush(0x10435); - break; - case 0x1040E: - bufpush(0x10436); - break; - case 0x1040F: - bufpush(0x10437); - break; - case 0x10410: - bufpush(0x10438); - break; - case 0x10411: - bufpush(0x10439); - break; - case 0x10412: - bufpush(0x1043A); - break; - case 0x10413: - bufpush(0x1043B); - break; - case 0x10414: - bufpush(0x1043C); - break; - case 0x10415: - bufpush(0x1043D); - break; - case 0x10416: - bufpush(0x1043E); - break; - case 0x10417: - bufpush(0x1043F); - break; - case 0x10418: - bufpush(0x10440); - break; - case 0x10419: - bufpush(0x10441); - break; - case 0x1041A: - bufpush(0x10442); - break; - case 0x1041B: - bufpush(0x10443); - break; - case 0x1041C: - bufpush(0x10444); - break; - case 0x1041D: - bufpush(0x10445); - break; - case 0x1041E: - bufpush(0x10446); - break; - case 0x1041F: - bufpush(0x10447); - break; - case 0x10420: - bufpush(0x10448); - break; - case 0x10421: - bufpush(0x10449); - break; - case 0x10422: - bufpush(0x1044A); - break; - case 0x10423: - bufpush(0x1044B); - break; - case 0x10424: - bufpush(0x1044C); - break; - case 0x10425: - bufpush(0x1044D); - break; - } diff --git a/src/detab.c b/src/detab.c deleted file mode 100644 index e03fcf7..0000000 --- a/src/detab.c +++ /dev/null @@ -1,48 +0,0 @@ -#include "bstrlib.h" - -// UTF-8 aware detab: assumes s has no newlines, or only a final newline. -// Return 0 on success, BSTR_ERR if invalid UTF-8. 
-extern int bdetab(bstring s, int utf8) -{ - unsigned char c; - int pos = 0; // a count of characters - int byte = 0; // a count of bytes - int high_chars_to_skip = 0; - int numspaces = 0; - while ((c = bchar(s, byte))) { - if (utf8 && high_chars_to_skip > 0) { - if (c >= 0x80) { - high_chars_to_skip--; - byte++; - } else { - return BSTR_ERR; // invalid utf-8 - } - } else if (c == '\t') { - bdelete(s, byte, 1); // delete tab character - numspaces = 4 - (pos % 4); - binsertch(s, byte, numspaces, ' '); - byte += numspaces; - pos += numspaces; - } else if (c <= 0x80 || !utf8) { - byte++; - pos++; - } else { // multibyte utf8 sequences - if (c >> 1 == 0176) { - high_chars_to_skip = 5; - } else if (c >> 2 == 076) { - high_chars_to_skip = 4; - } else if (c >> 3 == 036) { - high_chars_to_skip = 3; - } else if (c >> 4 == 016) { - high_chars_to_skip = 2; - } else if (c >> 5 == 06) { - high_chars_to_skip = 1; - } else { - return BSTR_ERR; // invalid utf-8 - } - pos++; - byte++; - } - } - return 0; -} diff --git a/src/getopt.c b/src/getopt.c deleted file mode 100644 index 321dd9f..0000000 --- a/src/getopt.c +++ /dev/null @@ -1,199 +0,0 @@ -/* $Id: getopt.c 4022 2008-03-31 06:11:07Z rra $ - * - * Replacement implementation of getopt. - * - * This is a replacement implementation for getopt based on the my_getopt - * distribution by Benjamin Sittler. Only the getopt interface is included, - * since remctl doesn't use GNU long options, and the code has been rearranged - * and reworked somewhat to fit with the remctl coding style. 
- * - * Copyright 1997, 2000, 2001, 2002 Benjamin Sittler - * Copyright 2008 Russ Allbery - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include -#include -#include - -/* - * If we're running the test suite, rename getopt and the global variables to - * avoid conflicts with the system version. - */ -#if TESTING -# define getopt test_getopt -int test_getopt(int, char **, const char *); -# define optind test_optind -# define opterr test_opterr -# define optopt test_optopt -# define optarg test_optarg -#endif - -/* Initialize global interface variables. */ -int optind = 1; -int opterr = 1; -int optopt = 0; -char *optarg = NULL; - -/* - * This is the plain old UNIX getopt, with GNU-style extensions. If you're - * porting some piece of UNIX software, this is all you need. It supports - * GNU-style permutation and optional arguments, but does not support the GNU - * -W extension. 
- * - * This function is not re-entrant or thread-safe, has static variables, and - * generally isn't a great interface, but normally you only call it once. - */ -int -getopt(int argc, char *argv[], const char *optstring) -{ - const char *p; - size_t offset = 0; - char mode = '\0'; - int colon_mode = 0; - int option = -1; - - /* Holds the current position in the parameter being parsed. */ - static int charind = 0; - - /* - * By default, getopt permutes argv as it scans and leaves all non-options - * at the end. This can be changed with the first character of optstring - * or the environment variable POSIXLY_CORRECT. With a first character of - * '+' or when POSIXLY_CORRECT is set, option processing stops at the - * first non-option. If the first character is '-', each non-option argv - * element is handled as if it were the argument of an option with - * character code 1. mode holds this character. - * - * After the optional leading '+' and '-', optstring may contain ':'. If - * present, missing arguments return ':' instead of '?'. colon_mode holds - * this setting. - */ - if (getenv("POSIXLY_CORRECT") != NULL) { - mode = '+'; - colon_mode = '+'; - } else { - if (optstring[offset] == '+' || optstring[offset] == '-') { - mode = optstring[offset]; - offset++; - } - if (optstring[offset] == ':') { - colon_mode = 1; - offset++; - } - } - - /* - * charind holds where we left off. If it's set, we were in the middle - * of an argv element; if not, we pick up with the next element of - * optind. 
- */ - optarg = NULL; - if (charind == 0) { - if (optind >= argc) - option = -1; - else if (strcmp(argv[optind], "--") == 0) { - optind++; - option = -1; - } else if (argv[optind][0] != '-' || argv[optind][1] == '\0') { - char *tmp; - int i, j, k, end; - - if (mode == '+') - option = -1; - else if (mode == '-') { - optarg = argv[optind]; - optind++; - option = 1; - } else { - for (i = optind + 1, j = optind; i < argc; i++) - if ((argv[i][0] == '-') && (argv[i][1] != '\0')) { - optind = i; - option = getopt(argc, argv, optstring); - while (i > j) { - --i; - tmp = argv[i]; - end = (charind == 0) ? optind - 1 : optind; - for (k = i; k + 1 <= end; k++) { - argv[k] = argv[k + 1]; - } - argv[end] = tmp; - --optind; - } - break; - } - if (i == argc) - option = -1; - } - return option; - } else { - charind = 1; - } - } - if (charind != 0) { - optopt = argv[optind][charind]; - for (p = optstring + offset; *p != '\0'; p++) - if (optopt == *p) { - p++; - if (*p == ':') { - if (argv[optind][charind + 1] != '\0') { - optarg = &argv[optind][charind + 1]; - optind++; - charind = 0; - } else { - p++; - if (*p != ':') { - charind = 0; - optind++; - if (optind >= argc) { - if (opterr) - fprintf(stderr, "%s: option requires" - " an argument -- %c\n", argv[0], - optopt); - option = colon_mode ? 
':' : '?'; - goto done; - } else { - optarg = argv[optind]; - optind++; - } - } - } - } - option = optopt; - } - if (option == -1) { - if (opterr) - fprintf(stderr, "%s: illegal option -- %c\n", argv[0], optopt); - option = '?'; - } - } - -done: - if (charind != 0) { - charind++; - if (argv[optind][charind] == '\0') { - optind++; - charind = 0; - } - } - if (optind > argc) - optind = argc; - return option; -} diff --git a/src/inlines.c b/src/inlines.c index f75c846..4ff45ad 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -2,133 +2,154 @@ #include #include #include -#include "bstrlib.h" +#include + #include "stmd.h" #include "uthash.h" #include "debug.h" #include "scanners.h" #include "utf8.h" +typedef struct Subject { + const gh_buf *buffer; + int pos; + reference** reference_map; + int label_nestlevel; +} subject; + +reference* lookup_reference(reference** refmap, chunk *label); +reference* make_reference(chunk *label, chunk *url, chunk *title); + +static unsigned char *clean_url(chunk *url); +static unsigned char *clean_title(chunk *title); + +inline static unsigned char *chunk_to_cstr(chunk *c); +inline static void chunk_free(chunk *c); +inline static void chunk_trim(chunk *c); + +inline static chunk chunk_literal(const char *data); +inline static chunk chunk_buf_detach(gh_buf *buf); +inline static chunk chunk_buf(const gh_buf *buf, int pos, int len); + +static inl *parse_chunk_inlines(chunk *chunk, reference** refmap); +static inl *parse_inlines_while(subject* subj, int (*f)(subject*)); +static int parse_inline(subject* subj, inl ** last); + extern void free_reference(reference *ref) { - bdestroy(ref->label); - bdestroy(ref->url); - bdestroy(ref->title); - free(ref); + free(ref->label); + free(ref->url); + free(ref->title); + free(ref); } extern void free_reference_map(reference **refmap) { - /* free the hash table contents */ - reference *s; - reference *tmp; - if (refmap != NULL) { - HASH_ITER(hh, *refmap, s, tmp) { - HASH_DEL(*refmap, s); - 
free_reference(s); - } - free(refmap); - } + /* free the hash table contents */ + reference *s; + reference *tmp; + if (refmap != NULL) { + HASH_ITER(hh, *refmap, s, tmp) { + HASH_DEL(*refmap, s); + free_reference(s); + } + free(refmap); + } } // normalize reference: collapse internal whitespace to single space, // remove leading/trailing whitespace, case fold -static bstring normalize_reference(bstring s) -{ - bstring normalized = case_fold(s); - int pos = 0; - int startpos; - char c; - while ((c = bchar(normalized, pos))) { - if (isspace(c)) { - startpos = pos; - // skip til next non-space - pos++; - while (isspace(bchar(s, pos))) { - pos++; - } - bdelete(normalized, startpos, pos - startpos); - binsertch(normalized, startpos, 1, ' '); - pos = startpos + 1; - } - pos++; - } - btrimws(normalized); - return normalized; +static unsigned char *normalize_reference(chunk *ref) +{ + gh_buf normalized = GH_BUF_INIT; + int r, w; + + utf8proc_case_fold(&normalized, ref->data, ref->len); + gh_buf_trim(&normalized); + + for (r = 0, w = 0; r < normalized.size; ++r) { + if (r && gh_buf_at(&normalized, r - 1) == ' ') { + while (gh_buf_at(&normalized, r) == ' ') + r++; + } + + normalized.ptr[w++] = normalized.ptr[r]; + } + + return gh_buf_detach(&normalized); } // Returns reference if refmap contains a reference with matching // label, otherwise NULL. 
-extern reference* lookup_reference(reference** refmap, bstring lab) +extern reference* lookup_reference(reference** refmap, chunk *label) { - reference * ref = NULL; - bstring label = normalize_reference(lab); - if (refmap != NULL) { - HASH_FIND_STR(*refmap, (char*) label->data, ref); - } - bdestroy(label); - return ref; + reference *ref = NULL; + unsigned char *norm = normalize_reference(label); + if (refmap != NULL) { + HASH_FIND_STR(*refmap, (char*)norm, ref); + } + free(norm); /* release the normalized key allocated above; `label` belongs to the caller */ + return ref; } -extern reference* make_reference(bstring label, bstring url, bstring title) +extern reference* make_reference(chunk *label, chunk *url, chunk *title) { - reference * ref; - ref = malloc(sizeof(reference)); - ref->label = normalize_reference(label); - ref->url = bstrcpy(url); - ref->title = bstrcpy(title); - return ref; + reference *ref; + ref = malloc(sizeof(reference)); + ref->label = normalize_reference(label); + ref->url = clean_url(url); + ref->title = clean_title(title); + return ref; } extern void add_reference(reference** refmap, reference* ref) { - reference * t = NULL; - HASH_FIND(hh, *refmap, (char*) ref->label->data, - (unsigned) blength(ref->label), t); - if (t == NULL) { - HASH_ADD_KEYPTR(hh, *refmap, (char*) ref->label->data, - (unsigned) blength(ref->label), ref); - } else { - free_reference(ref); // we free this now since it won't be in the refmap - } + reference * t = NULL; + HASH_FIND(hh, *refmap, (char*)ref->label, (unsigned)strlen((char*)ref->label), t); + + if (t == NULL) { + HASH_ADD_KEYPTR(hh, *refmap, (char*)ref->label, (unsigned)strlen((char*)ref->label), ref); + } else { + free_reference(ref); // we free this now since it won't be in the refmap + } } // Create an inline with a linkable string value. 
-inline static inl* make_linkable(int t, inl* label, bstring url, bstring title) +inline static inl* make_linkable(int t, inl* label, chunk url, chunk title) { - inl* e = (inl*) malloc(sizeof(inl)); - e->tag = t; - e->content.linkable.label = label; - e->content.linkable.url = url; - e->content.linkable.title = title; - e->next = NULL; - return e; + inl* e = (inl*) malloc(sizeof(inl)); + e->tag = t; + e->content.linkable.label = label; + e->content.linkable.url = chunk_to_cstr(&url); + e->content.linkable.title = chunk_to_cstr(&title); + e->next = NULL; + return e; } inline static inl* make_inlines(int t, inl* contents) { - inl* e = (inl*) malloc(sizeof(inl)); - e->tag = t; - e->content.inlines = contents; - e->next = NULL; - return e; + inl* e = (inl*) malloc(sizeof(inl)); + e->tag = t; + e->content.inlines = contents; + e->next = NULL; + return e; } // Create an inline with a literal string value. -inline static inl* make_literal(int t, bstring s) +inline static inl* make_literal(int t, chunk s) { - inl* e = (inl*) malloc(sizeof(inl)); - e->tag = t; - e->content.literal = s; - e->next = NULL; - return e; + inl* e = (inl*) malloc(sizeof(inl)); + e->tag = t; + e->content.literal = s; + e->next = NULL; + return e; } // Create an inline with no value. inline static inl* make_simple(int t) { - inl* e = (inl*) malloc(sizeof(inl)); - e->tag = t; - e->next = NULL; - return e; + inl* e = (inl*) malloc(sizeof(inl)); + e->tag = t; + e->next = NULL; + return e; } // Macros for creating various kinds of inlines. @@ -139,113 +160,157 @@ inline static inl* make_simple(int t) #define make_linebreak() make_simple(linebreak) #define make_softbreak() make_simple(softbreak) #define make_link(label, url, title) make_linkable(link, label, url, title) -#define make_image(alt, url, title) make_linkable(image, alt, url, title) #define make_emph(contents) make_inlines(emph, contents) #define make_strong(contents) make_inlines(strong, contents) // Free an inline list. 
extern void free_inlines(inl* e) { - inl * next; - while (e != NULL) { - switch (e->tag){ - case str: - case raw_html: - case code: - case entity: - bdestroy(e->content.literal); - break; - case linebreak: - case softbreak: - break; - case link: - case image: - bdestroy(e->content.linkable.url); - bdestroy(e->content.linkable.title); - free_inlines(e->content.linkable.label); - break; - case emph: - case strong: - free_inlines(e->content.inlines); - break; - default: - break; - } - next = e->next; - free(e); - e = next; - } + inl * next; + while (e != NULL) { + switch (e->tag){ + case str: + case raw_html: + case code: + case entity: + chunk_free(&e->content.literal); + break; + case linebreak: + case softbreak: + break; + case link: + case image: + free(e->content.linkable.url); + free(e->content.linkable.title); + free_inlines(e->content.linkable.label); + break; + case emph: + case strong: + free_inlines(e->content.inlines); + break; + default: + break; + } + next = e->next; + free(e); + e = next; + } } // Append inline list b to the end of inline list a. // Return pointer to head of new list. inline static inl* append_inlines(inl* a, inl* b) { - if (a == NULL) { // NULL acts like an empty list - return b; - } - inl* cur = a; - while (cur->next) { - cur = cur->next; - } - cur->next = b; - return a; + if (a == NULL) { // NULL acts like an empty list + return b; + } + inl* cur = a; + while (cur->next) { + cur = cur->next; + } + cur->next = b; + return a; } // Make a 'subject' from an input string. 
-static subject* make_subject(bstring s, reference** refmap) +static void init_subject(subject *e, gh_buf *buffer, int input_pos, reference** refmap) { - subject* e = (subject*) malloc(sizeof(subject)); - // remove final whitespace - brtrimws(s); - e->buffer = s; - e->pos = 0; - e->label_nestlevel = 0; - e->reference_map = refmap; - return e; + e->buffer = buffer; + e->pos = input_pos; + e->label_nestlevel = 0; + e->reference_map = refmap; } inline static int isbacktick(int c) { - return (c == '`'); + return (c == '`'); +} + +inline static void chunk_free(chunk *c) +{ + if (c->alloc) + free((char *)c->data); + + c->data = NULL; + c->alloc = 0; + c->len = 0; +} + +inline static void chunk_trim(chunk *c) +{ + while (c->len && isspace(c->data[0])) { + c->data++; + c->len--; + } + + while (c->len > 0) { + if (!isspace(c->data[c->len - 1])) + break; + + c->len--; + } +} + +inline static unsigned char *chunk_to_cstr(chunk *c) +{ + unsigned char *str; + + str = malloc(c->len + 1); + memcpy(str, c->data, c->len); + str[c->len] = 0; + + return str; +} + +inline static chunk chunk_literal(const char *data) +{ + chunk c = {data, strlen(data), 0}; + return c; +} + +inline static chunk chunk_buf(const gh_buf *buf, int pos, int len) +{ + chunk c = {buf->ptr + pos, len, 0}; + return c; +} + +inline static chunk chunk_buf_detach(gh_buf *buf) +{ + chunk c; + + c.len = buf->size; + c.data = gh_buf_detach(buf); + c.alloc = 1; + + return c; } // Return the next character in the subject, without advancing. // Return 0 if at the end of the subject. -#define peek_char(subj) bchar(subj->buffer, subj->pos) +#define peek_char(subj) gh_buf_at((subj)->buffer, (subj)->pos) // Return true if there are more characters in the subject. inline static int is_eof(subject* subj) { - return (subj->pos >= blength(subj->buffer)); + return (subj->pos >= gh_buf_len(subj->buffer)); } // Advance the subject. Doesn't check for eof. 
-#define advance(subj) subj->pos += 1 +#define advance(subj) (subj)->pos += 1 // Take characters while a predicate holds, and return a string. -inline static bstring take_while(subject* subj, int (*f)(int)) +inline static chunk take_while(subject* subj, int (*f)(int)) { - unsigned char c; - int startpos = subj->pos; - int len = 0; - while ((c = peek_char(subj)) && (*f)(c)) { - advance(subj); - len++; - } - return bmidstr(subj->buffer, startpos, len); -} + unsigned char c; + int startpos = subj->pos; + int len = 0; -// Take one character and return a string, or NULL if eof. -inline static bstring take_one(subject* subj) -{ - int startpos = subj->pos; - if (is_eof(subj)){ - return NULL; - } else { - advance(subj); - return bmidstr(subj->buffer, startpos, 1); - } + while ((c = peek_char(subj)) && (*f)(c)) { + advance(subj); + len++; + } + + return chunk_buf(subj->buffer, startpos, len); } // Try to process a backtick code span that began with a @@ -255,381 +320,406 @@ inline static bstring take_one(subject* subj) // after the closing backticks. 
static int scan_to_closing_backticks(subject* subj, int openticklength) { - // read non backticks - char c; - while ((c = peek_char(subj)) && c != '`') { - advance(subj); - } - if (is_eof(subj)) { - return 0; // did not find closing ticks, return 0 - } - int numticks = 0; - while (peek_char(subj) == '`') { - advance(subj); - numticks++; - } - if (numticks != openticklength){ - return(scan_to_closing_backticks(subj, openticklength)); - } - return (subj->pos); -} - -// Destructively modify bstring, collapsing consecutive + // read non backticks + char c; + while ((c = peek_char(subj)) && c != '`') { + advance(subj); + } + if (is_eof(subj)) { + return 0; // did not find closing ticks, return 0 + } + int numticks = 0; + while (peek_char(subj) == '`') { + advance(subj); + numticks++; + } + if (numticks != openticklength){ + return(scan_to_closing_backticks(subj, openticklength)); + } + return (subj->pos); +} + +// Destructively modify string, collapsing consecutive // space and newline characters into a single space. 
-static int normalize_whitespace(bstring s) -{ - bool last_char_was_space = false; - int pos = 0; - char c; - while ((c = bchar(s, pos))) { - switch (c) { - case ' ': - if (last_char_was_space) { - bdelete(s, pos, 1); - } else { - pos++; - } - last_char_was_space = true; - break; - case '\n': - if (last_char_was_space) { - bdelete(s, pos, 1); - } else { - bdelete(s, pos, 1); - binsertch(s, pos, 1, ' '); - pos++; - } - last_char_was_space = true; - break; - default: - pos++; - last_char_was_space = false; - } - } - return 0; +static void normalize_whitespace(gh_buf *s) +{ + /* TODO */ +#if 0 + bool last_char_was_space = false; + int pos = 0; + char c; + while ((c = gh_buf_at(s, pos))) { + switch (c) { + case ' ': + if (last_char_was_space) { + bdelete(s, pos, 1); + } else { + pos++; + } + last_char_was_space = true; + break; + case '\n': + if (last_char_was_space) { + bdelete(s, pos, 1); + } else { + bdelete(s, pos, 1); + binsertch(s, pos, 1, ' '); + pos++; + } + last_char_was_space = true; + break; + default: + pos++; + last_char_was_space = false; + } + } +#endif } // Parse backtick code section or raw backticks, return an inline. // Assumes that the subject has a backtick at the current position. 
static inl* handle_backticks(subject *subj) { - bstring openticks = take_while(subj, isbacktick); - bstring result; - int ticklength = blength(openticks); - int startpos = subj->pos; - int endpos = scan_to_closing_backticks(subj, ticklength); - if (endpos == 0) { // not found - subj->pos = startpos; // rewind - return make_str(openticks); - } else { - bdestroy(openticks); - result = bmidstr(subj->buffer, startpos, endpos - startpos - ticklength); - btrimws(result); - normalize_whitespace(result); - return make_code(result); - } + chunk openticks = take_while(subj, isbacktick); + int startpos = subj->pos; + int endpos = scan_to_closing_backticks(subj, openticks.len); + + if (endpos == 0) { // not found + subj->pos = startpos; // rewind + return make_str(openticks); + } else { + gh_buf buf = GH_BUF_INIT; + + gh_buf_set(&buf, subj->buffer->ptr + startpos, endpos - startpos - openticks.len); + gh_buf_trim(&buf); + normalize_whitespace(&buf); + + return make_code(chunk_buf_detach(&buf)); + } } // Scan ***, **, or * and return number scanned, or 0. // Don't advance position. static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close) { - int numdelims = 0; - char char_before, char_after; - int startpos = subj->pos; - - char_before = subj->pos == 0 ? '\n' : bchar(subj->buffer, subj->pos - 1); - while (peek_char(subj) == c) { - numdelims++; - advance(subj); - } - char_after = peek_char(subj); - *can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after); - *can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before); - if (c == '_') { - *can_open = *can_open && !isalnum(char_before); - *can_close = *can_close && !isalnum(char_after); - } - subj->pos = startpos; - return numdelims; + int numdelims = 0; + char char_before, char_after; + int startpos = subj->pos; + + char_before = subj->pos == 0 ? 
'\n' : gh_buf_at(subj->buffer, subj->pos - 1); + while (peek_char(subj) == c) { + numdelims++; + advance(subj); + } + char_after = peek_char(subj); + *can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after); + *can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before); + if (c == '_') { + *can_open = *can_open && !isalnum(char_before); + *can_close = *can_close && !isalnum(char_after); + } + subj->pos = startpos; + return numdelims; } // Parse strong/emph or a fallback. // Assumes the subject has '_' or '*' at the current position. static inl* handle_strong_emph(subject* subj, char c) { - bool can_open, can_close; - inl * result = NULL; - inl ** last = malloc(sizeof(inl *)); - inl * new; - inl * il; - inl * first_head = NULL; - inl * first_close = NULL; - int first_close_delims = 0; - int numdelims; - - *last = NULL; - - numdelims = scan_delims(subj, c, &can_open, &can_close); - subj->pos += numdelims; - - new = make_str(bmidstr(subj->buffer, subj->pos - numdelims, numdelims)); - *last = new; - first_head = new; - result = new; - - if (!can_open || numdelims == 0) { - goto done; - } - - switch (numdelims) { - case 1: - while (true) { - numdelims = scan_delims(subj, c, &can_open, &can_close); - if (numdelims >= 1 && can_close) { - subj->pos += 1; - first_head->tag = emph; - bdestroy(first_head->content.literal); - first_head->content.inlines = first_head->next; - first_head->next = NULL; - goto done; - } else { - if (!parse_inline(subj, last)) { - goto done; - } - } - } - break; - case 2: - while (true) { - numdelims = scan_delims(subj, c, &can_open, &can_close); - if (numdelims >= 2 && can_close) { - subj->pos += 2; - first_head->tag = strong; - bdestroy(first_head->content.literal); - first_head->content.inlines = first_head->next; - first_head->next = NULL; - goto done; - } else { - if (!parse_inline(subj, last)) { - goto done; - } - } - } - break; - case 3: - while (true) { - numdelims = scan_delims(subj, c, &can_open, &can_close); - if 
(can_close && numdelims >= 1 && numdelims <= 3 && - numdelims != first_close_delims) { - new = make_str(bmidstr(subj->buffer, subj->pos, numdelims)); - append_inlines(*last, new); - *last = new; - - if (first_close_delims == 1 && numdelims > 2) { - numdelims = 2; - } else if (first_close_delims == 2) { - numdelims = 1; - } else if (numdelims == 3) { - // If we opened with ***, we interpret it as ** followed by * - // giving us - numdelims = 1; - } - - subj->pos += numdelims; - if (first_close) { - first_head->tag = first_close_delims == 1 ? strong : emph; - bdestroy(first_head->content.literal); - first_head->content.inlines = - make_inlines(first_close_delims == 1 ? emph : strong, - first_head->next); - - il = first_head->next; - while (il->next && il->next != first_close) { - il = il->next; - } - il->next = NULL; - - first_head->content.inlines->next = first_close->next; - - il = first_head->content.inlines; - while (il->next && il->next != *last) { - il = il->next; - } - il->next = NULL; - free_inlines(*last); - - first_close->next = NULL; - free_inlines(first_close); - first_head->next = NULL; - goto done; - } else { - first_close = *last; - first_close_delims = numdelims; - } - } else { - if (!parse_inline(subj, last)) { - goto done; - } - } - } - break; - default: - goto done; - } - - done: - free(last); - return result; + bool can_open, can_close; + inl * result = NULL; + inl ** last = malloc(sizeof(inl *)); + inl * new; + inl * il; + inl * first_head = NULL; + inl * first_close = NULL; + int first_close_delims = 0; + int numdelims; + + *last = NULL; + + numdelims = scan_delims(subj, c, &can_open, &can_close); + subj->pos += numdelims; + + new = make_str(chunk_buf(subj->buffer, subj->pos - numdelims, numdelims)); + *last = new; + first_head = new; + result = new; + + if (!can_open || numdelims == 0) { + goto done; + } + + switch (numdelims) { + case 1: + while (true) { + numdelims = scan_delims(subj, c, &can_open, &can_close); + if (numdelims >= 1 && 
can_close) { + subj->pos += 1; + first_head->tag = emph; + chunk_free(&first_head->content.literal); + first_head->content.inlines = first_head->next; + first_head->next = NULL; + goto done; + } else { + if (!parse_inline(subj, last)) { + goto done; + } + } + } + break; + case 2: + while (true) { + numdelims = scan_delims(subj, c, &can_open, &can_close); + if (numdelims >= 2 && can_close) { + subj->pos += 2; + first_head->tag = strong; + chunk_free(&first_head->content.literal); + first_head->content.inlines = first_head->next; + first_head->next = NULL; + goto done; + } else { + if (!parse_inline(subj, last)) { + goto done; + } + } + } + break; + case 3: + while (true) { + numdelims = scan_delims(subj, c, &can_open, &can_close); + if (can_close && numdelims >= 1 && numdelims <= 3 && + numdelims != first_close_delims) { + new = make_str(chunk_buf(subj->buffer, subj->pos, numdelims)); + append_inlines(*last, new); + *last = new; + if (first_close_delims == 1 && numdelims > 2) { + numdelims = 2; + } else if (first_close_delims == 2) { + numdelims = 1; + } else if (numdelims == 3) { + // If we opened with ***, we interpret it as ** followed by * + // giving us + numdelims = 1; + } + subj->pos += numdelims; + if (first_close) { + first_head->tag = first_close_delims == 1 ? strong : emph; + chunk_free(&first_head->content.literal); + first_head->content.inlines = + make_inlines(first_close_delims == 1 ? 
emph : strong, + first_head->next); + + il = first_head->next; + while (il->next && il->next != first_close) { + il = il->next; + } + il->next = NULL; + + first_head->content.inlines->next = first_close->next; + + il = first_head->content.inlines; + while (il->next && il->next != *last) { + il = il->next; + } + il->next = NULL; + free_inlines(*last); + + first_close->next = NULL; + free_inlines(first_close); + first_head->next = NULL; + goto done; + } else { + first_close = *last; + first_close_delims = numdelims; + } + } else { + if (!parse_inline(subj, last)) { + goto done; + } + } + } + break; + default: + goto done; + } + +done: + free(last); + return result; } // Parse backslash-escape or just a backslash, returning an inline. static inl* handle_backslash(subject *subj) { - advance(subj); - unsigned char nextchar = peek_char(subj); - if (ispunct(nextchar)) { // only ascii symbols and newline can be escaped - advance(subj); - return make_str(bformat("%c", nextchar)); - } else if (nextchar == '\n') { - advance(subj); - return make_linebreak(); - } else { - return make_str(bfromcstr("\\")); - } + advance(subj); + unsigned char nextchar = peek_char(subj); + if (ispunct(nextchar)) { // only ascii symbols and newline can be escaped + advance(subj); + return make_str(chunk_buf(subj->buffer, subj->pos - 1, 1)); + } else if (nextchar == '\n') { + advance(subj); + return make_linebreak(); + } else { + return make_str(chunk_literal("\\")); + } } // Parse an entity or a regular "&" string. // Assumes the subject has an '&' character at the current position. 
static inl* handle_entity(subject* subj) { - int match; - inl * result; - match = scan_entity(subj->buffer, subj->pos); - if (match) { - result = make_entity(bmidstr(subj->buffer, subj->pos, match)); - subj->pos += match; - } else { - advance(subj); - result = make_str(bfromcstr("&")); - } - return result; + int match; + inl *result; + match = scan_entity(subj->buffer, subj->pos); + if (match) { + result = make_entity(chunk_buf(subj->buffer, subj->pos, match)); + subj->pos += match; + } else { + advance(subj); + result = make_str(chunk_literal("&")); + } + return result; } // Like make_str, but parses entities. // Returns an inline sequence consisting of str and entity elements. -static inl * make_str_with_entities(bstring s) -{ - inl * result = NULL; - inl * new; - int searchpos; - char c; - subject * subj = make_subject(s, NULL); - - while ((c = peek_char(subj))) { - switch (c) { - case '&': - new = handle_entity(subj); - break; - default: - searchpos = bstrchrp(subj->buffer, '&', subj->pos); - if (searchpos == BSTR_ERR) { - searchpos = blength(subj->buffer); - } - new = make_str(bmidstr(subj->buffer, subj->pos, searchpos - subj->pos)); - subj->pos = searchpos; - } - result = append_inlines(result, new); - } - free(subj); - return result; +static inl *make_str_with_entities(chunk *content) +{ + inl * result = NULL; + inl * new; + int searchpos; + char c; + subject subj; + gh_buf content_buf = GH_BUF_INIT; + + gh_buf_set(&content_buf, content->data, content->len); + init_subject(&subj, &content_buf, 0, NULL); + + while ((c = peek_char(&subj))) { + switch (c) { + case '&': + new = handle_entity(&subj); + break; + default: + searchpos = gh_buf_strchr(subj.buffer, '&', subj.pos); + if (searchpos < 0) { + searchpos = gh_buf_len(subj.buffer); + } + + new = make_str(chunk_buf(subj.buffer, subj.pos, searchpos - subj.pos)); + subj.pos = searchpos; + } + result = append_inlines(result, new); + } + + gh_buf_free(&content_buf); + return result; } // Destructively unescape a 
string: remove backslashes before punctuation chars. -extern int unescape(bstring url) +extern void unescape_buffer(gh_buf *buf) { - // remove backslashes before punctuation chars: - int searchpos = 0; - while ((searchpos = bstrchrp(url, '\\', searchpos)) != BSTR_ERR) { - if (ispunct(bchar(url, searchpos + 1))) { - bdelete(url, searchpos, 1); - } else { - searchpos++; - } - } - return 0; + int r, w; + + for (r = 0, w = 0; r < buf->size; ++r) { + if (buf->ptr[r] == '\\' && r + 1 < buf->size && ispunct((unsigned char)buf->ptr[r + 1])) /* stay inside the buffer; ctype needs an unsigned char value */ + continue; + + buf->ptr[w++] = buf->ptr[r]; + } + + gh_buf_truncate(buf, w); } // Clean a URL: remove surrounding whitespace and surrounding <>, // and remove \ that escape punctuation. -static int clean_url(bstring url) +static unsigned char *clean_url(chunk *url) { - // remove surrounding <> if any: - int urllength = blength(url); - btrimws(url); - if (bchar(url, 0) == '<' && bchar(url, urllength - 1) == '>') { - bdelete(url, 0, 1); - bdelete(url, urllength - 2, 1); - } - unescape(url); - return 0; + gh_buf buf = GH_BUF_INIT; + + chunk_trim(url); + + if (url->len > 1 && url->data[0] == '<' && url->data[url->len - 1] == '>') { /* an empty trimmed chunk has no bytes to inspect */ + gh_buf_set(&buf, url->data + 1, url->len - 2); + } else { + gh_buf_set(&buf, url->data, url->len); + } + + unescape_buffer(&buf); + return gh_buf_detach(&buf); } // Clean a title: remove surrounding quotes and remove \ that escape punctuation. 
-static int clean_title(bstring title) +static unsigned char *clean_title(chunk *title) { - // remove surrounding quotes if any: - int titlelength = blength(title); - if ((bchar(title, 0) == '\'' && bchar(title, titlelength - 1) == '\'') || - (bchar(title, 0) == '(' && bchar(title, titlelength - 1) == ')') || - (bchar(title, 0) == '"' && bchar(title, titlelength - 1) == '"')) { - bdelete(title, 0, 1); - bdelete(title, titlelength - 2, 1); - } - unescape(title); - return 0; + gh_buf buf = GH_BUF_INIT; + unsigned char first = title->len > 1 ? title->data[0] : 0; /* a quote pair needs at least two bytes */ + unsigned char last = title->len > 1 ? title->data[title->len - 1] : 0; /* 0 never matches a delimiter, so short titles are copied as-is */ + + // remove surrounding quotes if any: + if ((first == '\'' && last == '\'') || + (first == '(' && last == ')') || + (first == '"' && last == '"')) { + gh_buf_set(&buf, title->data + 1, title->len - 2); + } else { + gh_buf_set(&buf, title->data, title->len); + } + + unescape_buffer(&buf); + return gh_buf_detach(&buf); } // Parse an autolink or HTML tag. // Assumes the subject has a '<' character at the current position. 
static inl* handle_pointy_brace(subject* subj) { - int matchlen = 0; - bstring contents; - inl* result; - - advance(subj); // advance past first < - // first try to match a URL autolink - matchlen = scan_autolink_uri(subj->buffer, subj->pos); - if (matchlen > 0) { - contents = bmidstr(subj->buffer, subj->pos, matchlen - 1); - subj->pos += matchlen; - result = make_link(make_str_with_entities(contents), - bstrcpy(contents), bfromcstr("")); - bdestroy(contents); - return result; - } - // next try to match an email autolink - matchlen = scan_autolink_email(subj->buffer, subj->pos); - if (matchlen > 0) { - contents = bmidstr(subj->buffer, subj->pos, matchlen - 1); - subj->pos += matchlen; - result = make_link(make_str_with_entities(contents), - bformat("mailto:%s", contents->data), - bfromcstr("")); - bdestroy(contents); - return result; - } - // finally, try to match an html tag - matchlen = scan_html_tag(subj->buffer, subj->pos); - if (matchlen > 0) { - contents = bmidstr(subj->buffer, subj->pos, matchlen); - binsertch(contents, 0, 1, '<'); - subj->pos += matchlen; - return make_raw_html(contents); - } else {// if nothing matches, just return the opening <: - return make_str(bfromcstr("<")); - } + int matchlen = 0; + chunk contents; + + advance(subj); // advance past first < + + // first try to match a URL autolink + matchlen = scan_autolink_uri(subj->buffer, subj->pos); + if (matchlen > 0) { + contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1); + subj->pos += matchlen; + + return make_link( + make_str_with_entities(&contents), + contents, + chunk_literal("") + ); + } + + // next try to match an email autolink + matchlen = scan_autolink_email(subj->buffer, subj->pos); + if (matchlen > 0) { + gh_buf mail_url = GH_BUF_INIT; + + contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1); + subj->pos += matchlen; + + gh_buf_puts(&mail_url, "mailto:"); + gh_buf_put(&mail_url, contents.data, contents.len); + + return make_link( + 
make_str_with_entities(&contents), + chunk_buf_detach(&mail_url), + chunk_literal("") + ); + } + + // finally, try to match an html tag + matchlen = scan_html_tag(subj->buffer, subj->pos); + if (matchlen > 0) { + contents = chunk_buf(subj->buffer, subj->pos - 1, matchlen + 1); + subj->pos += matchlen; + return make_raw_html(contents); + } + + // if nothing matches, just return the opening <: + return make_str(chunk_literal("<")); } // Parse a link label. Returns 1 if successful. @@ -641,366 +731,381 @@ static inl* handle_pointy_brace(subject* subj) // markers. So, 2 below contains a link while 1 does not: // 1. [a link `with a ](/url)` character // 2. [a link *with emphasized ](/url) text* -static int link_label(subject* subj, bstring* raw_label) -{ - int nestlevel = 0; - inl* tmp = NULL; - bstring raw; - int startpos = subj->pos; - if (subj->label_nestlevel) { - // if we've already checked to the end of the subject - // for a label, even with a different starting [, we - // know we won't find one here and we can just return. 
- // Note: nestlevel 1 would be: [foo [bar] - // nestlevel 2 would be: [foo [bar [baz] - subj->label_nestlevel--; - return 0; - } - advance(subj); // advance past [ - char c; - while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) { - switch (c) { - case '`': - tmp = handle_backticks(subj); - free_inlines(tmp); - break; - case '<': - tmp = handle_pointy_brace(subj); - free_inlines(tmp); - break; - case '[': // nested [] - nestlevel++; - advance(subj); - break; - case ']': // nested [] - nestlevel--; - advance(subj); - break; - case '\\': - advance(subj); - if (ispunct(peek_char(subj))) { - advance(subj); - } - break; - default: - advance(subj); - } - } - if (c == ']') { - if (raw_label != NULL) { - raw = bmidstr(subj->buffer, startpos + 1, subj->pos - (startpos + 1)); - *raw_label = raw; - } - subj->label_nestlevel = 0; - advance(subj); // advance past ] - return 1; - } else { - if (c == 0) { - subj->label_nestlevel = nestlevel; - } - subj->pos = startpos; // rewind - return 0; - } +static int link_label(subject* subj, chunk *raw_label) +{ + int nestlevel = 0; + inl* tmp = NULL; + int startpos = subj->pos; + + if (subj->label_nestlevel) { + // if we've already checked to the end of the subject + // for a label, even with a different starting [, we + // know we won't find one here and we can just return. 
+ // Note: nestlevel 1 would be: [foo [bar] + // nestlevel 2 would be: [foo [bar [baz] + subj->label_nestlevel--; + return 0; + } + + advance(subj); // advance past [ + char c; + while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) { + switch (c) { + case '`': + tmp = handle_backticks(subj); + free_inlines(tmp); + break; + case '<': + tmp = handle_pointy_brace(subj); + free_inlines(tmp); + break; + case '[': // nested [] + nestlevel++; + advance(subj); + break; + case ']': // nested [] + nestlevel--; + advance(subj); + break; + case '\\': + advance(subj); + if (ispunct(peek_char(subj))) { + advance(subj); + } + break; + default: + advance(subj); + } + } + if (c == ']') { + *raw_label = chunk_buf( + subj->buffer, + startpos + 1, + subj->pos - (startpos + 1) + ); + + subj->label_nestlevel = 0; + advance(subj); // advance past ] + return 1; + } else { + if (c == 0) { + subj->label_nestlevel = nestlevel; + } + subj->pos = startpos; // rewind + return 0; + } } // Parse a link or the link portion of an image, or return a fallback. static inl* handle_left_bracket(subject* subj) { - inl* lab = NULL; - inl* result = NULL; - reference* ref; - int n; - int sps; - int found_label; - int endlabel, starturl, endurl, starttitle, endtitle, endall; - bstring url, title, rawlabel, reflabel; - bstring rawlabel2 = NULL; - found_label = link_label(subj, &rawlabel); - endlabel = subj->pos; - if (found_label) { - if (peek_char(subj) == '(' && - ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) && - ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) { - // try to parse an explicit link: - starturl = subj->pos + 1 + sps; // after ( - endurl = starturl + n; - starttitle = endurl + scan_spacechars(subj->buffer, endurl); - // ensure there are spaces btw url and title - endtitle = (starttitle == endurl) ? 
starttitle : - starttitle + scan_link_title(subj->buffer, starttitle); - endall = endtitle + scan_spacechars(subj->buffer, endtitle); - if (bchar(subj->buffer, endall) == ')') { - subj->pos = endall + 1; - url = bmidstr(subj->buffer, starturl, endurl - starturl); - clean_url(url); - title = bmidstr(subj->buffer, starttitle, endtitle - starttitle); - clean_title(title); - lab = parse_inlines(rawlabel, NULL); - bdestroy(rawlabel); - return make_link(lab, url, title); - } else { - // if we get here, we matched a label but didn't get further: - subj->pos = endlabel; - lab = parse_inlines(rawlabel, subj->reference_map); - bdestroy(rawlabel); - result = append_inlines(make_str(bfromcstr("[")), - append_inlines(lab, - make_str(bfromcstr("]")))); - return result; - } - } else { - // Check for reference link. - // First, see if there's another label: - subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel); - reflabel = rawlabel; - // if followed by a nonempty link label, we change reflabel to it: - if (peek_char(subj) == '[' && - link_label(subj, &rawlabel2)) { - if (blength(rawlabel2) > 0) { - reflabel = rawlabel2; - } - } else { - subj->pos = endlabel; - } - // lookup rawlabel in subject->reference_map: - ref = lookup_reference(subj->reference_map, reflabel); - if (ref != NULL) { // found - lab = parse_inlines(rawlabel, NULL); - result = make_link(lab, bstrcpy(ref->url), bstrcpy(ref->title)); - } else { - subj->pos = endlabel; - lab = parse_inlines(rawlabel, subj->reference_map); - result = append_inlines(make_str(bfromcstr("[")), - append_inlines(lab, make_str(bfromcstr("]")))); - } - bdestroy(rawlabel); - bdestroy(rawlabel2); - return result; - } - } - // If we fall through to here, it means we didn't match a link: - advance(subj); // advance past [ - return make_str(bfromcstr("[")); + inl *lab = NULL; + inl *result = NULL; + reference *ref; + int n; + int sps; + int found_label; + int endlabel, starturl, endurl, starttitle, endtitle, endall; + + chunk 
rawlabel; + chunk url, title; + + found_label = link_label(subj, &rawlabel); + endlabel = subj->pos; + + if (found_label) { + if (peek_char(subj) == '(' && + ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) && + ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) { + + // try to parse an explicit link: + starturl = subj->pos + 1 + sps; // after ( + endurl = starturl + n; + starttitle = endurl + scan_spacechars(subj->buffer, endurl); + + // ensure there are spaces btw url and title + endtitle = (starttitle == endurl) ? starttitle : + starttitle + scan_link_title(subj->buffer, starttitle); + + endall = endtitle + scan_spacechars(subj->buffer, endtitle); + + if (gh_buf_at(subj->buffer, endall) == ')') { + subj->pos = endall + 1; + + url = chunk_buf(subj->buffer, starturl, endurl - starturl); + title = chunk_buf(subj->buffer, starttitle, endtitle - starttitle); + lab = parse_chunk_inlines(&rawlabel, NULL); + + return make_link(lab, url, title); + } else { + // if we get here, we matched a label but didn't get further: + subj->pos = endlabel; + lab = parse_chunk_inlines(&rawlabel, subj->reference_map); + result = append_inlines(make_str(chunk_literal("[")), + append_inlines(lab, + make_str(chunk_literal("]")))); + return result; + } + } else { + chunk rawlabel_tmp; + chunk reflabel; + + // Check for reference link. 
+ // First, see if there's another label: + subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel); + reflabel = rawlabel; + + // if followed by a nonempty link label, we change reflabel to it: + if (peek_char(subj) == '[' && link_label(subj, &rawlabel_tmp)) { + if (rawlabel_tmp.len > 0) + reflabel = rawlabel_tmp; + } else { + subj->pos = endlabel; + } + + // lookup rawlabel in subject->reference_map: + ref = lookup_reference(subj->reference_map, &reflabel); + if (ref != NULL) { // found + lab = parse_chunk_inlines(&rawlabel, NULL); + result = make_link(lab, chunk_literal(ref->url), chunk_literal(ref->title)); + } else { + subj->pos = endlabel; + lab = parse_chunk_inlines(&rawlabel, subj->reference_map); + result = append_inlines(make_str(chunk_literal("[")), + append_inlines(lab, make_str(chunk_literal("]")))); + } + return result; + } + } + // If we fall through to here, it means we didn't match a link: + advance(subj); // advance past [ + return make_str(chunk_literal("[")); } // Parse a hard or soft linebreak, returning an inline. // Assumes the subject has a newline at the current position. static inl* handle_newline(subject *subj) { - int nlpos = subj->pos; - // skip over newline - advance(subj); - // skip spaces at beginning of line - while (peek_char(subj) == ' ') { - advance(subj); - } - if (nlpos > 1 && - bchar(subj->buffer, nlpos - 1) == ' ' && - bchar(subj->buffer, nlpos - 2) == ' ') { - return make_linebreak(); - } else { - return make_softbreak(); - } + int nlpos = subj->pos; + // skip over newline + advance(subj); + // skip spaces at beginning of line + while (peek_char(subj) == ' ') { + advance(subj); + } + if (nlpos > 1 && + gh_buf_at(subj->buffer, nlpos - 1) == ' ' && + gh_buf_at(subj->buffer, nlpos - 2) == ' ') { + return make_linebreak(); + } else { + return make_softbreak(); + } } inline static int not_eof(subject* subj) { - return !is_eof(subj); + return !is_eof(subj); } // Parse inlines while a predicate is satisfied. Return inlines. 
extern inl* parse_inlines_while(subject* subj, int (*f)(subject*)) { - inl* result = NULL; - inl** last = &result; - while ((*f)(subj) && parse_inline(subj, last)) { - } - return result; + inl* result = NULL; + inl** last = &result; + while ((*f)(subj) && parse_inline(subj, last)) { + } + return result; +} + +inl *parse_chunk_inlines(chunk *chunk, reference** refmap) +{ + inl *result; + subject subj; + gh_buf full_chunk = GH_BUF_INIT; + + gh_buf_set(&full_chunk, chunk->data, chunk->len); + init_subject(&subj, &full_chunk, 0, refmap); + result = parse_inlines_while(&subj, not_eof); + + gh_buf_free(&full_chunk); + return result; +} + +static int find_special_char(subject *subj) +{ + int n = subj->pos + 1; + int size = (int)gh_buf_len(subj->buffer); + + while (n < size) { + if (strchr("\n\\`&_*[]buffer, n))) + return n; + } + + return -1; } // Parse an inline, advancing subject, and add it to last element. // Adjust tail to point to new last element of list. // Return 0 if no inline can be parsed, 1 otherwise. 
-extern int parse_inline(subject* subj, inl ** last) -{ - inl* new = NULL; - bstring contents; - bstring special_chars; - unsigned char c; - int endpos; - c = peek_char(subj); - if (c == 0) { - return 0; - } - switch(c){ - case '\n': - new = handle_newline(subj); - break; - case '`': - new = handle_backticks(subj); - break; - case '\\': - new = handle_backslash(subj); - break; - case '&': - new = handle_entity(subj); - break; - case '<': - new = handle_pointy_brace(subj); - break; - case '_': - if (subj->pos > 0 && (isalnum(bchar(subj->buffer, subj->pos - 1)) || - bchar(subj->buffer, subj->pos - 1) == '_')) { - new = make_str(take_one(subj)); - } else { - new = handle_strong_emph(subj, '_'); - } - break; - case '*': - new = handle_strong_emph(subj, '*'); - break; - case '[': - new = handle_left_bracket(subj); - break; - case '!': - advance(subj); - if (peek_char(subj) == '[') { - new = handle_left_bracket(subj); - if (new != NULL && new->tag == link) { - new->tag = image; - } else { - new = append_inlines(make_str(bfromcstr("!")), new); - } - } else { - new = make_str(bfromcstr("!")); - } - break; - default: - // we read until we hit a special character - special_chars = bfromcstr("\n\\`&_*[]buffer, subj->pos, special_chars); - bdestroy(special_chars); - if (endpos == subj->pos) { - // current char is special: read a 1-character str - contents = take_one(subj); - } else if (endpos == BSTR_ERR) { - // special char not found, take whole rest of buffer: - endpos = subj->buffer->slen; - contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos); - subj->pos = endpos; - } else { - // take buffer from subj->pos to endpos to str. - contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos); - subj->pos = endpos; - // if we're at a newline, strip trailing spaces. 
- if (peek_char(subj) == '\n') { - brtrimws(contents); - } - } - new = make_str(contents); - } - if (*last == NULL) { - *last = new; - } else { - append_inlines(*last, new); - } - return 1; -} - -extern inl* parse_inlines(bstring input, reference** refmap) -{ - subject * subj = make_subject(input, refmap); - inl * result = parse_inlines_while(subj, not_eof); - free(subj); - return result; +static int parse_inline(subject* subj, inl ** last) +{ + inl* new = NULL; + chunk contents; + unsigned char c; + int endpos; + c = peek_char(subj); + if (c == 0) { + return 0; + } + switch(c){ + case '\n': + new = handle_newline(subj); + break; + case '`': + new = handle_backticks(subj); + break; + case '\\': + new = handle_backslash(subj); + break; + case '&': + new = handle_entity(subj); + break; + case '<': + new = handle_pointy_brace(subj); + break; + case '_': + if (subj->pos > 0 && (isalnum(gh_buf_at(subj->buffer, subj->pos - 1)) || + gh_buf_at(subj->buffer, subj->pos - 1) == '_')) { + goto text_literal; + } + + new = handle_strong_emph(subj, '_'); + break; + case '*': + new = handle_strong_emph(subj, '*'); + break; + case '[': + new = handle_left_bracket(subj); + break; + case '!': + advance(subj); + if (peek_char(subj) == '[') { + new = handle_left_bracket(subj); + if (new != NULL && new->tag == link) { + new->tag = image; + } else { + new = append_inlines(make_str(chunk_literal("!")), new); + } + } else { + new = make_str(chunk_literal("!")); + } + break; + default: + text_literal: + endpos = find_special_char(subj); + if (endpos < 0) { + endpos = gh_buf_len(subj->buffer); + } + + contents = chunk_buf(subj->buffer, subj->pos, endpos - subj->pos); + subj->pos = endpos; + + // if we're at a newline, strip trailing spaces. 
+ if (peek_char(subj) == '\n') { + chunk_trim(&contents); + } + + new = make_str(contents); + } + if (*last == NULL) { + *last = new; + } else { + append_inlines(*last, new); + } + return 1; +} + +extern inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap) +{ + subject subj; + init_subject(&subj, input, input_pos, refmap); + return parse_inlines_while(&subj, not_eof); } // Parse zero or more space characters, including at most one newline. void spnl(subject* subj) { - bool seen_newline = false; - while (peek_char(subj) == ' ' || - (!seen_newline && - (seen_newline = peek_char(subj) == '\n'))) { - advance(subj); - } + bool seen_newline = false; + while (peek_char(subj) == ' ' || + (!seen_newline && + (seen_newline = peek_char(subj) == '\n'))) { + advance(subj); + } } // Parse reference. Assumes string begins with '[' character. // Modify refmap if a reference is encountered. // Return 0 if no reference found, otherwise position of subject // after reference is parsed. -extern int parse_reference(bstring input, reference** refmap) -{ - subject * subj = make_subject(input, NULL); - bstring lab = NULL; - bstring url = NULL; - bstring title = NULL; - int matchlen = 0; - int beforetitle; - reference * new = NULL; - int newpos; - - // parse label: - if (!link_label(subj, &lab)) { - free(subj); - return 0; - } - // colon: - if (peek_char(subj) == ':') { - advance(subj); - } else { - free(subj); - bdestroy(lab); - return 0; - } - // parse link url: - spnl(subj); - matchlen = scan_link_url(subj->buffer, subj->pos); - if (matchlen) { - url = bmidstr(subj->buffer, subj->pos, matchlen); - clean_url(url); - subj->pos += matchlen; - } else { - free(subj); - bdestroy(lab); - bdestroy(url); - return 0; - } - // parse optional link_title - beforetitle = subj->pos; - spnl(subj); - matchlen = scan_link_title(subj->buffer, subj->pos); - if (matchlen) { - title = bmidstr(subj->buffer, subj->pos, matchlen); - clean_title(title); - subj->pos += matchlen; - } else { - 
subj->pos = beforetitle; - title = bfromcstr(""); - } - // parse final spaces and newline: - while (peek_char(subj) == ' ') { - advance(subj); - } - if (peek_char(subj) == '\n') { - advance(subj); - } else if (peek_char(subj) != 0) { - free(subj); - bdestroy(lab); - bdestroy(url); - bdestroy(title); - return 0; - } - // insert reference into refmap - new = make_reference(lab, url, title); - add_reference(refmap, new); - - newpos = subj->pos; - free(subj); - bdestroy(lab); - bdestroy(url); - bdestroy(title); - return newpos; +extern int parse_reference(gh_buf *input, int input_pos, reference** refmap) +{ + subject subj; + + chunk lab; + chunk url; + chunk title; + + int matchlen = 0; + int beforetitle; + reference * new = NULL; + + init_subject(&subj, input, input_pos, NULL); + + // parse label: + if (!link_label(&subj, &lab)) + return 0; + + // colon: + if (peek_char(&subj) == ':') { + advance(&subj); + } else { + return 0; + } + + // parse link url: + spnl(&subj); + matchlen = scan_link_url(subj.buffer, subj.pos); + if (matchlen) { + url = chunk_buf(subj.buffer, subj.pos, matchlen); + subj.pos += matchlen; + } else { + return 0; + } + + // parse optional link_title + beforetitle = subj.pos; + spnl(&subj); + matchlen = scan_link_title(subj.buffer, subj.pos); + if (matchlen) { + title = chunk_buf(subj.buffer, subj.pos, matchlen); + subj.pos += matchlen; + } else { + subj.pos = beforetitle; + title = chunk_literal(""); + } + // parse final spaces and newline: + while (peek_char(&subj) == ' ') { + advance(&subj); + } + if (peek_char(&subj) == '\n') { + advance(&subj); + } else if (peek_char(&subj) != 0) { + return 0; + } + // insert reference into refmap + new = make_reference(&lab, &url, &title); + add_reference(refmap, new); + + return subj.pos; } diff --git a/src/main.c b/src/main.c index f0ecb82..9e0a3c8 100644 --- a/src/main.c +++ b/src/main.c @@ -88,7 +88,7 @@ int main(int argc, char *argv[]) { print_blocks(cur, 0); } else { check(blocks_to_html(cur, &html, 
false) == 0, "could not format as HTML"); - printf("%s", html->data); + // printf("%s", html->data); bdestroy(html); } free_blocks(cur); diff --git a/src/scanners.h b/src/scanners.h index 71e0520..b6e586b 100644 --- a/src/scanners.h +++ b/src/scanners.h @@ -1,15 +1,15 @@ -#include "bstrlib.h" +#include "buffer.h" -int scan_autolink_uri(bstring s, int pos); -int scan_autolink_email(bstring s, int pos); -int scan_html_tag(bstring s, int pos); -int scan_html_block_tag(bstring s, int pos); -int scan_link_url(bstring s, int pos); -int scan_link_title(bstring s, int pos); -int scan_spacechars(bstring s, int pos); -int scan_atx_header_start(bstring s, int pos); -int scan_setext_header_line(bstring s, int pos); -int scan_hrule(bstring s, int pos); -int scan_open_code_fence(bstring s, int pos); -int scan_close_code_fence(bstring s, int pos, int len); -int scan_entity(bstring s, int pos); +int scan_autolink_uri(const gh_buf *s, int pos); +int scan_autolink_email(const gh_buf *s, int pos); +int scan_html_tag(const gh_buf *s, int pos); +int scan_html_block_tag(const gh_buf *s, int pos); +int scan_link_url(const gh_buf *s, int pos); +int scan_link_title(const gh_buf *s, int pos); +int scan_spacechars(const gh_buf *s, int pos); +int scan_atx_header_start(const gh_buf *s, int pos); +int scan_setext_header_line(const gh_buf *s, int pos); +int scan_hrule(const gh_buf *s, int pos); +int scan_open_code_fence(const gh_buf *s, int pos); +int scan_close_code_fence(const gh_buf *s, int pos, int len); +int scan_entity(const gh_buf *s, int pos); diff --git a/src/scanners.re b/src/scanners.re index 305d1ea..7323ef9 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -1,4 +1,4 @@ -#include "bstrlib.h" +#include "buffer.h" /*!re2c re2c:define:YYCTYPE = "unsigned char"; @@ -55,10 +55,10 @@ */ // Try to match URI autolink after first <, returning number of chars matched. 
-extern int scan_autolink_uri(bstring s, int pos) +extern int scan_autolink_uri(const gh_buf *s, int pos) { unsigned char * marker = NULL; - unsigned char * p = &(s->data[pos]); + unsigned char * p = &(s->ptr[pos]); unsigned char * start = p; /*!re2c scheme [:]([^\x00-\x20<>\\]|escaped_char)*[>] { return (p - start); } @@ -67,10 +67,10 @@ extern int scan_autolink_uri(bstring s, int pos) } // Try to match email autolink after first <, returning num of chars matched. -extern int scan_autolink_email(bstring s, int pos) +extern int scan_autolink_email(const gh_buf *s, int pos) { unsigned char * marker = NULL; - unsigned char * p = &(s->data[pos]); + unsigned char * p = &(s->ptr[pos]); unsigned char * start = p; /*!re2c [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+ @@ -83,10 +83,10 @@ extern int scan_autolink_email(bstring s, int pos) } // Try to match an HTML tag after first <, returning num of chars matched. -extern int scan_html_tag(bstring s, int pos) +extern int scan_html_tag(const gh_buf *s, int pos) { unsigned char * marker = NULL; - unsigned char * p = &(s->data[pos]); + unsigned char * p = &(s->ptr[pos]); unsigned char * start = p; /*!re2c htmltag { return (p - start); } @@ -96,10 +96,10 @@ extern int scan_html_tag(bstring s, int pos) // Try to match an HTML block tag including first <, // returning num of chars matched. -extern int scan_html_block_tag(bstring s, int pos) +extern int scan_html_block_tag(const gh_buf *s, int pos) { unsigned char * marker = NULL; - unsigned char * p = &(s->data[pos]); + unsigned char * p = &(s->ptr[pos]); unsigned char * start = p; /*!re2c [<] [/] blocktagname (spacechar | [>]) { return (p - start); } @@ -113,10 +113,10 @@ extern int scan_html_block_tag(bstring s, int pos) // This may optionally be contained in <..>; otherwise // whitespace and unbalanced right parentheses aren't allowed. // Newlines aren't ever allowed. 
-extern int scan_link_url(bstring s, int pos) +extern int scan_link_url(const gh_buf *s, int pos) { unsigned char * marker = NULL; - unsigned char * p = &(s->data[pos]); + unsigned char * p = &(s->ptr[pos]); unsigned char * start = p; /*!re2c [ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); } @@ -128,10 +128,10 @@ extern int scan_link_url(bstring s, int pos) // Try to match a link title (in single quotes, in double quotes, or // in parentheses), returning number of chars matched. Allow one // level of internal nesting (quotes within quotes). -extern int scan_link_title(bstring s, int pos) +extern int scan_link_title(const gh_buf *s, int pos) { unsigned char * marker = NULL; - unsigned char * p = &(s->data[pos]); + unsigned char * p = &(s->ptr[pos]); unsigned char * start = p; /*!re2c ["] (escaped_char|[^"\x00])* ["] { return (p - start); } @@ -142,9 +142,9 @@ extern int scan_link_title(bstring s, int pos) } // Match space characters, including newlines. -extern int scan_spacechars(bstring s, int pos) +extern int scan_spacechars(const gh_buf *s, int pos) { - unsigned char * p = &(s->data[pos]); + unsigned char * p = &(s->ptr[pos]); unsigned char * start = p; /*!re2c [ \t\n]* { return (p - start); } @@ -153,10 +153,10 @@ extern int scan_spacechars(bstring s, int pos) } // Match ATX header start. -extern int scan_atx_header_start(bstring s, int pos) +extern int scan_atx_header_start(const gh_buf *s, int pos) { unsigned char * marker = NULL; - unsigned char * p = &(s->data[pos]); + unsigned char * p = &(s->ptr[pos]); unsigned char * start = p; /*!re2c [#]{1,6} ([ ]+|[\n]) { return (p - start); } @@ -166,10 +166,10 @@ extern int scan_atx_header_start(bstring s, int pos) // Match sexext header line. Return 1 for level-1 header, // 2 for level-2, 0 for no match. 
-extern int scan_setext_header_line(bstring s, int pos) +extern int scan_setext_header_line(const gh_buf *s, int pos) { unsigned char * marker = NULL; - unsigned char * p = &(s->data[pos]); + unsigned char * p = &(s->ptr[pos]); /*!re2c [=]+ [ ]* [\n] { return 1; } [-]+ [ ]* [\n] { return 2; } @@ -180,10 +180,10 @@ extern int scan_setext_header_line(bstring s, int pos) // Scan a horizontal rule line: "...three or more hyphens, asterisks, // or underscores on a line by themselves. If you wish, you may use // spaces between the hyphens or asterisks." -extern int scan_hrule(bstring s, int pos) +extern int scan_hrule(const gh_buf *s, int pos) { unsigned char * marker = NULL; - unsigned char * p = &(s->data[pos]); + unsigned char * p = &(s->ptr[pos]); unsigned char * start = p; /*!re2c ([*][ ]*){3,} [ \t]* [\n] { return (p - start); } @@ -194,10 +194,10 @@ extern int scan_hrule(bstring s, int pos) } // Scan an opening code fence. -extern int scan_open_code_fence(bstring s, int pos) +extern int scan_open_code_fence(const gh_buf *s, int pos) { unsigned char * marker = NULL; - unsigned char * p = &(s->data[pos]); + unsigned char * p = &(s->ptr[pos]); unsigned char * start = p; /*!re2c [`]{3,} / [^`\n\x00]*[\n] { return (p - start); } @@ -207,10 +207,10 @@ extern int scan_open_code_fence(bstring s, int pos) } // Scan a closing code fence with length at least len. -extern int scan_close_code_fence(bstring s, int pos, int len) +extern int scan_close_code_fence(const gh_buf *s, int pos, int len) { unsigned char * marker = NULL; - unsigned char * p = &(s->data[pos]); + unsigned char * p = &(s->ptr[pos]); unsigned char * start = p; /*!re2c ([`]{3,} | [~]{3,}) / spacechar* [\n] @@ -225,10 +225,10 @@ extern int scan_close_code_fence(bstring s, int pos, int len) // Scans an entity. // Returns number of chars matched. 
-extern int scan_entity(bstring s, int pos) +extern int scan_entity(const gh_buf *s, int pos) { unsigned char * marker = NULL; - unsigned char * p = &(s->data[pos]); + unsigned char * p = &(s->ptr[pos]); unsigned char * start = p; /*!re2c [&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;] diff --git a/src/stmd.h b/src/stmd.h index 5e34399..eb1b989 100644 --- a/src/stmd.h +++ b/src/stmd.h @@ -1,38 +1,38 @@ #include -#include "bstrlib.h" +#include "buffer.h" #include "uthash.h" #define VERSION "0.1" #define CODE_INDENT 4 +typedef struct { + const unsigned char *data; + int len; + int alloc; +} chunk; + typedef struct Inline { - enum { str, softbreak, linebreak, code, raw_html, entity, - emph, strong, link, image } tag; - union { - bstring literal; - struct Inline* inlines; - struct { struct Inline* label; - bstring url; - bstring title; - } linkable; - } content; - struct Inline* next; + enum { str, softbreak, linebreak, code, raw_html, entity, + emph, strong, link, image } tag; + union { + chunk literal; + struct Inline *inlines; + struct { + struct Inline *label; + unsigned char *url; + unsigned char *title; + } linkable; + } content; + struct Inline *next; } inl; typedef struct Reference { - bstring label; - bstring url; - bstring title; + unsigned char *label; + unsigned char *url; + unsigned char *title; UT_hash_handle hh; // used by uthash } reference; -typedef struct Subject { - bstring buffer; - int pos; - reference** reference_map; - int label_nestlevel; -} subject; - // Types for blocks struct ListData { @@ -51,7 +51,7 @@ struct FencedCodeData { int fence_length; int fence_offset; char fence_char; - bstring info; + gh_buf info; }; typedef struct Block { @@ -77,7 +77,8 @@ typedef struct Block { struct Block* last_child; struct Block* parent; struct Block* top; - bstring string_content; + gh_buf string_content; + int string_pos; inl* inline_content; union { struct ListData list_data; @@ -89,33 +90,34 @@ typedef struct Block { struct 
Block * prev; } block; -int parse_inline(subject* subj, inl ** last); -inl* parse_inlines(bstring input, reference** refmap); -inl* parse_inlines_while(subject* subj, int (*f)(subject*)); +inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap); void free_inlines(inl* e); -int parse_reference(bstring input, reference** refmap); + +int parse_reference(gh_buf *input, int input_pos, reference** refmap); void free_reference(reference *ref); void free_reference_map(reference **refmap); -reference* make_reference(bstring label, bstring url, bstring title); -reference* lookup_reference(reference** refmap, bstring label); + void add_reference(reference** refmap, reference* ref); -int unescape(bstring s); +void unescape_buffer(gh_buf *buf); extern block* make_document(); extern block* add_child(block* parent, int block_type, int start_line, int start_column); void free_blocks(block* e); +block *stmd_parse_document(const char *buffer, size_t len); + // FOR NOW: -int process_inlines(block* cur, reference** refmap); -int incorporate_line(bstring ln, int line_number, block** curptr); -int finalize(block* b, int line_number); +void process_inlines(block* cur, reference** refmap); +void incorporate_line(gh_buf *ln, int line_number, block** curptr); +void finalize(block* b, int line_number); void print_inlines(inl* ils, int indent); void print_blocks(block* blk, int indent); -int blocks_to_html(block* b, bstring* result, bool tight); -int inlines_to_html(inl* b, bstring* result); +/* TODO */ +// int blocks_to_html(block* b, bstring* result, bool tight); +// int inlines_to_html(inl* b, bstring* result); -int bdetab(bstring s, int utf8); +void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len); diff --git a/src/utf8.c b/src/utf8.c index 4bb3b35..1a5df9e 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -2,105 +2,142 @@ #include "bstrlib.h" #include "debug.h" -#define advance(s) \ - s++; \ - check(*s >> 6 == 0x02, "UTF-8 decode error on byte %x", *s); - -// 
/*
 * Sequence length implied by each possible lead byte:
 *   0x00-0x7F -> 1, 0xC0-0xDF -> 2, 0xE0-0xEF -> 3, 0xF0-0xF7 -> 4.
 * Continuation bytes (0x80-0xBF) and 0xF8-0xFF cannot start a sequence
 * and map to 0.
 */
static const int8_t utf8proc_utf8class[256] = {
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0
};

/*
 * Measure the UTF-8 sequence that starts at str[0].
 *
 * str     - bytes to inspect.
 * str_len - number of bytes available; a negative value disables the
 *           "sequence fits in the input" check.
 *
 * Returns 0 when str_len is 0, the sequence length (1-4) when the lead
 * byte is legal and is followed by enough continuation bytes, and -1
 * otherwise. Only the byte structure is checked here, not the decoded
 * value.
 */
ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len)
{
	ssize_t expected;
	ssize_t pos;

	if (str_len == 0)
		return 0;

	expected = utf8proc_utf8class[str[0]];

	/* Class 0: this byte can never begin a sequence. */
	if (expected == 0)
		return -1;

	/* Sequence would run past the end of the input. */
	if (str_len >= 0 && str_len < expected)
		return -1;

	/* Every trailing byte must look like 10xxxxxx. */
	pos = 1;
	while (pos < expected) {
		if ((str[pos] & 0xC0) != 0x80)
			return -1;
		pos++;
	}

	return expected;
}

/*
 * Decode one code point from the start of str.
 *
 * str     - bytes to decode.
 * str_len - number of bytes available (forwarded to utf8proc_charlen).
 * dst     - receives the decoded code point, or -1 on failure.
 *
 * Returns the number of bytes consumed, or -1 when the input is empty,
 * structurally invalid, an overlong encoding, a surrogate
 * (U+D800..U+DFFF), in U+FDD0..U+FDEF, above U+10FFFF, or has its low
 * 16 bits equal to 0xFFFE/0xFFFF.
 */
ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst)
{
	int32_t cp = -1;
	ssize_t nbytes;

	*dst = -1;

	nbytes = utf8proc_charlen(str, str_len);
	if (nbytes < 0)
		return -1;

	if (nbytes == 1) {
		cp = str[0];
	} else if (nbytes == 2) {
		cp = ((str[0] & 0x1F) << 6) | (str[1] & 0x3F);
		/* Overlong: value would have fit in one byte. */
		if (cp < 0x80)
			cp = -1;
	} else if (nbytes == 3) {
		cp = ((str[0] & 0x0F) << 12) | ((str[1] & 0x3F) << 6) |
			(str[2] & 0x3F);
		if (cp < 0x800)
			cp = -1; /* overlong */
		else if (cp >= 0xD800 && cp < 0xE000)
			cp = -1; /* surrogate range */
		else if (cp >= 0xFDD0 && cp < 0xFDF0)
			cp = -1; /* U+FDD0..U+FDEF */
	} else if (nbytes == 4) {
		cp = ((str[0] & 0x07) << 18) | ((str[1] & 0x3F) << 12) |
			((str[2] & 0x3F) << 6) | (str[3] & 0x3F);
		/* Overlong, or beyond the last Unicode code point. */
		if (cp < 0x10000 || cp >= 0x110000)
			cp = -1;
	}
	/* nbytes == 0 (empty input) leaves cp at -1 and is rejected below. */

	if (cp < 0 || (cp & 0xFFFF) >= 0xFFFE)
		return -1;

	*dst = cp;
	return nbytes;
}
-extern int to_utf8(unsigned int c, bstring dest) +void utf8_encode_char(int32_t uc, gh_buf *buf) { - if (c < 0x80) { - bconchar(dest, c); - } else if (c < 0x800) { - bconchar(dest, 192 + c/64); - bconchar(dest, 128 + c%64); - } else if (c - 0xd800u < 0x800) { - goto error; - } else if (c < 0x10000) { - bconchar(dest, 224 + c / 4096); - bconchar(dest, 128 + c /64%64); - bconchar(dest, 128 + c%64); - } else if (c < 0x110000) { - bconchar(dest, 240 + c/262144); - bconchar(dest, 128 + c/4096%64); - bconchar(dest, 128 + c/64%64); - bconchar(dest, 128 + c%64); - } else { - goto error; - } - return 0; -error: - return -1; + char dst[4]; + int len = 0; + + if (uc < 0x00) { + assert(false); + } else if (uc < 0x80) { + dst[0] = uc; + len = 1; + } else if (uc < 0x800) { + dst[0] = 0xC0 + (uc >> 6); + dst[1] = 0x80 + (uc & 0x3F); + len = 2; + } else if (uc == 0xFFFF) { + dst[0] = 0xFF; + return 1; + } else if (uc == 0xFFFE) { + dst[0] = 0xFE; + len = 1; + } else if (uc < 0x10000) { + dst[0] = 0xE0 + (uc >> 12); + dst[1] = 0x80 + ((uc >> 6) & 0x3F); + dst[2] = 0x80 + (uc & 0x3F); + len = 3; + } else if (uc < 0x110000) { + dst[0] = 0xF0 + (uc >> 18); + dst[1] = 0x80 + ((uc >> 12) & 0x3F); + dst[2] = 0x80 + ((uc >> 6) & 0x3F); + dst[3] = 0x80 + (uc & 0x3F); + len = 4; + } else { + assert(false); + } + + gh_buf_put(buf, dst, len); } +void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len) +{ + int32_t c; + #define bufpush(x) \ - check(to_utf8(x, buf) == 0, "UTF-8 encode error on code point %04x", x) + utf8proc_encode_char(x, dest) -// Returns the case-folded version of the source string, or NULL on error. 
-extern bstring case_fold(bstring source) -{ - unsigned char * s = source->data; - unsigned int c = 0; - bstring buf = bfromcstr(""); - while ((s = from_utf8(s, &c))) { -#include "case_fold_switch.c" - } - return buf; -error: - return NULL; + while (len > 0) { + ssize_t char_len = utf8proc_iterate(str, len, &c); + + if (char_len < 0) { + bufpush(0xFFFD); + continue; + } + +#include "case_fold_switch.inc" + + str += char_len; + len -= char_len; + } } -- cgit v1.2.3