summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore10
-rw-r--r--LICENSE30
-rw-r--r--Makefile62
-rw-r--r--README.md41
-rw-r--r--TODO5
-rw-r--r--alternative-html-blocks.txt247
-rw-r--r--data/CaseFolding-3.2.0.txt912
-rw-r--r--js/LICENSE30
-rw-r--r--js/bench.js35
-rwxr-xr-xjs/markdown15
-rwxr-xr-xjs/stmd.js1540
-rwxr-xr-xjs/test.js79
-rw-r--r--license.bstrlib.txt29
-rw-r--r--license.uthash.txt21
-rw-r--r--mkcasefold.pl21
-rw-r--r--oldtests/Blockquotes/Indents.html12
-rw-r--r--oldtests/Blockquotes/Indents.markdown5
-rw-r--r--oldtests/Blockquotes/Nesting.html32
-rw-r--r--oldtests/Blockquotes/Nesting.markdown22
-rw-r--r--oldtests/Blockquotes/Separation.html39
-rw-r--r--oldtests/Blockquotes/Separation.markdown29
-rw-r--r--oldtests/Code/BlankLines.html33
-rw-r--r--oldtests/Code/BlankLines.markdown28
-rw-r--r--oldtests/Code/BlankLinesAtEnd.html14
-rw-r--r--oldtests/Code/BlankLinesAtEnd.markdown14
-rw-r--r--oldtests/Code/FenceMatching.html8
-rw-r--r--oldtests/Code/FenceMatching.markdown10
-rw-r--r--oldtests/Code/FencedCodeBlocks.html24
-rw-r--r--oldtests/Code/FencedCodeBlocks.markdown35
-rw-r--r--oldtests/Code/IndentedCodeBlocks.html22
-rw-r--r--oldtests/Code/IndentedCodeBlocks.markdown22
-rw-r--r--oldtests/Code/IndentedFences.html20
-rw-r--r--oldtests/Code/IndentedFences.markdown26
-rw-r--r--oldtests/Code/IndentedInLists.html22
-rw-r--r--oldtests/Code/IndentedInLists.markdown17
-rw-r--r--oldtests/Code/Inline.html13
-rw-r--r--oldtests/Code/Inline.markdown13
-rw-r--r--oldtests/Code/ListBreakAfter.html30
-rw-r--r--oldtests/Code/ListBreakAfter.markdown26
-rw-r--r--oldtests/Code/WhiteLines.html7
-rw-r--r--oldtests/Code/WhiteLines.markdown9
-rw-r--r--oldtests/Emphasis/Escapes.html1
-rw-r--r--oldtests/Emphasis/Escapes.markdown1
-rw-r--r--oldtests/Emphasis/NestedEmphAndStrong.html66
-rw-r--r--oldtests/Emphasis/NestedEmphAndStrong.markdown69
-rw-r--r--oldtests/Emphasis/Pathological.html24
-rw-r--r--oldtests/Emphasis/Pathological.markdown26
-rw-r--r--oldtests/Emphasis/Punctuation.html10
-rw-r--r--oldtests/Emphasis/Punctuation.markdown19
-rw-r--r--oldtests/HTML/Blocks.html18
-rw-r--r--oldtests/HTML/Blocks.markdown26
-rw-r--r--oldtests/HTML/Inline.html8
-rw-r--r--oldtests/HTML/Inline.markdown8
-rw-r--r--oldtests/HTML/UppercaseTags.html4
-rw-r--r--oldtests/HTML/UppercaseTags.markdown5
-rw-r--r--oldtests/Headers/ATX.html14
-rw-r--r--oldtests/Headers/ATX.markdown20
-rw-r--r--oldtests/Headers/Setext.html9
-rw-r--r--oldtests/Headers/Setext.markdown17
-rw-r--r--oldtests/Links/AngleBrackets.html3
-rw-r--r--oldtests/Links/AngleBrackets.markdown7
-rw-r--r--oldtests/Links/AutoLinks.html7
-rw-r--r--oldtests/Links/AutoLinks.markdown7
-rw-r--r--oldtests/Links/BackticksInLinks.html1
-rw-r--r--oldtests/Links/BackticksInLinks.markdown1
-rw-r--r--oldtests/Links/CaseInsensitiveReferences.html1
-rw-r--r--oldtests/Links/CaseInsensitiveReferences.markdown3
-rw-r--r--oldtests/Links/Entities.html2
-rw-r--r--oldtests/Links/Entities.markdown3
-rw-r--r--oldtests/Links/InlineLinks.html10
-rw-r--r--oldtests/Links/InlineLinks.markdown9
-rw-r--r--oldtests/Links/ParensInURLs.html6
-rw-r--r--oldtests/Links/ParensInURLs.markdown14
-rw-r--r--oldtests/Links/ReferenceLinks.html7
-rw-r--r--oldtests/Links/ReferenceLinks.markdown10
-rw-r--r--oldtests/Lists/CodeBlocksInLists.html14
-rw-r--r--oldtests/Lists/CodeBlocksInLists.markdown18
-rw-r--r--oldtests/Lists/ConsecutiveLists.html20
-rw-r--r--oldtests/Lists/ConsecutiveLists.markdown10
-rw-r--r--oldtests/Lists/EmptyListItem.html10
-rw-r--r--oldtests/Lists/EmptyListItem.markdown7
-rw-r--r--oldtests/Lists/InBlockquote.html22
-rw-r--r--oldtests/Lists/InBlockquote.markdown12
-rw-r--r--oldtests/Lists/Indents.html22
-rw-r--r--oldtests/Lists/Indents.markdown17
-rw-r--r--oldtests/Lists/ListsAndHRs.html7
-rw-r--r--oldtests/Lists/ListsAndHRs.markdown3
-rw-r--r--oldtests/Lists/ListsAndSetextHeaders.html6
-rw-r--r--oldtests/Lists/ListsAndSetextHeaders.markdown4
-rw-r--r--oldtests/Lists/MultipleBlankLines.html56
-rw-r--r--oldtests/Lists/MultipleBlankLines.markdown37
-rw-r--r--oldtests/Lists/Start.html11
-rw-r--r--oldtests/Lists/Start.markdown7
-rw-r--r--oldtests/Lists/Sublists.html49
-rw-r--r--oldtests/Lists/Sublists.markdown24
-rw-r--r--oldtests/Lists/TightAndLoose.html49
-rw-r--r--oldtests/Lists/TightAndLoose.markdown45
-rw-r--r--oldtests/Lists/TightLooseBlockquote.html32
-rw-r--r--oldtests/Lists/TightLooseBlockquote.markdown25
-rw-r--r--oldtests/Lists/TightLooseMore.html7
-rw-r--r--oldtests/Lists/TightLooseMore.markdown4
-rw-r--r--oldtests/Lists/TwoBlankLinesEndList.html21
-rw-r--r--oldtests/Lists/TwoBlankLinesEndList.markdown20
-rw-r--r--oldtests/Makefile55
-rw-r--r--oldtests/Misc/BackslashEscapes.html14
-rw-r--r--oldtests/Misc/BackslashEscapes.markdown19
-rw-r--r--oldtests/Misc/Laziness.html22
-rw-r--r--oldtests/Misc/Laziness.markdown14
-rw-r--r--oldtests/Misc/LineBreaks.html11
-rw-r--r--oldtests/Misc/LineBreaks.markdown18
-rw-r--r--oldtests/Misc/Transitions.html26
-rw-r--r--oldtests/Misc/Transitions.markdown20
-rw-r--r--oldtests/Original/Amps_and_angle_encoding.html9
-rw-r--r--oldtests/Original/Amps_and_angle_encoding.markdown21
-rw-r--r--oldtests/Original/Auto_links.html13
-rw-r--r--oldtests/Original/Auto_links.markdown13
-rw-r--r--oldtests/Original/Backslash_escapes.html75
-rw-r--r--oldtests/Original/Backslash_escapes.markdown120
-rw-r--r--oldtests/Original/Blockquotes_with_code_blocks.html12
-rw-r--r--oldtests/Original/Blockquotes_with_code_blocks.markdown11
-rw-r--r--oldtests/Original/Code_Blocks.html12
-rw-r--r--oldtests/Original/Code_Blocks.markdown14
-rw-r--r--oldtests/Original/Code_Spans.html3
-rw-r--r--oldtests/Original/Code_Spans.markdown5
-rw-r--r--oldtests/Original/Horizontal_rules.html39
-rw-r--r--oldtests/Original/Horizontal_rules.markdown67
-rw-r--r--oldtests/Original/Images.html11
-rw-r--r--oldtests/Original/Images.markdown26
-rw-r--r--oldtests/Original/Inline_HTML_Advanced.html23
-rw-r--r--oldtests/Original/Inline_HTML_Advanced.markdown30
-rw-r--r--oldtests/Original/Inline_HTML_Simple.html45
-rw-r--r--oldtests/Original/Inline_HTML_Simple.markdown69
-rw-r--r--oldtests/Original/Inline_HTML_comments.html8
-rw-r--r--oldtests/Original/Inline_HTML_comments.markdown13
-rw-r--r--oldtests/Original/Links_inline_style.html12
-rw-r--r--oldtests/Original/Links_inline_style.markdown24
-rw-r--r--oldtests/Original/Links_reference_style.html28
-rw-r--r--oldtests/Original/Links_reference_style.markdown71
-rw-r--r--oldtests/Original/Links_shortcut_references.html6
-rw-r--r--oldtests/Original/Links_shortcut_references.markdown20
-rw-r--r--oldtests/Original/Literal_quotes_in_titles.html2
-rw-r--r--oldtests/Original/Literal_quotes_in_titles.markdown7
-rw-r--r--oldtests/Original/Markdown_Documentation_Basics.html242
-rw-r--r--oldtests/Original/Markdown_Documentation_Basics.markdown306
-rw-r--r--oldtests/Original/Markdown_Documentation_Syntax.html708
-rw-r--r--oldtests/Original/Markdown_Documentation_Syntax.markdown888
-rw-r--r--oldtests/Original/Nested_blockquotes.html7
-rw-r--r--oldtests/Original/Nested_blockquotes.markdown5
-rw-r--r--oldtests/Original/Ordered_and_unordered_lists.html112
-rw-r--r--oldtests/Original/Ordered_and_unordered_lists.markdown131
-rw-r--r--oldtests/Original/README15
-rw-r--r--oldtests/Original/Strong_and_em_together.html4
-rw-r--r--oldtests/Original/Strong_and_em_together.markdown7
-rw-r--r--oldtests/Original/Tabs.html19
-rw-r--r--oldtests/Original/Tabs.markdown21
-rw-r--r--oldtests/Original/Tidyness.html8
-rw-r--r--oldtests/Original/Tidyness.markdown5
-rw-r--r--oldtests/Tabs/TabConversionUnicode.html1
-rw-r--r--oldtests/Tabs/TabConversionUnicode.markdown1
-rw-r--r--runtests.pl159
-rw-r--r--spec.txt6044
-rwxr-xr-xspec2js.js17
-rw-r--r--spec2md.pl36
-rwxr-xr-xspecfilter.hs37
-rw-r--r--src/blocks.c747
-rw-r--r--src/bstrlib.c2979
-rw-r--r--src/bstrlib.h304
-rw-r--r--src/case_fold_switch.c2637
-rw-r--r--src/casefold.c2699
-rw-r--r--src/debug.h36
-rw-r--r--src/detab.c48
-rw-r--r--src/getopt.c199
-rw-r--r--src/html.c276
-rw-r--r--src/inlines.c998
-rw-r--r--src/main.c102
-rw-r--r--src/print.c168
-rw-r--r--src/scanners.h15
-rw-r--r--src/scanners.re238
-rw-r--r--src/stmd.h121
-rw-r--r--src/utf8.c106
-rw-r--r--src/utf8.h6
-rw-r--r--src/uthash.h948
-rw-r--r--template.html66
-rw-r--r--template.tex229
184 files changed, 27304 insertions, 3 deletions
diff --git a/.gitignore b/.gitignore
index 4d40434..9cfb3a6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,13 @@
*.i*86
*.x86_64
*.hex
+
+*~
+*.bak
+*.diff
+*#
+scanners.c
+*.zip
+bstrlib.txt
+stmd.dSYM/*
+stmd
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..bb8c36f
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,30 @@
+Copyright (c) 2014, John MacFarlane
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of John MacFarlane nor the names of other
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..98d0c45
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,62 @@
+CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
+LDFLAGS=-g -O3 -Wall -Werror
+SRCDIR=src
+DATADIR=data
+
+PROG=./stmd
+
+.PHONY: all oldtests test spec benchjs testjs
+all: $(SRCDIR)/case_fold_switch.c $(PROG)
+
+spec: test spec.html
+
+spec.md: spec.txt
+ perl spec2md.pl < $< > $@
+
+spec.html: spec.md template.html
+ pandoc --no-highlight --number-sections --template template.html -s --toc -S $< > $@ # | perl -pe 's/␣/<span class="space"> <\/span>/g' > $@
+
+spec.pdf: spec.md template.tex specfilter.hs
+ pandoc -s $< --template template.tex \
+ --filter ./specfilter.hs -o $@ --latex-engine=xelatex --toc \
+ --number-sections -V documentclass=report -V tocdepth=2 \
+ -V classoption=twosides
+
+oldtests:
+ make -C oldtests --quiet clean all
+
+test: spec.txt
+ perl runtests.pl $(PROG) $<
+
+testjs: spec.txt
+ node js/test.js
+# perl runtests.pl js/markdown $<
+
+benchjs:
+ node js/bench.js
+
+$(PROG): $(SRCDIR)/main.c $(SRCDIR)/inlines.o $(SRCDIR)/blocks.o $(SRCDIR)/detab.o $(SRCDIR)/bstrlib.o $(SRCDIR)/scanners.o $(SRCDIR)/print.o $(SRCDIR)/html.o $(SRCDIR)/utf8.o
+ $(CC) $(LDFLAGS) -o $@ $^
+
+$(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re
+ re2c --case-insensitive -bis $< > $@
+
+$(SRCDIR)/case_fold_switch.c: $(DATADIR)/CaseFolding-3.2.0.txt
+ perl mkcasefold.pl < $< > $@
+
+.PHONY: leakcheck clean fuzztest dingus
+
+dingus:
+ cd js && echo "Starting dingus server at http://localhost:9000" && python -m SimpleHTTPServer 9000
+
+leakcheck: $(PROG)
+ cat oldtests/*/*.markdown | valgrind --leak-check=full --dsymutil=yes $(PROG)
+
+fuzztest:
+ for i in `seq 1 10`; do \
+ time cat /dev/urandom | head -c 100000 | iconv -f latin1 -t utf-8 | $(PROG) >/dev/null; done
+
+clean:
+ -rm test $(SRCDIR)/*.o $(SRCDIR)/scanners.c
+ -rm -r *.dSYM
+ -rm spec.md fuzz.txt spec.html
diff --git a/README.md b/README.md
index 93aecf8..1f7c7a5 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,39 @@
-stmd
-====
+Standard markdown
+=================
+
+Standard markdown is a specification of markdown syntax, together
+with implementations (`stmd`) in C and javascript.
+
+The C implementation provides both a library and a standalone program
+that converts markdown to HTML. It is written in standard C99 and has
+no library dependencies. (However, if you check it out from the
+repository, you'll need `re2c` to generate `scanners.c` from
+`scanners.re`. This is only a build dependency for developers, since
+`scanners.c` can be provided in a released source tarball.)
+
+The javascript implementation is a single javascript file
+that can be linked to an HTML page. A standalone version (using
+`node.js`) is also provided (`js/markdown`), and there is a
+"dingus" for playing with it interactively. (`make dingus` will start
+this.)
+
+The spec contains over 400 embedded examples which serve as
+conformance tests. To run the tests for `stmd`, do `make test`.
+To run them for another markdown program, say `myprog`,
+do `make test PROG=myprog`. To run the tests for `stmd.js`,
+do `make testjs`.
+
+The source of the spec is `spec.txt`. This is basically a markdown
+file, with code examples written in a shorthand form:
+
+ .
+ markdown source
+ .
+ expected HTML output
+ .
+
+To build an HTML version of the spec, do `make spec.html`.
+To build a PDF version, do `make spec.pdf`. Both these commands
+require that pandoc is installed, and creating a PDF requires
+a latex installation.
-a spec for "standard markdown," with matching C and javascript implementations
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..f9369e1
--- /dev/null
+++ b/TODO
@@ -0,0 +1,5 @@
+- add library function to convert a string
+- add README/library documentation
+- add man page for prog and library
+- document/clean up code
+
diff --git a/alternative-html-blocks.txt b/alternative-html-blocks.txt
new file mode 100644
index 0000000..3ba0d15
--- /dev/null
+++ b/alternative-html-blocks.txt
@@ -0,0 +1,247 @@
+# Appendix B: An alternate spec for HTML blocks {-}
+
+(The following spec departs less from original markdown than the
+one described above, but is also less flexible.)
+
+An [HTML block](#html-block) <a id="html-block-tag"/> begins
+with an [open tag](#open-tag), [HTML comment](#html-comment),
+[processing instruction](#processing-instruction),
+[declaration](#declaration), or [CDATA section](#cdata-section).
+This opening element may optionally be preceded by 1-3 spaces,
+and must not be followed on a line by anything other than white space.
+
+If the opening tag is self-closing, or if it is an [HTML
+comment](#html-comment), [processing
+instruction](#processing-instruction), [declaration](#declaration), or
+[CDATA section](#cdata-section), then the [HTML block](#html-block)
+contains just that tag.
+
+If it is an [open tag](#open-tag), then the [HTML block](#html-block)
+continues until a matching closing tag is found, or until the end
+of the document. Note that the matching closing tag is not necessarily
+the first closing tag of the same type that is encountered, since
+that tag may close a later open tag of the same type. Open and closing
+tags must be balanced.
+
+The contents of the HTML block are interpreted as raw HTML, and will not
+be escaped in HTML output.
+
+Some simple examples:
+
+.
+<table>
+ <tr>
+ <td>
+ hi
+ </td>
+ </tr>
+</table>
+
+okay.
+.
+<table>
+ <tr>
+ <td>
+ hi
+ </td>
+ </tr>
+</table>
+<p>okay.</p>
+.
+
+
+.
+<div class="outer">
+
+ <div class="inner">
+
+ <p>foo&ouml;</p>
+
+ </div>
+
+</div>
+.
+<div class="outer">
+
+ <div class="inner">
+
+ <p>foo&ouml;</p>
+
+ </div>
+
+</div>
+.
+
+A self-closing tag:
+
+.
+<div />
+.
+<div />
+.
+
+Here we have an unclosed tag, and the block continues to the end of
+the document:
+
+.
+<div>
+<div>
+foo
+</div>
+
+*bar*
+.
+<div>
+<div>
+foo
+</div>
+
+*bar*
+.
+
+A comment:
+
+.
+<!-- Foo
+bar
+ baz -->
+.
+<!-- Foo
+bar
+ baz -->
+.
+
+A processing instruction:
+
+.
+<?php
+ echo 'foo'
+?>
+.
+<?php
+ echo 'foo'
+?>
+.
+
+CDATA:
+
+.
+<![CDATA[
+function matchwo(a,b)
+{
+if (a < b && a < 0) then
+ {
+ return 1;
+ }
+else
+ {
+ return 0;
+ }
+}
+]]>
+.
+<![CDATA[
+function matchwo(a,b)
+{
+if (a < b && a < 0) then
+ {
+ return 1;
+ }
+else
+ {
+ return 0;
+ }
+}
+]]>
+.
+
+The opening tag can be indented 1-3 spaces, but not 4:
+
+.
+ <!-- foo -->
+ <!-- foo -->
+.
+ <!-- foo -->
+<pre><code>&lt;!-- foo --&gt;
+</code></pre>
+.
+
+The opening tag must be on a line (or lines) by itself:
+
+.
+<table><tr><td>
+foo
+</td></tr></table>
+.
+<p><table><tr><td> foo </td></tr></table></p>
+.
+
+.
+<!-- foo -->bar
+.
+<p><!-- foo -->bar</p>
+.
+
+The opening tag need not be an HTML block tag or even an HTML tag:
+
+.
+<a>
+foo
+</a>
+.
+<a>
+foo
+</a>
+.
+
+.
+<foo>
+bar
+</foo>
+.
+<foo>
+bar
+</foo>
+.
+
+So, note the difference:
+
+.
+<del>
+bar
+</del>
+
+<del>bar</del>
+.
+<del>
+bar
+</del>
+<p><del>bar</del></p>
+.
+
+This rule differs from John Gruber's original markdown syntax
+specification, which says:
+
+> The only restrictions are that block-level HTML elements —
+> e.g. `<div>`, `<table>`, `<pre>`, `<p>`, etc. — must be separated from
+> surrounding content by blank lines, and the start and end tags of the
+> block should not be indented with tabs or spaces.
+
+In some ways Gruber's rule is more restrictive than the one given
+here:
+
+- It requires that an HTML block be preceded and followed by a blank line.
+- It does not allow the start tag to be indented.
+- It does not allow the end tag to be indented.
+- It requires that the open tag be an HTML block-level tag.
+
+Indeed, most markdown implementations, including some of Gruber's
+own perl implementations, do not impose these restrictions.
+
+However, unlike Gruber's rule, this one requires that the open
+tag be on a line by itself. It also differs from most markdown
+implementations in how it handles the case where there is no matching
+closing tag (a case not mentioned in Gruber's rule). In such a case,
+the rule stated above includes the whole rest of the document in the
+HTML block.
+
diff --git a/data/CaseFolding-3.2.0.txt b/data/CaseFolding-3.2.0.txt
new file mode 100644
index 0000000..104a823
--- /dev/null
+++ b/data/CaseFolding-3.2.0.txt
@@ -0,0 +1,912 @@
+# CaseFolding-3.2.0.txt
+# Date: 2002-03-22,20:54:33 GMT [MD]
+#
+# Case Folding Properties
+#
+# This file is a supplement to the UnicodeData file.
+# It provides a case folding mapping generated from the Unicode Character Database.
+# If all characters are mapped according to the full mapping below, then
+# case differences (according to UnicodeData.txt and SpecialCasing.txt)
+# are eliminated.
+#
+# The data supports both implementations that require simple case foldings
+# (where string lengths don't change), and implementations that allow full case folding
+# (where string lengths may grow). Note that where they can be supported, the
+# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
+#
+# NOTE: case folding does not preserve normalization formats!
+#
+# For information on case folding, see
+# UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/
+#
+# ================================================================================
+# Format
+# ================================================================================
+# The entries in this file are in the following machine-readable format:
+#
+# <code>; <status>; <mapping>; # <name>
+#
+# The status field is:
+# C: common case folding, common mappings shared by both simple and full mappings.
+# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
+# S: simple case folding, mappings to single characters where different from F.
+# T: special case for uppercase I and dotted uppercase I
+# - For non-Turkic languages, this mapping is normally not used.
+# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
+#
+# Usage:
+# A. To do a simple case folding, use the mappings with status C + S.
+# B. To do a full case folding, use the mappings with status C + F.
+#
+# The mappings with status T can be used or omitted depending on the desired case-folding
+# behavior. (The default option is to exclude them.)
+#
+# =================================================================
+
+0041; C; 0061; # LATIN CAPITAL LETTER A
+0042; C; 0062; # LATIN CAPITAL LETTER B
+0043; C; 0063; # LATIN CAPITAL LETTER C
+0044; C; 0064; # LATIN CAPITAL LETTER D
+0045; C; 0065; # LATIN CAPITAL LETTER E
+0046; C; 0066; # LATIN CAPITAL LETTER F
+0047; C; 0067; # LATIN CAPITAL LETTER G
+0048; C; 0068; # LATIN CAPITAL LETTER H
+0049; C; 0069; # LATIN CAPITAL LETTER I
+0049; T; 0131; # LATIN CAPITAL LETTER I
+004A; C; 006A; # LATIN CAPITAL LETTER J
+004B; C; 006B; # LATIN CAPITAL LETTER K
+004C; C; 006C; # LATIN CAPITAL LETTER L
+004D; C; 006D; # LATIN CAPITAL LETTER M
+004E; C; 006E; # LATIN CAPITAL LETTER N
+004F; C; 006F; # LATIN CAPITAL LETTER O
+0050; C; 0070; # LATIN CAPITAL LETTER P
+0051; C; 0071; # LATIN CAPITAL LETTER Q
+0052; C; 0072; # LATIN CAPITAL LETTER R
+0053; C; 0073; # LATIN CAPITAL LETTER S
+0054; C; 0074; # LATIN CAPITAL LETTER T
+0055; C; 0075; # LATIN CAPITAL LETTER U
+0056; C; 0076; # LATIN CAPITAL LETTER V
+0057; C; 0077; # LATIN CAPITAL LETTER W
+0058; C; 0078; # LATIN CAPITAL LETTER X
+0059; C; 0079; # LATIN CAPITAL LETTER Y
+005A; C; 007A; # LATIN CAPITAL LETTER Z
+00B5; C; 03BC; # MICRO SIGN
+00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
+00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
+00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE
+00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS
+00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE
+00C6; C; 00E6; # LATIN CAPITAL LETTER AE
+00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA
+00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE
+00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE
+00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
+00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
+00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
+00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
+00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
+00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
+00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
+00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE
+00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE
+00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS
+00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE
+00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE
+00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE
+00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS
+00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
+00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
+00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
+0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON
+0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE
+0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK
+0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE
+0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE
+010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON
+010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON
+0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE
+0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON
+0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE
+0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE
+0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK
+011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON
+011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE
+0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE
+0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA
+0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE
+0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE
+012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON
+012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE
+012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK
+0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0132; C; 0133; # LATIN CAPITAL LIGATURE IJ
+0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA
+0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE
+013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA
+013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON
+013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
+0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE
+0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE
+0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA
+0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON
+0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+014A; C; 014B; # LATIN CAPITAL LETTER ENG
+014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON
+014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE
+0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0152; C; 0153; # LATIN CAPITAL LIGATURE OE
+0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE
+0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA
+0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON
+015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE
+015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA
+0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON
+0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA
+0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON
+0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE
+0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE
+016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON
+016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE
+016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE
+0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK
+0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS
+0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE
+017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE
+017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON
+017F; C; 0073; # LATIN SMALL LETTER LONG S
+0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK
+0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR
+0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX
+0186; C; 0254; # LATIN CAPITAL LETTER OPEN O
+0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK
+0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D
+018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK
+018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR
+018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E
+018F; C; 0259; # LATIN CAPITAL LETTER SCHWA
+0190; C; 025B; # LATIN CAPITAL LETTER OPEN E
+0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK
+0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK
+0194; C; 0263; # LATIN CAPITAL LETTER GAMMA
+0196; C; 0269; # LATIN CAPITAL LETTER IOTA
+0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE
+0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK
+019C; C; 026F; # LATIN CAPITAL LETTER TURNED M
+019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK
+019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
+01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN
+01A2; C; 01A3; # LATIN CAPITAL LETTER OI
+01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK
+01A6; C; 0280; # LATIN LETTER YR
+01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO
+01A9; C; 0283; # LATIN CAPITAL LETTER ESH
+01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK
+01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
+01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN
+01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON
+01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK
+01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK
+01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE
+01B7; C; 0292; # LATIN CAPITAL LETTER EZH
+01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED
+01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE
+01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON
+01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+01C7; C; 01C9; # LATIN CAPITAL LETTER LJ
+01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
+01CA; C; 01CC; # LATIN CAPITAL LETTER NJ
+01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
+01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON
+01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON
+01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON
+01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON
+01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
+01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
+01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
+01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
+01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
+01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
+01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON
+01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE
+01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON
+01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON
+01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK
+01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
+01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON
+01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
+01F1; C; 01F3; # LATIN CAPITAL LETTER DZ
+01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
+01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE
+01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR
+01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN
+01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE
+01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
+01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE
+01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
+0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
+0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE
+0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
+0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE
+0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
+020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
+020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
+020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE
+0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
+0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE
+0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
+0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE
+0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW
+021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW
+021C; C; 021D; # LATIN CAPITAL LETTER YOGH
+021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON
+0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
+0222; C; 0223; # LATIN CAPITAL LETTER OU
+0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK
+0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE
+0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA
+022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
+022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
+022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE
+0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
+0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON
+0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI
+0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS
+0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS
+0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS
+038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS
+038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS
+038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS
+038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS
+0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA
+0392; C; 03B2; # GREEK CAPITAL LETTER BETA
+0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA
+0394; C; 03B4; # GREEK CAPITAL LETTER DELTA
+0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON
+0396; C; 03B6; # GREEK CAPITAL LETTER ZETA
+0397; C; 03B7; # GREEK CAPITAL LETTER ETA
+0398; C; 03B8; # GREEK CAPITAL LETTER THETA
+0399; C; 03B9; # GREEK CAPITAL LETTER IOTA
+039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA
+039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA
+039C; C; 03BC; # GREEK CAPITAL LETTER MU
+039D; C; 03BD; # GREEK CAPITAL LETTER NU
+039E; C; 03BE; # GREEK CAPITAL LETTER XI
+039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON
+03A0; C; 03C0; # GREEK CAPITAL LETTER PI
+03A1; C; 03C1; # GREEK CAPITAL LETTER RHO
+03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA
+03A4; C; 03C4; # GREEK CAPITAL LETTER TAU
+03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON
+03A6; C; 03C6; # GREEK CAPITAL LETTER PHI
+03A7; C; 03C7; # GREEK CAPITAL LETTER CHI
+03A8; C; 03C8; # GREEK CAPITAL LETTER PSI
+03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA
+03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA
+03D0; C; 03B2; # GREEK BETA SYMBOL
+03D1; C; 03B8; # GREEK THETA SYMBOL
+03D5; C; 03C6; # GREEK PHI SYMBOL
+03D6; C; 03C0; # GREEK PI SYMBOL
+03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA
+03DA; C; 03DB; # GREEK LETTER STIGMA
+03DC; C; 03DD; # GREEK LETTER DIGAMMA
+03DE; C; 03DF; # GREEK LETTER KOPPA
+03E0; C; 03E1; # GREEK LETTER SAMPI
+03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI
+03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI
+03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI
+03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI
+03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA
+03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA
+03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI
+03F0; C; 03BA; # GREEK KAPPA SYMBOL
+03F1; C; 03C1; # GREEK RHO SYMBOL
+03F2; C; 03C3; # GREEK LUNATE SIGMA SYMBOL
+03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL
+03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL
+0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE
+0401; C; 0451; # CYRILLIC CAPITAL LETTER IO
+0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE
+0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE
+0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE
+0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+0407; C; 0457; # CYRILLIC CAPITAL LETTER YI
+0408; C; 0458; # CYRILLIC CAPITAL LETTER JE
+0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE
+040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE
+040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE
+040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE
+040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE
+040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U
+040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE
+0410; C; 0430; # CYRILLIC CAPITAL LETTER A
+0411; C; 0431; # CYRILLIC CAPITAL LETTER BE
+0412; C; 0432; # CYRILLIC CAPITAL LETTER VE
+0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE
+0414; C; 0434; # CYRILLIC CAPITAL LETTER DE
+0415; C; 0435; # CYRILLIC CAPITAL LETTER IE
+0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE
+0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE
+0418; C; 0438; # CYRILLIC CAPITAL LETTER I
+0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I
+041A; C; 043A; # CYRILLIC CAPITAL LETTER KA
+041B; C; 043B; # CYRILLIC CAPITAL LETTER EL
+041C; C; 043C; # CYRILLIC CAPITAL LETTER EM
+041D; C; 043D; # CYRILLIC CAPITAL LETTER EN
+041E; C; 043E; # CYRILLIC CAPITAL LETTER O
+041F; C; 043F; # CYRILLIC CAPITAL LETTER PE
+0420; C; 0440; # CYRILLIC CAPITAL LETTER ER
+0421; C; 0441; # CYRILLIC CAPITAL LETTER ES
+0422; C; 0442; # CYRILLIC CAPITAL LETTER TE
+0423; C; 0443; # CYRILLIC CAPITAL LETTER U
+0424; C; 0444; # CYRILLIC CAPITAL LETTER EF
+0425; C; 0445; # CYRILLIC CAPITAL LETTER HA
+0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE
+0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE
+0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA
+0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA
+042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN
+042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU
+042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN
+042D; C; 044D; # CYRILLIC CAPITAL LETTER E
+042E; C; 044E; # CYRILLIC CAPITAL LETTER YU
+042F; C; 044F; # CYRILLIC CAPITAL LETTER YA
+0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA
+0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT
+0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E
+0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS
+0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
+046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS
+046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
+046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI
+0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI
+0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA
+0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA
+0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
+0478; C; 0479; # CYRILLIC CAPITAL LETTER UK
+047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA
+047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
+047E; C; 047F; # CYRILLIC CAPITAL LETTER OT
+0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA
+048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
+048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
+048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK
+0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
+0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
+0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
+0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
+049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
+049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
+049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE
+04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA
+04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
+04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE
+04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
+04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
+04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
+04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
+04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U
+04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
+04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
+04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE
+04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
+04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
+04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA
+04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
+04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
+04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE
+04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK
+04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL
+04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK
+04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL
+04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
+04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL
+04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE
+04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
+04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE
+04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE
+04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA
+04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
+04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
+04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
+04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
+04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON
+04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS
+04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS
+04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O
+04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
+04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS
+04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON
+04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS
+04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
+04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
+04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
+0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE
+0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE
+0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE
+0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE
+0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE
+050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE
+050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE
+050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE
+0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
+0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
+0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
+0534; C; 0564; # ARMENIAN CAPITAL LETTER DA
+0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH
+0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA
+0537; C; 0567; # ARMENIAN CAPITAL LETTER EH
+0538; C; 0568; # ARMENIAN CAPITAL LETTER ET
+0539; C; 0569; # ARMENIAN CAPITAL LETTER TO
+053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE
+053B; C; 056B; # ARMENIAN CAPITAL LETTER INI
+053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN
+053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH
+053E; C; 056E; # ARMENIAN CAPITAL LETTER CA
+053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN
+0540; C; 0570; # ARMENIAN CAPITAL LETTER HO
+0541; C; 0571; # ARMENIAN CAPITAL LETTER JA
+0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD
+0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH
+0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN
+0545; C; 0575; # ARMENIAN CAPITAL LETTER YI
+0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW
+0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA
+0548; C; 0578; # ARMENIAN CAPITAL LETTER VO
+0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA
+054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH
+054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH
+054C; C; 057C; # ARMENIAN CAPITAL LETTER RA
+054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH
+054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW
+054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN
+0550; C; 0580; # ARMENIAN CAPITAL LETTER REH
+0551; C; 0581; # ARMENIAN CAPITAL LETTER CO
+0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN
+0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR
+0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH
+0555; C; 0585; # ARMENIAN CAPITAL LETTER OH
+0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH
+0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN
+1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
+1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
+1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
+1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW
+1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
+1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE
+1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW
+1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW
+1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA
+1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
+1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
+1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
+1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
+1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW
+1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
+1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE
+1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON
+1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE
+1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW
+1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS
+1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA
+1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW
+1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW
+1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
+1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE
+1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW
+1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW
+1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW
+1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
+1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW
+1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
+1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE
+1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE
+1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW
+1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE
+1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW
+1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW
+1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
+1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
+1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
+1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
+1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
+1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE
+1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE
+1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE
+1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW
+1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
+1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW
+1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE
+1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW
+1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
+1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
+1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
+1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE
+1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW
+1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW
+1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
+1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
+1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW
+1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
+1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
+1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
+1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE
+1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW
+1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE
+1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE
+1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS
+1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE
+1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW
+1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE
+1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS
+1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE
+1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
+1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW
+1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW
+1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW
+1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS
+1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE
+1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
+1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
+1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE
+1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW
+1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE
+1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
+1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
+1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
+1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
+1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
+1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
+1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
+1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
+1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
+1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
+1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW
+1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE
+1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE
+1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
+1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
+1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
+1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
+1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
+1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
+1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW
+1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW
+1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE
+1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
+1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
+1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
+1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
+1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
+1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE
+1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE
+1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
+1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE
+1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
+1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW
+1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE
+1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE
+1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE
+1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
+1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE
+1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
+1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE
+1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW
+1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
+1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE
+1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI
+1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA
+1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
+1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
+1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
+1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
+1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
+1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
+1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI
+1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA
+1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
+1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
+1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
+1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
+1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI
+1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA
+1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
+1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
+1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
+1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
+1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
+1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
+1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI
+1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA
+1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
+1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
+1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
+1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
+1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
+1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
+1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI
+1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA
+1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
+1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
+1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
+1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
+1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
+1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
+1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
+1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
+1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA
+1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
+1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
+1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
+1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI
+1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA
+1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
+1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
+1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
+1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
+1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
+1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
+1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
+1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
+1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
+1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
+1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
+1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
+1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
+1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
+1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
+1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY
+1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON
+1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA
+1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA
+1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FBE; C; 03B9; # GREEK PROSGEGRAMMENI
+1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
+1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
+1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
+1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA
+1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA
+1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA
+1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA
+1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
+1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
+1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
+1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY
+1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON
+1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA
+1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA
+1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
+1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI
+1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
+1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
+1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY
+1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON
+1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA
+1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA
+1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA
+1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
+1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
+1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
+1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA
+1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA
+1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA
+1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA
+1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+2126; C; 03C9; # OHM SIGN
+212A; C; 006B; # KELVIN SIGN
+212B; C; 00E5; # ANGSTROM SIGN
+2160; C; 2170; # ROMAN NUMERAL ONE
+2161; C; 2171; # ROMAN NUMERAL TWO
+2162; C; 2172; # ROMAN NUMERAL THREE
+2163; C; 2173; # ROMAN NUMERAL FOUR
+2164; C; 2174; # ROMAN NUMERAL FIVE
+2165; C; 2175; # ROMAN NUMERAL SIX
+2166; C; 2176; # ROMAN NUMERAL SEVEN
+2167; C; 2177; # ROMAN NUMERAL EIGHT
+2168; C; 2178; # ROMAN NUMERAL NINE
+2169; C; 2179; # ROMAN NUMERAL TEN
+216A; C; 217A; # ROMAN NUMERAL ELEVEN
+216B; C; 217B; # ROMAN NUMERAL TWELVE
+216C; C; 217C; # ROMAN NUMERAL FIFTY
+216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED
+216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED
+216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND
+24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A
+24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B
+24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C
+24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D
+24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E
+24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F
+24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G
+24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H
+24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I
+24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J
+24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K
+24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L
+24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M
+24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N
+24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O
+24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P
+24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q
+24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R
+24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S
+24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T
+24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U
+24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V
+24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W
+24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X
+24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y
+24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z
+FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
+FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
+FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
+FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI
+FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL
+FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T
+FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST
+FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW
+FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH
+FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI
+FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW
+FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH
+FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A
+FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B
+FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C
+FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D
+FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E
+FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F
+FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G
+FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H
+FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I
+FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J
+FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K
+FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L
+FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M
+FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N
+FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O
+FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P
+FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q
+FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R
+FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S
+FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T
+FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U
+FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V
+FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W
+FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X
+FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y
+FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
+10400; C; 10428; # DESERET CAPITAL LETTER LONG I
+10401; C; 10429; # DESERET CAPITAL LETTER LONG E
+10402; C; 1042A; # DESERET CAPITAL LETTER LONG A
+10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH
+10404; C; 1042C; # DESERET CAPITAL LETTER LONG O
+10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO
+10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I
+10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E
+10408; C; 10430; # DESERET CAPITAL LETTER SHORT A
+10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH
+1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O
+1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO
+1040C; C; 10434; # DESERET CAPITAL LETTER AY
+1040D; C; 10435; # DESERET CAPITAL LETTER OW
+1040E; C; 10436; # DESERET CAPITAL LETTER WU
+1040F; C; 10437; # DESERET CAPITAL LETTER YEE
+10410; C; 10438; # DESERET CAPITAL LETTER H
+10411; C; 10439; # DESERET CAPITAL LETTER PEE
+10412; C; 1043A; # DESERET CAPITAL LETTER BEE
+10413; C; 1043B; # DESERET CAPITAL LETTER TEE
+10414; C; 1043C; # DESERET CAPITAL LETTER DEE
+10415; C; 1043D; # DESERET CAPITAL LETTER CHEE
+10416; C; 1043E; # DESERET CAPITAL LETTER JEE
+10417; C; 1043F; # DESERET CAPITAL LETTER KAY
+10418; C; 10440; # DESERET CAPITAL LETTER GAY
+10419; C; 10441; # DESERET CAPITAL LETTER EF
+1041A; C; 10442; # DESERET CAPITAL LETTER VEE
+1041B; C; 10443; # DESERET CAPITAL LETTER ETH
+1041C; C; 10444; # DESERET CAPITAL LETTER THEE
+1041D; C; 10445; # DESERET CAPITAL LETTER ES
+1041E; C; 10446; # DESERET CAPITAL LETTER ZEE
+1041F; C; 10447; # DESERET CAPITAL LETTER ESH
+10420; C; 10448; # DESERET CAPITAL LETTER ZHEE
+10421; C; 10449; # DESERET CAPITAL LETTER ER
+10422; C; 1044A; # DESERET CAPITAL LETTER EL
+10423; C; 1044B; # DESERET CAPITAL LETTER EM
+10424; C; 1044C; # DESERET CAPITAL LETTER EN
+10425; C; 1044D; # DESERET CAPITAL LETTER ENG
diff --git a/js/LICENSE b/js/LICENSE
new file mode 100644
index 0000000..bb8c36f
--- /dev/null
+++ b/js/LICENSE
@@ -0,0 +1,30 @@
+Copyright (c) 2014, John MacFarlane
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of John MacFarlane nor the names of other
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/js/bench.js b/js/bench.js
new file mode 100644
index 0000000..3c486b1
--- /dev/null
+++ b/js/bench.js
@@ -0,0 +1,35 @@
+var Benchmark = require('benchmark').Benchmark; // Benchmark script: times markdown->html conversion with stmd, showdown.js and marked.js
+var suite = new Benchmark.Suite;
+var fs = require('fs');
+var sm = require('./stmd');
+// showdown: https://github.com/coreyti/showdown (required from a path two directories above this file)
+var showdown = require('../../showdown/src/showdown');
+// marked: https://github.com/chjj/marked (required from a path two directories above this file)
+var marked = require('../../marked/marked.min.js');
+// Input document: first command-line argument, otherwise the default relative path below.
+var benchfile = process.argv[2] || 'oldtests/Original/Markdown_Documentation_Syntax.markdown';
+
+var contents = fs.readFileSync(benchfile, 'utf8'); // read once, up front; every test function reuses this string
+
+// Disabled: var converter = new showdown.converter();  (the showdown test function creates its own converter instead)
+// Register one test function per library; each one converts the same `contents` string.
+suite.add('stmd markdown->html', function() {
+ var doc = new sm.DocParser().parse(contents);
+ var renderer = new sm.HtmlRenderer();
+ renderer.renderBlock(doc); // return value is discarded; only the work is being timed
+})
+
+.add('showdown.js markdown->html', function() {
+ var converter = new showdown.converter();
+ converter.makeHtml(contents);
+})
+
+.add('marked.js markdown->html', function() {
+ marked(contents);
+})
+// On each 'cycle' event print String(event.target), then start the suite.
+.on('cycle', function(event) {
+ console.log(String(event.target));
+})
+.run();
+
diff --git a/js/markdown b/js/markdown
new file mode 100755
index 0000000..05a372a
--- /dev/null
+++ b/js/markdown
@@ -0,0 +1,15 @@
+#!/usr/bin/env node
+var fs = require('fs');
+var util = require('util'); // not referenced anywhere below
+var stmd = require('./stmd');
+// Usage: markdown [FILE] -- parse FILE (default: /dev/stdin) as markdown and print the rendered HTML to stdout.
+var file = process.argv[2] || '/dev/stdin'; // declared with `var` so no implicit global is created
+
+fs.readFile(file, 'utf8', function(err, data) {
+ if (err) {
+ return console.error(err); // report on stderr so the error text never ends up in the HTML written to stdout
+ }
+ var parser = new stmd.DocParser();
+ var renderer = new stmd.HtmlRenderer();
+ console.log(renderer.render(parser.parse(data)));
+});
diff --git a/js/stmd.js b/js/stmd.js
new file mode 100755
index 0000000..399d58d
--- /dev/null
+++ b/js/stmd.js
@@ -0,0 +1,1540 @@
+// stmd.js - "standard markdown" in javascript
+// Copyright (C) 2014 John MacFarlane
+// License: BSD3.
+
+// Basic usage:
+//
+// var stmd = require('stmd');
+// var parser = new stmd.DocParser();
+// var renderer = new stmd.HtmlRenderer();
+// console.log(renderer.render(parser.parse('Hello *world*')));
+
+(function(exports) {
+
+// Some regexps used in inline parser:
+
+// Regex source fragments. They are plain strings so they can be composed
+// into larger patterns below; backslashes are therefore doubled.
+var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]';
+var ESCAPED_CHAR = '\\\\' + ESCAPABLE;
+var IN_DOUBLE_QUOTES = '"(' + ESCAPED_CHAR + '|[^"\\x00])*"';
+var IN_SINGLE_QUOTES = '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'';
+var IN_PARENS = '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\)';
+var REG_CHAR = '[^\\\\()\\x00-\\x20]';
+var IN_PARENS_NOSP = '\\((' + REG_CHAR + '|' + ESCAPED_CHAR + ')*\\)';
+var TAGNAME = '[A-Za-z][A-Za-z0-9]*';
+// (a duplicated "footer" alternative was removed; the match set is unchanged)
+var BLOCKTAGNAME = '(?:article|header|aside|hgroup|blockquote|hr|body|li|br|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)';
+var ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*';
+var UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+";
+var SINGLEQUOTEDVALUE = "'[^']*'";
+var DOUBLEQUOTEDVALUE = '"[^"]*"';
+var ATTRIBUTEVALUE = "(?:" + UNQUOTEDVALUE + "|" + SINGLEQUOTEDVALUE + "|" + DOUBLEQUOTEDVALUE + ")";
+var ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")";
+var ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)";
+var OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>";
+var CLOSETAG = "</" + TAGNAME + "\\s*[>]";
+var OPENBLOCKTAG = "<" + BLOCKTAGNAME + ATTRIBUTE + "*" + "\\s*/?>";
+var CLOSEBLOCKTAG = "</" + BLOCKTAGNAME + "\\s*[>]";
+var HTMLCOMMENT = "<!--([^-]+|[-][^-]+)*-->";
+var PROCESSINGINSTRUCTION = "[<][?].*?[?][>]";
+var DECLARATION = "<![A-Z]+" + "\\s+[^>]*>";
+var CDATA = "<!\\[CDATA\\[([^\\]]+|\\][^\\]]|\\]\\][^>])*\\]\\]>";
+var HTMLTAG = "(?:" + OPENTAG + "|" + CLOSETAG + "|" + HTMLCOMMENT + "|" +
+  PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")";
+var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" +
+  "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])";
+
+var reHtmlTag = new RegExp('^' + HTMLTAG, 'i');
+
+var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i');
+
+var reLinkTitle = new RegExp(
+  '^(?:"(' + ESCAPED_CHAR + '|[^"\\x00])*"' +
+  '|' +
+  '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'' +
+  '|' +
+  '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\))');
+
+// Anchored with ^: the inline parser's match() advances past match.index,
+// so an unanchored pattern would silently skip ahead to a later "<...>"
+// instead of failing when the destination does not start with "<".
+var reLinkDestinationBraces = new RegExp(
+  '^[<](?:[^<>\\n\\\\\\x00]' + '|' + ESCAPED_CHAR + '|' + '\\\\)*[>]');
+
+// Anchored for consistency; since the pattern can match the empty string
+// it already always matched at index 0, so behavior is unchanged.
+var reLinkDestination = new RegExp(
+  '^(?:' + REG_CHAR + '+|' + ESCAPED_CHAR + '|' + IN_PARENS_NOSP + ')*');
+
+var reEscapable = new RegExp(ESCAPABLE);
+
+var reAllEscapedChar = new RegExp('\\\\(' + ESCAPABLE + ')', 'g');
+
+var reEscapedChar = new RegExp('^\\\\(' + ESCAPABLE + ')');
+
+var reAllTab = /\t/g;
+
+var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/;
+
+// Matches a character with a special meaning in markdown,
+// or a string of non-special characters.
+var reMain = /[\n`\[\]\\!<&*_]|[^\n`\[\]\\!<&*_]+/m;
+
+// UTILITY FUNCTIONS
+
+// Replace backslash escapes with literal characters.
+var unescape = function(s) {
+  // Every backslash followed by an escapable character is replaced by
+  // that character alone (capture group 1 of reAllEscapedChar).
+  var withoutEscapes = s.replace(reAllEscapedChar, '$1');
+  return withoutEscapes;
+};
+
+// Returns true if string contains only space characters.
+var isBlank = function(s) {
+  // Blank means the string holds nothing but whitespace; the empty
+  // string counts as blank too.
+  var onlyWhitespace = /^\s*$/;
+  return onlyWhitespace.test(s);
+};
+
+// Normalize reference label: collapse internal whitespace
+// to single space, remove leading/trailing whitespace, case fold.
+var normalizeReference = function(s) {
+  // Trim both ends, collapse EVERY internal whitespace run to one space
+  // (the g flag is required: without it only the first run is replaced),
+  // then upper-case so labels compare case-insensitively.
+  return s.trim()
+    .replace(/\s+/g, ' ')
+    .toUpperCase();
+};
+
+// Attempt to match a regex in string s at offset offset.
+// Return index of match or null.
+var matchAt = function(re, s, offset) {
+  // Search only the tail of s that starts at offset, then translate the
+  // tail-relative index back into an index into s.
+  var found = s.slice(offset).match(re);
+  return found ? offset + found.index : null;
+};
+
+// Convert tabs to spaces on each line using a 4-space tab stop.
+var detabLine = function(text) {
+  // Fast path: nothing to expand.
+  if (text.indexOf('\t') === -1) {
+    return text;
+  }
+  // Offset (in the original text) just past the previous tab; that point
+  // always sits on a tab stop once the earlier tabs are expanded.
+  var lastStop = 0;
+  return text.replace(/\t/g, function(match, offset) {
+    // The pad must be a full four spaces: slicing off the columns already
+    // used since the last stop leaves 1-4 spaces, reaching the next stop.
+    // (A one-character pad could never expand a tab to a 4-column stop.)
+    var result = '    '.slice((offset - lastStop) % 4);
+    lastStop = offset + 1;
+    return result;
+  });
+};
+
+// INLINE PARSER
+
+// These are methods of an InlineParser object, defined below.
+// An InlineParser keeps track of a subject (a string to be
+// parsed) and a position in that subject.
+
+// If re matches at current position in the subject, advance
+// position in subject and return the match; otherwise return null.
+var match = function(re) {
+  // Run re against the unconsumed remainder of the subject.
+  var remaining = this.subject.slice(this.pos);
+  var found = re.exec(remaining);
+  if (!found) {
+    return null;
+  }
+  // Consume everything up to and including the matched text.
+  this.pos += found.index + found[0].length;
+  return found[0];
+};
+
+// Returns the character at the current subject position, or null if
+// there are no more characters.
+var peek = function() {
+  // Indexing past the end yields undefined, which is reported as null.
+  var ch = this.subject[this.pos];
+  return ch ? ch : null;
+};
+
+// Parse zero or more space characters, including at most one newline
+var spnl = function() {
+  // Spaces, then optionally one newline followed by more spaces. The
+  // pattern can match the empty string, so the step always "succeeds";
+  // the constant 1 lets callers chain it inside && expressions.
+  var optionalSpaceAndNewline = /^ *(?:\n *)?/;
+  this.match(optionalSpaceAndNewline);
+  return 1;
+};
+
+// All of the parsers below try to match something at the current position
+// in the subject. If they succeed in matching anything, they
+// push an inline element onto the 'inlines' list. They return the
+// number of characters parsed (possibly 0).
+
+// Attempt to parse backticks, adding either a backtick code span or a
+// literal sequence of backticks to the 'inlines' list.
+var parseBackticks = function(inlines) {
+  var start = this.pos;
+  var opener = this.match(/^`+/);
+  if (!opener) {
+    return 0;
+  }
+  var contentStart = this.pos;
+  // Walk forward over each later backtick run until one has exactly the
+  // same length as the opener.
+  var run = this.match(/`+/m);
+  while (run) {
+    if (run === opener) {
+      var raw = this.subject.slice(contentStart, this.pos - opener.length);
+      // Runs of spaces/newlines inside a code span collapse to one space,
+      // and the ends are trimmed.
+      inlines.push({ t: 'Code', c: raw.replace(/[ \n]+/g,' ').trim() });
+      return this.pos - start;
+    }
+    run = this.match(/`+/m);
+  }
+  // No closer found: emit the opening backticks literally and resume
+  // parsing right after them.
+  inlines.push({ t: 'Str', c: opener });
+  this.pos = contentStart;
+  return this.pos - start;
+};
+
+// Parse a backslash-escaped special character, adding either the escaped
+// character, a hard line break (if the backslash is followed by a newline),
+// or a literal backslash to the 'inlines' list.
+var parseEscaped = function(inlines) {
+  var text = this.subject;
+  var at = this.pos;
+  if (text[at] !== '\\') {
+    return 0;
+  }
+  var following = text[at + 1];
+  if (following === '\n') {
+    // Backslash at end of line: hard line break.
+    inlines.push({ t: 'Hardbreak' });
+    this.pos = at + 2;
+    return 2;
+  }
+  if (reEscapable.test(following)) {
+    // Escaped punctuation: keep only the character itself.
+    inlines.push({ t: 'Str', c: following });
+    this.pos = at + 2;
+    return 2;
+  }
+  // Backslash before anything else is just a literal backslash.
+  this.pos = at + 1;
+  inlines.push({t: 'Str', c: '\\'});
+  return 1;
+};
+
+// Attempt to parse an autolink (URL or email in pointy brackets).
+var parseAutolink = function(inlines) {
+ // Tries, in order, an email autolink and then a URI autolink at the
+ // current position. On success pushes a Link inline whose label is the
+ // text between the angle brackets and returns the number of characters
+ // consumed (brackets included); returns 0 when neither form matches.
+ var m;
+ var dest;
+ if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink
+ // strip the surrounding < and >; the destination gets a mailto: prefix
+ dest = m.slice(1,-1);
+ inlines.push({ t: 'Link', label: [{ t: 'Str', c: dest }],
+ destination: 'mailto:' + dest });
+ return m.length;
+ // URI autolink: a scheme from the fixed list below (matched
+ // case-insensitively), a colon, then any characters other than <, >,
+ // or \x00-\x20, up to the closing >.
+ } else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) {
+ dest = m.slice(1,-1);
+ inlines.push({ t: 'Link', label: [{ t: 'Str', c: dest }],
+ destination: dest });
+ return m.length;
+ } else {
+ return 0;
+ }
+};
+
+// Attempt to parse a raw HTML tag.
+var parseHtmlTag = function(inlines) {
+  // reHtmlTag is anchored, so this only succeeds when a tag, comment,
+  // processing instruction, declaration or CDATA section starts here.
+  var tag = this.match(reHtmlTag);
+  if (!tag) {
+    return 0;
+  }
+  inlines.push({ t: 'Html', c: tag });
+  return tag.length;
+};
+
+// Scan a sequence of characters == c, and return information about
+// the number of delimiters and whether they are positioned such that
+// they can open and/or close emphasis or strong emphasis. A utility
+// function for strong/emph parsing.
+var scanDelims = function(c) {
+  var origin = this.pos;
+  // Start of subject counts as being preceded by a newline.
+  var before = origin === 0 ? '\n' : this.subject[origin - 1];
+
+  // Count the run of c characters starting at the current position.
+  var count = 0;
+  while (this.peek() === c) {
+    count++;
+    this.pos++;
+  }
+  // End of subject counts as being followed by a newline.
+  var after = this.peek() || '\n';
+
+  // Only runs of 1-3 delimiters can open or close; an opener must not be
+  // followed by whitespace, a closer must not be preceded by whitespace.
+  var inRange = count > 0 && count <= 3;
+  var opens = inRange && !(/\s/.test(after));
+  var closes = inRange && !(/\s/.test(before));
+  if (c === '_') {
+    // Underscores additionally may not touch an ASCII alphanumeric on
+    // the outer side.
+    opens = opens && !((/[a-z0-9]/i).test(before));
+    closes = closes && !((/[a-z0-9]/i).test(after));
+  }
+
+  // This is a pure look-ahead: restore the position before returning.
+  this.pos = origin;
+  return { numdelims: count,
+           can_open: opens,
+           can_close: closes };
+};
+
+// Attempt to parse emphasis or strong emphasis in an efficient way,
+// with no backtracking.
+var parseEmphasis = function(inlines) {
+  // Returns the number of characters consumed. When the delimiter run
+  // cannot open emphasis, the run has already been pushed as a literal
+  // Str and consumed, and 0 is returned so that the caller falls through
+  // to its plain-string parser for whatever follows.
+  var startpos = this.pos;
+  var c;
+  var first_close = 0;
+  var nxt = this.peek();
+  if (nxt == '*' || nxt == '_') {
+    c = nxt;
+  } else {
+    return 0;
+  }
+
+  var numdelims;
+  var delimpos;
+  // Declared locally; previously this was assigned without var and
+  // leaked into the global scope.
+  var res;
+
+  // Get opening delimiters.
+  res = this.scanDelims(c);
+  numdelims = res.numdelims;
+  this.pos += numdelims;
+  // We provisionally add a literal string. If we match appropriate
+  // closing delimiters, we'll change this to Strong or Emph.
+  inlines.push({t: 'Str',
+                c: this.subject.substr(this.pos - numdelims, numdelims)});
+  // Record the position of this opening delimiter:
+  delimpos = inlines.length - 1;
+
+  if (!res.can_open || numdelims === 0) {
+    return 0;
+  }
+
+  var first_close_delims = 0;
+
+  switch (numdelims) {
+  case 1: // we started with * or _
+    while (true) {
+      res = this.scanDelims(c);
+      if (res.numdelims >= 1 && res.can_close) {
+        this.pos += 1;
+        // Convert the inline at delimpos, currently a string with the delim,
+        // into an Emph whose contents are the succeeding inlines
+        inlines[delimpos].t = 'Emph';
+        inlines[delimpos].c = inlines.slice(delimpos + 1);
+        inlines.splice(delimpos + 1);
+        break;
+      } else {
+        if (this.parseInline(inlines) === 0) {
+          break;
+        }
+      }
+    }
+    return (this.pos - startpos);
+
+  case 2: // We started with ** or __
+    while (true) {
+      res = this.scanDelims(c);
+      if (res.numdelims >= 2 && res.can_close) {
+        this.pos += 2;
+        inlines[delimpos].t = 'Strong';
+        inlines[delimpos].c = inlines.slice(delimpos + 1);
+        inlines.splice(delimpos + 1);
+        break;
+      } else {
+        if (this.parseInline(inlines) === 0) {
+          break;
+        }
+      }
+    }
+    return (this.pos - startpos);
+
+  case 3: // We started with *** or ___
+    while (true) {
+      res = this.scanDelims(c);
+      if (res.numdelims >= 1 && res.numdelims <= 3 && res.can_close &&
+          res.numdelims != first_close_delims) {
+        if (res.numdelims === 3) {
+          // If we opened with ***, then we interpret *** as * followed by **
+          // giving us <strong><em>
+          res.numdelims = 1;
+        }
+        this.pos += res.numdelims;
+
+        if (first_close > 0) { // if we've already passed the first closer:
+          inlines[delimpos].t = first_close_delims === 1 ? 'Strong' : 'Emph';
+          inlines[delimpos].c = [
+            { t: first_close_delims === 1 ? 'Emph' : 'Strong',
+              c: inlines.slice(delimpos + 1, first_close)}
+          ].concat(inlines.slice(first_close + 1));
+          inlines.splice(delimpos + 1);
+          break;
+        } else { // this is the first closer; for now, add literal string;
+          // we'll change this when we hit the second closer
+          inlines.push({t: 'Str',
+                        c: this.subject.slice(this.pos - res.numdelims,
+                                              this.pos) });
+          first_close = inlines.length - 1;
+          first_close_delims = res.numdelims;
+        }
+      } else { // parse another inline element, til we hit the end
+        if (this.parseInline(inlines) === 0) {
+          break;
+        }
+      }
+    }
+    return (this.pos - startpos);
+
+  default:
+    // Not reachable given the can_open guard above (which requires 1-3
+    // delimiters). The old code returned an undeclared variable here,
+    // which would have thrown a ReferenceError if ever executed.
+    return 0;
+  }
+};
+
+// Attempt to parse link title (sans quotes), returning the string
+// or null if no match.
+var parseLinkTitle = function() {
+  // Declared with var; previously the assignment created an implicit
+  // global named "title".
+  var title = this.match(reLinkTitle);
+  if (title) {
+    // chop off the surrounding delimiters ("..", '..' or (..)) and unescape:
+    return unescape(title.substr(1, title.length - 2));
+  } else {
+    return null;
+  }
+};
+
+// Attempt to parse link destination, returning the string or
+// null if no match.
+var parseLinkDestination = function() {
+  // Preferred form: <destination> in pointy brackets.
+  var bracketed = this.match(reLinkDestinationBraces);
+  if (bracketed) {
+    // drop the enclosing < and > before unescaping
+    return unescape(bracketed.substr(1, bracketed.length - 2));
+  }
+  // Otherwise a bare destination (which may match as the empty string).
+  var bare = this.match(reLinkDestination);
+  return bare !== null ? unescape(bare) : null;
+};
+
+// Attempt to parse a link label, returning number of characters parsed.
+var parseLinkLabel = function() {
+ // Scans a bracketed label starting at the current position. On success
+ // the position is left just past the closing ] and the label's length
+ // (brackets included) is returned; on failure the position is restored
+ // and 0 is returned.
+ if (this.peek() != '[') {
+ return 0;
+ }
+ var startpos = this.pos;
+ var nest_level = 0;
+ if (this.label_nest_level > 0) {
+ // If we've already checked to the end of this subject
+ // for a label, even with a different starting [, we
+ // know we won't find one here and we can just return.
+ // This avoids lots of backtracking.
+ // Note: nest level 1 would be: [foo [bar]
+ // nest level 2 would be: [foo [bar [baz]
+ this.label_nest_level--;
+ return 0;
+ }
+ this.pos++; // advance past [
+ var c;
+ // Consume until the ] that closes the outermost bracket, or end of input.
+ // The throwaway [] arguments mean the sub-parsers are used only to
+ // advance the position past constructs in which brackets do not count.
+ while ((c = this.peek()) && (c != ']' || nest_level > 0)) {
+ switch (c) {
+ case '`':
+ this.parseBackticks([]);
+ break;
+ case '<':
+ this.parseAutolink([]) || this.parseHtmlTag([]) || this.parseString([]);
+ break;
+ case '[': // nested []
+ nest_level++;
+ this.pos++;
+ break;
+ case ']': // nested []
+ nest_level--;
+ this.pos++;
+ break;
+ case '\\':
+ this.parseEscaped([]);
+ break;
+ default:
+ this.parseString([]);
+ }
+ }
+ if (c === ']') {
+ this.label_nest_level = 0;
+ this.pos++; // advance past ]
+ return this.pos - startpos;
+ } else {
+ // Ran off the end of the subject: remember how many brackets were
+ // still open so that later attempts can bail out early (see above).
+ if (!c) {
+ this.label_nest_level = nest_level;
+ }
+ this.pos = startpos;
+ return 0;
+ }
+};
+
+// Parse raw link label, including surrounding [], and return
+// inline contents. (Note: this is not a method of InlineParser.)
+var parseRawLabel = function(s) {
+  // Strip the enclosing [ and ] and parse what is left with an empty
+  // refmap, so that nothing inside the label resolves as a reference link.
+  var inner = s.substr(1, s.length - 2);
+  var labelParser = new InlineParser();
+  return labelParser.parse(inner, {});
+};
+
+// Attempt to parse a link. If successful, add the link to
+// inlines.
+var parseLink = function(inlines) {
+  // Returns the number of characters consumed, or 0 (with the position
+  // restored to where it started) when no link could be parsed.
+  var startpos = this.pos;
+  var reflabel;
+  var n;
+  var dest;
+  var title;
+
+  n = this.parseLinkLabel();
+  if (n === 0) {
+    return 0;
+  }
+  var rawlabel = this.subject.substr(startpos, n);
+
+  // if we got this far, we've parsed a label.
+  // Try to parse an explicit link: [label](url "title")
+  if (this.peek() == '(') {
+    this.pos++;
+    if (this.spnl() &&
+        ((dest = this.parseLinkDestination()) !== null) &&
+        this.spnl() &&
+        // make sure there's a space before the title; the trailing
+        // "|| true" keeps the chain going when there is no title at all
+        // (title then stays undefined):
+        (/^\s/.test(this.subject[this.pos - 1]) &&
+         (title = this.parseLinkTitle() || '') || true) &&
+        this.spnl() &&
+        this.match(/^\)/)) {
+      inlines.push({ t: 'Link',
+                     destination: dest,
+                     title: title,
+                     label: parseRawLabel(rawlabel) });
+      return this.pos - startpos;
+    } else {
+      this.pos = startpos;
+      return 0;
+    }
+  }
+  // If we're here, it wasn't an explicit link. Try to parse a reference link.
+  // first, see if there's another label
+  var savepos = this.pos;
+  this.spnl();
+  var beforelabel = this.pos;
+  n = this.parseLinkLabel();
+  if (n == 2) {
+    // empty second label: [foo][] refers to the first label
+    reflabel = rawlabel;
+  } else if (n > 0) {
+    reflabel = this.subject.slice(beforelabel, beforelabel + n);
+  } else {
+    // no second label: [foo] alone; undo the whitespace skip
+    this.pos = savepos;
+    reflabel = rawlabel;
+  }
+  // look up the normalized reference label in refmap
+  var link = this.refmap[normalizeReference(reflabel)];
+  if (link) {
+    inlines.push({t: 'Link',
+                  destination: link.destination,
+                  title: link.title,
+                  label: parseRawLabel(rawlabel) });
+    return this.pos - startpos;
+  }
+  // Nothing worked, rewind. (The old code had a second, unreachable copy
+  // of this rewind after an if/else whose branches both returned.)
+  this.pos = startpos;
+  return 0;
+};
+
+// Attempt to parse an entity, adding to inlines if successful.
+var parseEntity = function(inlines) {
+  // Hex numeric (&#x..;), decimal numeric (&#..;) or named (&name;)
+  // entity, matched case-insensitively at the current position.
+  var entity = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i);
+  if (!entity) {
+    return 0;
+  }
+  inlines.push({ t: 'Entity', c: entity });
+  return entity.length;
+};
+
+// Parse a run of ordinary characters, or a single character with
+// a special meaning in markdown, as a plain string, adding to inlines.
+var parseString = function(inlines) {
+  // reMain yields either one special character or the longest run of
+  // non-special characters; either way it becomes a literal Str.
+  var text = this.match(reMain);
+  if (!text) {
+    return 0;
+  }
+  inlines.push({ t: 'Str', c: text });
+  return text.length;
+};
+
+// Parse a newline. If it was preceded by two spaces, return a hard
+// line break; otherwise a soft line break.
+var parseNewline = function(inlines) {
+  // Returns 1 when a newline was consumed, 0 when the current character
+  // is not a newline.
+  if (this.peek() != '\n') {
+    return 0;
+  }
+  this.pos++;
+  var last = inlines[inlines.length - 1];
+  // Two (or more) trailing spaces before the newline request a hard break.
+  // The comparison must be against a two-character string: comparing the
+  // last two characters with a single space can never succeed for text
+  // that actually ends in two spaces.
+  if (last && last.t == 'Str' && last.c.slice(-2) == '  ') {
+    // strip all of the trailing spaces
+    last.c = last.c.replace(/ *$/,'');
+    inlines.push({ t: 'Hardbreak' });
+  } else {
+    // a single trailing space is dropped before a soft break
+    if (last && last.t == 'Str' && last.c.slice(-1) == ' ') {
+      last.c = last.c.slice(0, -1);
+    }
+    inlines.push({ t: 'Softbreak' });
+  }
+  return 1;
+};
+
+// Attempt to parse an image. If the opening '!' is not followed
+// by a link, add a literal '!' to inlines.
+var parseImage = function(inlines) {
+  if (!this.match(/^!/)) {
+    return 0;
+  }
+  // An image is "!" followed by anything that parses as a link.
+  var consumed = this.parseLink(inlines);
+  if (consumed === 0) {
+    // Not a link after all: the "!" is ordinary text.
+    inlines.push({ t: 'Str', c: '!' });
+    return 1;
+  }
+  var tail = inlines[inlines.length - 1];
+  if (tail && tail.t == 'Link') {
+    // Retag the link that was just pushed; +1 accounts for the "!".
+    tail.t = 'Image';
+    return consumed+1;
+  }
+  throw "Shouldn't happen";
+};
+
+// Attempt to parse a link reference, modifying refmap.
+var parseReference = function(s, refmap) {
+  // Parses "[label]: destination optional-title" from the start of s.
+  // Returns the number of characters consumed, or 0 if s does not begin
+  // with a reference definition. An existing refmap entry for the same
+  // normalized label is never overwritten.
+  this.subject = s;
+  this.pos = 0;
+  var origin = this.pos;
+
+  // label:
+  var labelLength = this.parseLinkLabel();
+  if (labelLength === 0) {
+    return 0;
+  }
+  var rawlabel = this.subject.substr(0, labelLength);
+
+  // colon:
+  if (this.peek() !== ':') {
+    this.pos = origin;
+    return 0;
+  }
+  this.pos++;
+
+  // link url (must be present and non-empty):
+  this.spnl();
+  var dest = this.parseLinkDestination();
+  if (dest === null || dest.length === 0) {
+    this.pos = origin;
+    return 0;
+  }
+
+  // optional title; when absent, give back the whitespace just skipped:
+  var afterDest = this.pos;
+  this.spnl();
+  var title = this.parseLinkTitle();
+  if (title === null) {
+    title = '';
+    this.pos = afterDest;
+  }
+
+  // make sure we're at line end:
+  if (this.match(/^ *(?:\n|$)/) === null) {
+    this.pos = origin;
+    return 0;
+  }
+
+  var key = normalizeReference(rawlabel);
+  if (!refmap[key]) {
+    refmap[key] = { destination: dest, title: title };
+  }
+  return this.pos - origin;
+};
+
+// Parse the next inline element in subject, advancing subject position
+// and adding the result to 'inlines'.
+var parseInline = function(inlines) {
+  // Dispatch on the current character to the matching specialised parser.
+  // Whenever that parser consumes nothing (or there is none for this
+  // character), fall back to the plain-string parser.
+  var ch = this.peek();
+  var consumed;
+  if (ch === '\n') {
+    consumed = this.parseNewline(inlines);
+  } else if (ch === '\\') {
+    consumed = this.parseEscaped(inlines);
+  } else if (ch === '`') {
+    consumed = this.parseBackticks(inlines);
+  } else if (ch === '*' || ch === '_') {
+    consumed = this.parseEmphasis(inlines);
+  } else if (ch === '[') {
+    consumed = this.parseLink(inlines);
+  } else if (ch === '!') {
+    consumed = this.parseImage(inlines);
+  } else if (ch === '<') {
+    consumed = this.parseAutolink(inlines) ||
+      this.parseHtmlTag(inlines);
+  } else if (ch === '&') {
+    consumed = this.parseEntity(inlines);
+  }
+  return consumed || this.parseString(inlines);
+};
+
+// Parse s as a list of inlines, using refmap to resolve references.
+var parseInlines = function(s, refmap) {
+  // Reset the parser state for the new subject.
+  this.subject = s;
+  this.pos = 0;
+  this.refmap = refmap || {};
+  // Keep parsing inline elements until one attempt consumes nothing.
+  var collected = [];
+  var progressed;
+  do {
+    progressed = this.parseInline(collected);
+  } while (progressed);
+  return collected;
+};
+
+// The InlineParser object.
+function InlineParser(){
+  // Per-parse state. Returning an object from the constructor means
+  // "new InlineParser()" yields this object rather than "this".
+  var parser = {
+    subject: '',
+    label_nest_level: 0, // used by parseLinkLabel method
+    pos: 0,
+    refmap: {}
+  };
+
+  // Low-level scanning helpers.
+  parser.match = match;
+  parser.peek = peek;
+  parser.spnl = spnl;
+
+  // One method per inline construct.
+  parser.parseBackticks = parseBackticks;
+  parser.parseEscaped = parseEscaped;
+  parser.parseAutolink = parseAutolink;
+  parser.parseHtmlTag = parseHtmlTag;
+  parser.scanDelims = scanDelims;
+  parser.parseEmphasis = parseEmphasis;
+  parser.parseLinkTitle = parseLinkTitle;
+  parser.parseLinkDestination = parseLinkDestination;
+  parser.parseLinkLabel = parseLinkLabel;
+  parser.parseLink = parseLink;
+  parser.parseEntity = parseEntity;
+  parser.parseString = parseString;
+  parser.parseNewline = parseNewline;
+  parser.parseImage = parseImage;
+  parser.parseReference = parseReference;
+  parser.parseInline = parseInline;
+
+  // Public entry point.
+  parser.parse = parseInlines;
+  return parser;
+}
+
+// DOC PARSER
+
+// These are methods of a DocParser object, defined below.
+
+var makeBlock = function(tag, start_line, start_column) {
+  // A freshly created block is open, has no parent or children yet, and
+  // starts and ends on the same line.
+  var block = {
+    t: tag,
+    open: true,
+    last_line_blank: false,
+    start_line: start_line,
+    start_column: start_column,
+    end_line: start_line,
+    children: [],
+    parent: null,
+    // string_content is formed by concatenating strings, in finalize:
+    string_content: "",
+    strings: [],
+    inline_content: []
+  };
+  return block;
+};
+
+// Returns true if parent block can contain child block.
+var canContain = function(parent_type, child_type) {
+  // Document, BlockQuote and ListItem accept any child block.
+  var generalContainers = ['Document', 'BlockQuote', 'ListItem'];
+  if (generalContainers.indexOf(parent_type) !== -1) {
+    return true;
+  }
+  // A List accepts list items only; nothing else contains blocks.
+  return parent_type == 'List' && child_type == 'ListItem';
+};
+
+// Returns true if block type can accept lines of text.
+var acceptsLines = function(block_type) {
+  // Only these three block types take raw text lines directly.
+  switch (block_type) {
+  case 'Paragraph':
+  case 'IndentedCode':
+  case 'FencedCode':
+    return true;
+  default:
+    return false;
+  }
+};
+
+// Returns true if block ends with a blank line, descending if needed
+// into lists and sublists.
+var endsWithBlankLine = function(block) {
+  // Follow the chain of last children through lists and list items until
+  // a block flagged last_line_blank is found or the chain ends.
+  var cur = block;
+  while (true) {
+    if (cur.last_line_blank) {
+      return true;
+    }
+    var isListLike = cur.t == 'List' || cur.t == 'ListItem';
+    if (!isListLike || !(cur.children.length > 0)) {
+      return false;
+    }
+    cur = cur.children[cur.children.length - 1];
+  }
+};
+
+// Break out of all containing lists, resetting the tip of the
+// document to the parent of the highest list, and finalizing
+// all the lists. (This is used to implement the "two blank lines
+// break out of all lists" feature.)
+var breakOutOfLists = function(block, line_number) {
+  // Walk up from block to the root, remembering the last List seen,
+  // i.e. the outermost list that contains block (or block itself).
+  var outermost = null;
+  var ancestor = block;
+  do {
+    if (ancestor.t === 'List') {
+      outermost = ancestor;
+    }
+    ancestor = ancestor.parent;
+  } while (ancestor);
+
+  if (!outermost) {
+    return;
+  }
+
+  // Finalize every block from the starting one up to and including that
+  // list, then continue the document from the list's parent.
+  var cur = block;
+  while (cur != outermost) {
+    this.finalize(cur, line_number);
+    cur = cur.parent;
+  }
+  this.finalize(outermost, line_number);
+  this.tip = outermost.parent;
+};
+
+// Add a line to the block at the tip. We assume the tip
+// can accept lines -- that check should be done before calling this.
+var addLine = function(ln, offset) {
+  var target = this.tip;
+  // Lines may only be appended to a block that is still open.
+  if (!(target.open)) {
+    throw({ msg: "Attempted to add line (" + ln + ") to closed container." });
+  }
+  // Store the part of the line that starts at offset.
+  target.strings.push(ln.slice(offset));
+};
+
+// Add block of type tag as a child of the tip. If the tip can't
+// accept children, close and finalize it and try its parent,
+// and so on til we find a block that can accept children.
+var addChild = function(tag, line_number, offset) {
+  // Close blocks at the tip until one is reached that may contain tag.
+  // (This relies on this.finalize moving this.tip; otherwise the loop
+  // would not terminate.)
+  while (!canContain(this.tip.t, tag)) {
+    this.finalize(this.tip, line_number);
+  }
+
+  // offset 0 = column 1
+  var child = makeBlock(tag, line_number, offset + 1);
+  var host = this.tip;
+  child.parent = host;
+  host.children.push(child);
+  // The new block becomes the tip of the document.
+  this.tip = child;
+  return child;
+};
+
+// Parse a list marker and return data on the marker (type,
+// start, delimiter, bullet character, padding) or null.
+var parseListMarker = function(ln, offset) {
+  // Returns a data object describing the list marker that starts at
+  // ln[offset] (type, bullet_char or start/delimiter, padding), or null
+  // when there is no list marker there.
+  var rest = ln.slice(offset);
+  var match;
+  var spaces_after_marker;
+  // Declared locally; previously assigned without var (implicit global).
+  var blank_item;
+  var data = {};
+  // A horizontal rule such as "* * *" takes precedence over a list item.
+  if (rest.match(reHrule)) {
+    return null;
+  }
+  if ((match = rest.match(/^[*+-]( +|$)/))) {
+    spaces_after_marker = match[1].length;
+    data.type = 'Bullet';
+    data.bullet_char = match[0][0];
+
+  } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) {
+    spaces_after_marker = match[3].length;
+    data.type = 'Ordered';
+    // Explicit radix: a start number such as "08" must be read as
+    // decimal on every engine.
+    data.start = parseInt(match[1], 10);
+    data.delimiter = match[2];
+  } else {
+    return null;
+  }
+  blank_item = match[0].length === rest.length;
+  // With 5+ spaces after the marker, no space at all, or nothing else on
+  // the line, the padding is the marker width plus one; otherwise it is
+  // the full width of marker plus spaces.
+  if (spaces_after_marker >= 5 ||
+      spaces_after_marker < 1 ||
+      blank_item) {
+    data.padding = match[0].length - spaces_after_marker + 1;
+  } else {
+    data.padding = match[0].length;
+  }
+  return data;
+};
+
+// Returns true if the two list items are of the same type,
+// with the same delimiter and bullet character. This is used
+// in agglomerating list items into lists.
+var listsMatch = function(list_data, item_data) {
+  // Two markers belong to the same list only if all three fields agree.
+  var fields = ['type', 'delimiter', 'bullet_char'];
+  for (var i = 0; i < fields.length; i++) {
+    if (list_data[fields[i]] !== item_data[fields[i]]) {
+      return false;
+    }
+  }
+  return true;
+};
+
+// Analyze a line of text and update the document appropriately.
+// We parse markdown text by calling this on each line of input,
+// then finalizing the document.
+var incorporateLine = function(ln, line_number) {
+
+ var all_matched = true;
+ var last_child;
+ var first_nonspace;
+ var offset = 0;
+ var match;
+ var data;
+ var blank;
+ var indent;
+ var last_matched_container;
+ var i;
+ var CODE_INDENT = 4;
+
+ var container = this.doc;
+ var oldtip = this.tip;
+
+ // Convert tabs to spaces:
+ ln = detabLine(ln);
+
+ // For each containing block, try to parse the associated line start.
+ // Bail out on failure: container will point to the last matching block.
+ // Set all_matched to false if not all containers match.
+ while (container.children.length > 0) {
+ last_child = container.children[container.children.length - 1];
+ if (!last_child.open) {
+ break;
+ }
+ container = last_child;
+
+ match = matchAt(/[^ ]/, ln, offset);
+ if (match === null) {
+ first_nonspace = ln.length;
+ blank = true;
+ } else {
+ first_nonspace = match;
+ blank = false;
+ }
+ indent = first_nonspace - offset;
+
+ switch (container.t) {
+ case 'BlockQuote':
+ matched = indent <= 3 && ln[first_nonspace] === '>';
+ if (matched) {
+ offset = first_nonspace + 1;
+ if (ln[offset] === ' ') {
+ offset++;
+ }
+ } else {
+ all_matched = false;
+ }
+ break;
+
+ case 'ListItem':
+ if (indent >= container.list_data.marker_offset +
+ container.list_data.padding) {
+ offset += container.list_data.marker_offset +
+ container.list_data.padding;
+ } else if (blank) {
+ offset = first_nonspace;
+ } else {
+ all_matched = false;
+ }
+ break;
+
+ case 'IndentedCode':
+ if (indent >= CODE_INDENT) {
+ offset += CODE_INDENT;
+ } else if (blank) {
+ offset = first_nonspace;
+ } else {
+ all_matched = false;
+ }
+ break;
+
+ case 'ATXHeader':
+ case 'SetextHeader':
+ case 'HorizontalRule':
+ // a header can never container > 1 line, so fail to match:
+ all_matched = false;
+ break;
+
+ case 'FencedCode':
+ // skip optional spaces of fence offset
+ i = container.fence_offset;
+ while (i > 0 && ln[offset] === ' ') {
+ offset++;
+ i--;
+ }
+ break;
+
+ case 'HtmlBlock':
+ if (blank) {
+ all_matched = false;
+ }
+ break;
+
+ case 'Paragraph':
+ if (blank) {
+ container.last_line_blank = true;
+ all_matched = false;
+ }
+ break;
+
+ default:
+ }
+
+ if (!all_matched) {
+ container = container.parent; // back up to last matching block
+ break;
+ }
+ }
+
+ last_matched_container = container;
+
+ // This function is used to finalize and close any unmatched
+ // blocks. We aren't ready to do this now, because we might
+ // have a lazy paragraph continuation, in which case we don't
+ // want to close unmatched blocks. So we store this closure for
+ // use later, when we have more information.
+ var closeUnmatchedBlocks = function(mythis) {
+ // finalize any blocks not matched
+ while (!already_done && oldtip != last_matched_container) {
+ mythis.finalize(oldtip, line_number);
+ oldtip = oldtip.parent;
+ }
+ var already_done = true;
+ };
+
+ // Check to see if we've hit 2nd blank line; if so break out of list:
+ if (blank && container.last_line_blank) {
+ this.breakOutOfLists(container, line_number);
+ }
+
+ // Unless last matched container is a code block, try new container starts,
+ // adding children to the last matched container:
+ while (container.t != 'FencedCode' &&
+ container.t != 'IndentedCode' &&
+ container.t != 'HtmlBlock' &&
+ // this is a little performance optimization:
+ matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) {
+
+ match = matchAt(/[^ ]/, ln, offset);
+ if (match === null) {
+ first_nonspace = ln.length;
+ blank = true;
+ } else {
+ first_nonspace = match;
+ blank = false;
+ }
+ indent = first_nonspace - offset;
+
+ if (indent >= CODE_INDENT) {
+ // indented code
+ if (this.tip.t != 'Paragraph' && !blank) {
+ offset += CODE_INDENT;
+ closeUnmatchedBlocks(this);
+ container = this.addChild('IndentedCode', line_number, offset);
+ } else { // indent > 4 in a lazy paragraph continuation
+ break;
+ }
+
+ } else if (ln[first_nonspace] === '>') {
+ // blockquote
+ offset = first_nonspace + 1;
+ // optional following space
+ if (ln[offset] === ' ') {
+ offset++;
+ }
+ closeUnmatchedBlocks(this);
+ container = this.addChild('BlockQuote', line_number, offset);
+
+ } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) {
+ // ATX header
+ offset = first_nonspace + match[0].length;
+ closeUnmatchedBlocks(this);
+ container = this.addChild('ATXHeader', line_number, first_nonspace);
+ container.level = match[0].trim().length; // number of #s
+ // remove trailing ###s:
+ container.strings =
+ [ln.slice(offset).replace(/(?:(\\#) *#+| *#+) *$/,'$1')];
+ break;
+
+ } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) {
+ // fenced code block
+ var fence_length = match[0].length;
+ closeUnmatchedBlocks(this);
+ container = this.addChild('FencedCode', line_number, first_nonspace);
+ container.fence_length = fence_length;
+ container.fence_char = match[0][0];
+ container.fence_offset = first_nonspace - offset;
+ offset = first_nonspace + fence_length;
+ break;
+
+ } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) {
+ // html block
+ closeUnmatchedBlocks(this);
+ container = this.addChild('HtmlBlock', line_number, first_nonspace);
+ // note, we don't adjust offset because the tag is part of the text
+ break;
+
+ } else if (container.t == 'Paragraph' &&
+ container.strings.length === 1 &&
+ ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) {
+ // setext header line
+ closeUnmatchedBlocks(this);
+ container.t = 'SetextHeader'; // convert Paragraph to SetextHeader
+ container.level = match[0][0] === '=' ? 1 : 2;
+ offset = ln.length;
+
+ } else if (matchAt(reHrule, ln, first_nonspace) !== null) {
+ // hrule
+ closeUnmatchedBlocks(this);
+ container = this.addChild('HorizontalRule', line_number, first_nonspace);
+ offset = ln.length - 1;
+ break;
+
+ } else if ((data = parseListMarker(ln, first_nonspace))) {
+ // list item
+ closeUnmatchedBlocks(this);
+ data.marker_offset = indent;
+ offset = first_nonspace + data.padding;
+
+ // add the list if needed
+ if (container.t !== 'List' ||
+ !(listsMatch(container.list_data, data))) {
+ container = this.addChild('List', line_number, first_nonspace);
+ container.list_data = data;
+ }
+
+ // add the list item
+ container = this.addChild('ListItem', line_number, first_nonspace);
+ container.list_data = data;
+
+ } else {
+ break;
+
+ }
+
+ if (acceptsLines(container.t)) {
+ // if it's a line container, it can't contain other containers
+ break;
+ }
+ }
+
+ // What remains at the offset is a text line. Add the text to the
+ // appropriate container.
+
+ match = matchAt(/[^ ]/, ln, offset);
+ if (match === null) {
+ first_nonspace = ln.length;
+ blank = true;
+ } else {
+ first_nonspace = match;
+ blank = false;
+ }
+ indent = first_nonspace - offset;
+
+ // First check for a lazy paragraph continuation:
+ if (this.tip !== last_matched_container &&
+ !blank &&
+ this.tip.t == 'Paragraph' &&
+ this.tip.strings.length > 0) {
+ // lazy paragraph continuation
+
+ this.last_line_blank = false;
+ this.addLine(ln, offset);
+
+ } else { // not a lazy continuation
+
+ // finalize any blocks not matched
+ closeUnmatchedBlocks(this);
+
+ // Block quote lines are never blank as they start with >
+ // and we don't count blanks in fenced code for purposes of tight/loose
+ // lists or breaking out of lists. We also don't set last_line_blank
+ // on an empty list item.
+ container.last_line_blank = blank &&
+ !(container.t == 'BlockQuote' ||
+ container.t == 'FencedCode' ||
+ (container.t == 'ListItem' &&
+ container.children.length === 0 &&
+ container.start_line == line_number));
+
+ var cont = container;
+ while (cont.parent) {
+ cont.parent.last_line_blank = false;
+ cont = cont.parent;
+ }
+
+ switch (container.t) {
+ case 'IndentedCode':
+ case 'HtmlBlock':
+ this.addLine(ln, offset);
+ break;
+
+ case 'FencedCode':
+ // check for closing code fence:
+ match = (indent <= 3 &&
+ ln[first_nonspace] == container.fence_char &&
+ ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/));
+ if (match && match[0].length >= container.fence_length) {
+ // don't add closing fence to container; instead, close it:
+ this.finalize(container, line_number);
+ } else {
+ this.addLine(ln, offset);
+ }
+ break;
+
+ case 'ATXHeader':
+ case 'SetextHeader':
+ case 'HorizontalRule':
+ // nothing to do; we already added the contents.
+ break;
+
+ default:
+ if (acceptsLines(container.t)) {
+ this.addLine(ln, first_nonspace);
+ } else if (blank) {
+ // do nothing
+ } else if (container.t != 'HorizontalRule' &&
+ container.t != 'SetextHeader') {
+ // create paragraph container for line
+ container = this.addChild('Paragraph', line_number, first_nonspace);
+ this.addLine(ln, first_nonspace);
+ } else {
+ console.log("Line " + line_number.toString() +
+ " with container type " + container.t +
+ " did not match any condition.");
+
+ }
+ }
+ }
+};
+
+// Finalize a block. Close it and do any necessary postprocessing,
+// e.g. creating string_content from strings, setting the 'tight'
+// or 'loose' status of a list, and parsing the beginnings
+// of paragraphs for reference definitions. Reset the tip to the
+// parent of the closed block.
+var finalize = function(block, line_number) {
+    var pos;
+    // don't do anything if the block is already closed
+    if (!block.open) {
+        return 0;
+    }
+    block.open = false;
+    if (line_number > block.start_line) {
+        block.end_line = line_number - 1;
+    } else {
+        block.end_line = line_number; // was `block_end_line`: an implicit global, not the block's field
+    }
+
+    switch (block.t) {
+    case 'Paragraph':
+        block.string_content = block.strings.join('\n').replace(/^ */m,'');
+
+        // try parsing the beginning as link reference definitions:
+        while (block.string_content[0] === '[' &&
+               (pos = this.inlineParser.parseReference(block.string_content,
+                                                       this.refmap))) {
+            block.string_content = block.string_content.slice(pos);
+            if (isBlank(block.string_content)) {
+                block.t = 'ReferenceDef'; // nothing left but definitions: retype the block
+                break;
+            }
+        }
+        break;
+
+    case 'ATXHeader':
+    case 'SetextHeader':
+    case 'HtmlBlock':
+        block.string_content = block.strings.join('\n');
+        break;
+
+    case 'IndentedCode':
+        block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n');
+        break;
+
+    case 'FencedCode':
+        // first line becomes info string
+        block.info = unescape(block.strings[0].trim());
+        if (block.strings.length == 1) {
+            block.string_content = '';
+        } else {
+            block.string_content = block.strings.slice(1).join('\n') + '\n';
+        }
+        break;
+
+    case 'List':
+        block.tight = true; // tight by default
+
+        var numitems = block.children.length;
+        var i = 0;
+        while (i < numitems) {
+            var item = block.children[i];
+            // check for non-final list item ending with blank line:
+            var last_item = i == numitems - 1;
+            if (endsWithBlankLine(item) && !last_item) {
+                block.tight = false;
+                break;
+            }
+            // recurse into children of list item, to see if there are
+            // spaces between any of them:
+            var numsubitems = item.children.length;
+            var j = 0;
+            while (j < numsubitems) {
+                var subitem = item.children[j];
+                var last_subitem = j == numsubitems - 1;
+                if (endsWithBlankLine(subitem) && !(last_item && last_subitem)) {
+                    block.tight = false;
+                    break; // leaves only the inner loop; the outer scan goes on
+                }
+                j++;
+            }
+            i++;
+        }
+        break;
+
+    default:
+        break;
+    }
+
+    this.tip = block.parent || this.top; // NOTE(review): no `top` is set by the visible parser code - confirm
+};
+
+// Walk through a block & children recursively, parsing string content
+// into inline content where appropriate.
+var processInlines = function(block) {
+    // Only these block types hold inline-level text in string_content.
+    var kind = block.t;
+    var hasInlines = kind === 'Paragraph' ||
+                     kind === 'SetextHeader' ||
+                     kind === 'ATXHeader';
+    if (hasInlines) {
+        var text = block.string_content.trim();
+        block.inline_content = this.inlineParser.parse(text, this.refmap);
+        block.string_content = "";
+    }
+
+    // Depth-first descent; a block without a children array is a leaf.
+    var kids = block.children;
+    if (kids) {
+        for (var k = 0; k < kids.length; k++) {
+            this.processInlines(kids[k]);
+        }
+    }
+};
+
+// The main parsing function. Returns a parsed document AST.
+var parse = function(input) {
+    this.doc = makeBlock('Document', 1, 1); // fresh root, tip and reference map on every call
+    this.tip = this.doc;
+    this.refmap = {};
+    var lines = input.replace(/(?:\r\n|\n|\r)$/,'').split(/\r\n|\n|\r/); // drop one final EOL of any kind split() accepts
+    var len = lines.length;
+    for (var i = 0; i < len; i++) {
+        this.incorporateLine(lines[i], i+1); // line numbers are 1-based
+    }
+    while (this.tip) { // close whatever is still open, innermost block first
+        this.finalize(this.tip, len - 1);
+    }
+    this.processInlines(this.doc);
+    return this.doc;
+};
+
+
+// The DocParser object.
+function DocParser(){
+    var root = makeBlock('Document', 1, 1); // one root block, shared by doc and tip
+    return {
+        doc: root, tip: root, // `this` in a literal is the function's `this`, so `this.doc` was never this doc
+        refmap: {},
+        inlineParser: new InlineParser(),
+        breakOutOfLists: breakOutOfLists,
+        addLine: addLine,
+        addChild: addChild,
+        incorporateLine: incorporateLine,
+        finalize: finalize,
+        processInlines: processInlines,
+        parse: parse
+    };
+}
+
+// HTML RENDERER
+
+// Helper function to produce content in a pair of HTML tags.
+var inTags = function(tag, attribs, contents, selfclosing) {
+    // Start tag: '<tag' plus ' name="value"' for every [name, value] pair.
+    var html = '<' + tag;
+    if (attribs) {
+        // the scan stops at the first undefined slot, not at attribs.length
+        for (var n = 0, pair; (pair = attribs[n]) !== undefined; n++) {
+            html = html.concat(' ', pair[0], '="', pair[1], '"');
+        }
+    }
+    // Truthy contents take precedence over selfclosing; values are inserted
+    // verbatim, so callers are responsible for escaping them.
+    if (contents) {
+        return html.concat('>', contents, '</', tag, '>');
+    }
+    if (selfclosing) {
+        return html + ' />';
+    }
+    return html.concat('></', tag, '>');
+};
+
+// Render an inline element as HTML.
+var renderInline = function(inline) {
+    var attrs; // [name, value] pairs handed to inTags for links and images
+    switch (inline.t) {
+    case 'Str':
+        return this.escape(inline.c);
+    case 'Softbreak':
+        return this.softbreak;
+    case 'Hardbreak':
+        return inTags('br',[],"",true) + '\n';
+    case 'Emph':
+        return inTags('em', [], this.renderInlines(inline.c));
+    case 'Strong':
+        return inTags('strong', [], this.renderInlines(inline.c));
+    case 'Html':
+        return inline.c; // passed through without escaping
+    case 'Entity':
+        return inline.c; // passed through without escaping
+    case 'Link':
+        attrs = [['href', this.escape(inline.destination, true)]];
+        if (inline.title) {
+            attrs.push(['title', this.escape(inline.title, true)]);
+        }
+        return inTags('a', attrs, this.renderInlines(inline.label));
+    case 'Image':
+        attrs = [['src', this.escape(inline.destination, true)],
+                 ['alt', this.escape(this.renderInlines(inline.label))]];
+        if (inline.title) {
+            attrs.push(['title', this.escape(inline.title, true)]);
+        }
+        return inTags('img', attrs, "", true);
+    case 'Code':
+        return inTags('code', [], this.escape(inline.c));
+    default:
+        console.log("Unknown inline type " + inline.t); // log and render nothing
+        return "";
+    }
+};
+
+// Render a list of inlines.
+var renderInlines = function(inlines) {
+    var html = '', at = 0; // renderings are appended in order, no separator
+    while (at < inlines.length) {
+        html += this.renderInline(inlines[at++]);
+    }
+    return html;
+};
+
+// Render a single block element.
+var renderBlock = function(block, in_tight_list) {
+    var tag;
+    var attr;
+    var info_words;
+    switch (block.t) {
+    case 'Document':
+        var whole_doc = this.renderBlocks(block.children);
+        return (whole_doc === '' ? '' : whole_doc + '\n');
+    case 'Paragraph':
+        if (in_tight_list) { // paragraphs in a tight list are not wrapped in <p>
+            return this.renderInlines(block.inline_content);
+        } else {
+            return inTags('p', [], this.renderInlines(block.inline_content));
+        }
+        break;
+    case 'BlockQuote':
+        var filling = this.renderBlocks(block.children); // render once: a second call doubles work per nesting level
+        return inTags('blockquote', [], filling === '' ? this.innersep :
+            this.innersep + filling + this.innersep);
+    case 'ListItem':
+        return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim());
+    case 'List':
+        tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol';
+        attr = (!block.list_data.start || block.list_data.start == 1) ?
+            [] : [['start', block.list_data.start.toString()]];
+        return inTags(tag, attr, this.innersep +
+                      this.renderBlocks(block.children, block.tight) +
+                      this.innersep);
+    case 'ATXHeader':
+    case 'SetextHeader':
+        tag = 'h' + block.level;
+        return inTags(tag, [], this.renderInlines(block.inline_content));
+    case 'IndentedCode':
+        return inTags('pre', [],
+                      inTags('code', [], this.escape(block.string_content)));
+    case 'FencedCode':
+        info_words = block.info.split(/ +/); // only the first word becomes the class attribute
+        attr = info_words.length === 0 || info_words[0].length === 0 ?
+            [] : [['class',this.escape(info_words[0],true)]];
+        return inTags('pre', attr,
+                      inTags('code', [], this.escape(block.string_content)));
+    case 'HtmlBlock':
+        return block.string_content;
+    case 'ReferenceDef':
+        return "";
+    case 'HorizontalRule':
+        return inTags('hr',[],"",true);
+    default:
+        console.log("Unknown block type " + block.t); // log and render nothing
+        return "";
+    }
+};
+
+// Render a list of block elements, separated by this.blocksep.
+var renderBlocks = function(blocks, in_tight_list) {
+    var rendered = [];
+    for (var n = 0; n < blocks.length; n++) {
+        var blk = blocks[n];
+        if (blk.t === 'ReferenceDef') { continue; } // reference definitions emit nothing, not even a separator
+        rendered.push(this.renderBlock(blk, in_tight_list));
+    }
+    return rendered.join(this.blocksep);
+};
+
+// The HtmlRenderer object.
+function HtmlRenderer(){
+    return {
+        // default options:
+        blocksep: '\n',  // space between blocks
+        innersep: '\n',  // space between block container tag and contents
+        softbreak: '\n', // by default, soft breaks are rendered as newlines in HTML
+        // set to "<br />" to make them hard breaks
+        // set to " " if you want to ignore line wrapping in source
+        // Escape &, <, > and " for HTML output.  With preserve_entities set,
+        // an '&' that already begins a numeric or named entity reference is
+        // left alone instead of being escaped a second time.
+        escape: function(s, preserve_entities) {
+            var ampersand = preserve_entities ?
+                /[&](?![#](x[a-f0-9]{1,8}|[0-9]{1,8});|[a-z][a-z0-9]{1,31};)/gi :
+                /[&]/g;
+            return s.replace(ampersand,'&amp;')
+                .replace(/[<]/g,'&lt;')
+                .replace(/[>]/g,'&gt;')
+                .replace(/["]/g,'&quot;');
+        },
+        renderInline: renderInline,
+        renderInlines: renderInlines,
+        renderBlock: renderBlock,
+        renderBlocks: renderBlocks,
+        // render is the same function as renderBlock
+        render: renderBlock
+    };
+}
+
+exports.DocParser = DocParser;
+exports.HtmlRenderer = HtmlRenderer;
+
+})(typeof exports === 'undefined' ? this.stmd = {} : exports);
diff --git a/js/test.js b/js/test.js
new file mode 100755
index 0000000..c1ea5b6
--- /dev/null
+++ b/js/test.js
@@ -0,0 +1,79 @@
+#!/usr/bin/env node
+
+var fs = require('fs');
+var util = require('util');
+var stmd = require('./stmd');
+var ansi = require('ansi');
+var cursor = ansi(process.stdout);
+
+var writer = new stmd.HtmlRenderer();
+var reader = new stmd.DocParser();
+
+var passed = 0;
+var failed = 0;
+
+var showSpaces = function(s) { // make tabs and spaces visible in failure output
+    var t = s;
+    return t.replace(/\t/g,'→')
+        .replace(/ /g,'␣');
+};
+
+fs.readFile('spec.txt', 'utf8', function(err, data) {
+    if (err) {
+        return console.log(err);
+    }
+    var examples = [];
+    var current_section = "";
+    var example_number = 0;
+    var tests = data.replace(/^<!-- END TESTS -->(.|[\n])*/m,''); // declared: was an implicit global
+    tests.replace(/^\.\n([\s\S]*?)^\.\n([\s\S]*?)^\.$|^#{1,6} *(.*)$/gm,
+        function(_,x,y,z,w){ // x = markdown, y = expected html, z = section heading
+            if (z) {
+                current_section = z;
+            } else {
+                example_number++;
+                examples.push({markdown: x,
+                               html: y,
+                               section: current_section,
+                               number: example_number});
+            }
+        });
+
+    current_section = "";
+
+    console.time("Elapsed time");
+
+    for (var i = 0; i < examples.length; i++) { // `i` declared: was an implicit global
+        var example = examples[i];
+        if (example.section != current_section) {
+            if (current_section !== '') {
+                cursor.write('\n');
+            }
+            current_section = example.section;
+            cursor.reset().write(current_section).reset().write(' ');
+        }
+        var actual = writer.renderBlock(reader.parse(example.markdown.replace(/→/g, '\t')));
+        if (actual == example.html) {
+            passed++;
+            cursor.green().write('✓').reset();
+        } else {
+            failed++;
+            cursor.write('\n');
+
+            cursor.red().write('✘ Example ' + example.number + '\n');
+            cursor.cyan();
+            cursor.write('=== markdown ===============\n');
+            cursor.write(showSpaces(example.markdown));
+            cursor.write('=== expected ===============\n');
+            cursor.write(showSpaces(example.html));
+            cursor.write('=== got ====================\n');
+            cursor.write(showSpaces(actual));
+            cursor.reset();
+        }
+    }
+    cursor.write('\n' + passed.toString() + ' tests passed, ' +
+                 failed.toString() + ' failed.\n');
+
+    console.timeEnd("Elapsed time");
+});
+
diff --git a/license.bstrlib.txt b/license.bstrlib.txt
new file mode 100644
index 0000000..cf78a98
--- /dev/null
+++ b/license.bstrlib.txt
@@ -0,0 +1,29 @@
+Copyright (c) 2002-2008 Paul Hsieh
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ Neither the name of bstrlib nor the names of its contributors may be used
+ to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/license.uthash.txt b/license.uthash.txt
new file mode 100644
index 0000000..ad8e16a
--- /dev/null
+++ b/license.uthash.txt
@@ -0,0 +1,21 @@
+Copyright (c) 2005-2013, Troy D. Hanson http://troydhanson.github.com/uthash/
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/mkcasefold.pl b/mkcasefold.pl
new file mode 100644
index 0000000..4c08634
--- /dev/null
+++ b/mkcasefold.pl
@@ -0,0 +1,21 @@
+print(" switch (c) {\n");
+my $lastchar = "";    # code point of the most recently emitted case label
+while (<STDIN>) {
+    if (/^[A-F0-9]/ and / [CF]; /) {    # line starts with a hex digit and has status field C or F
+        my ($char, $type, $subst) = m/([A-F0-9]+); ([CF]); ([^;]+)/;
+        if ($char eq $lastchar) {
+            next;    # same code point as the previous case: skip it (`break` is not Perl loop control)
+        }
+        my @subst = $subst =~ m/(\w+)/g;    # one entry per code point in the mapping
+        printf(" case 0x%s:\n", $char);
+        foreach (@subst) {
+            printf(" bufpush(0x%s);\n", $_);
+        }
+        printf(" break;\n");
+        $lastchar = $char;
+    }
+}
+printf(" default:\n");
+printf(" bufpush(c);\n");
+print(" }\n");
+
diff --git a/oldtests/Blockquotes/Indents.html b/oldtests/Blockquotes/Indents.html
new file mode 100644
index 0000000..fd98ee8
--- /dev/null
+++ b/oldtests/Blockquotes/Indents.html
@@ -0,0 +1,12 @@
+<blockquote>
+<p>one
+blockquote</p>
+</blockquote>
+<blockquote>
+<blockquote>
+<blockquote>
+<p>triply nested
+triply nested</p>
+</blockquote>
+</blockquote>
+</blockquote>
diff --git a/oldtests/Blockquotes/Indents.markdown b/oldtests/Blockquotes/Indents.markdown
new file mode 100644
index 0000000..f9342ff
--- /dev/null
+++ b/oldtests/Blockquotes/Indents.markdown
@@ -0,0 +1,5 @@
+> one
+ > blockquote
+
+>>> triply nested
+ > > > triply nested
diff --git a/oldtests/Blockquotes/Nesting.html b/oldtests/Blockquotes/Nesting.html
new file mode 100644
index 0000000..f40e999
--- /dev/null
+++ b/oldtests/Blockquotes/Nesting.html
@@ -0,0 +1,32 @@
+<p>These are all equivalent:</p>
+<blockquote>
+<blockquote>
+<p>nested
+blockquote</p>
+</blockquote>
+</blockquote>
+<blockquote>
+<blockquote>
+<p>nested
+blockquote</p>
+</blockquote>
+</blockquote>
+<blockquote>
+<blockquote>
+<p>nested
+blockquote</p>
+</blockquote>
+</blockquote>
+<blockquote>
+<blockquote>
+<p>nested
+blockquote</p>
+</blockquote>
+</blockquote>
+<p>This is not:</p>
+<blockquote>
+<p>nested</p>
+<blockquote>
+<p>blockquote</p>
+</blockquote>
+</blockquote>
diff --git a/oldtests/Blockquotes/Nesting.markdown b/oldtests/Blockquotes/Nesting.markdown
new file mode 100644
index 0000000..3d67843
--- /dev/null
+++ b/oldtests/Blockquotes/Nesting.markdown
@@ -0,0 +1,22 @@
+These are all equivalent:
+
+> > nested
+> > blockquote
+
+
+>> nested
+>> blockquote
+
+
+> > nested
+blockquote
+
+
+> > nested
+> blockquote
+
+
+This is not:
+
+> nested
+> > blockquote
diff --git a/oldtests/Blockquotes/Separation.html b/oldtests/Blockquotes/Separation.html
new file mode 100644
index 0000000..910d545
--- /dev/null
+++ b/oldtests/Blockquotes/Separation.html
@@ -0,0 +1,39 @@
+<p>One blockquote, two paragraphs:</p>
+<blockquote>
+<p>one</p>
+<p>two</p>
+</blockquote>
+<p>Two blockquotes:</p>
+<blockquote>
+<p>one</p>
+</blockquote>
+<blockquote>
+<p>two</p>
+</blockquote>
+<p>Nested blockquote, two paragraphs:</p>
+<blockquote>
+<blockquote>
+<p>one</p>
+<p>two</p>
+</blockquote>
+</blockquote>
+<p>Nested blockquote, two blockquotes:</p>
+<blockquote>
+<blockquote>
+<p>one</p>
+</blockquote>
+<blockquote>
+<p>two</p>
+</blockquote>
+</blockquote>
+<p>Two nested blockquotes:</p>
+<blockquote>
+<blockquote>
+<p>one</p>
+</blockquote>
+</blockquote>
+<blockquote>
+<blockquote>
+<p>two</p>
+</blockquote>
+</blockquote>
diff --git a/oldtests/Blockquotes/Separation.markdown b/oldtests/Blockquotes/Separation.markdown
new file mode 100644
index 0000000..823d865
--- /dev/null
+++ b/oldtests/Blockquotes/Separation.markdown
@@ -0,0 +1,29 @@
+One blockquote, two paragraphs:
+
+> one
+>
+> two
+
+Two blockquotes:
+
+> one
+
+> two
+
+Nested blockquote, two paragraphs:
+
+> > one
+> >
+> > two
+
+Nested blockquote, two blockquotes:
+
+> > one
+>
+> > two
+
+Two nested blockquotes:
+
+> > one
+
+> > two
diff --git a/oldtests/Code/BlankLines.html b/oldtests/Code/BlankLines.html
new file mode 100644
index 0000000..ae0abf7
--- /dev/null
+++ b/oldtests/Code/BlankLines.html
@@ -0,0 +1,33 @@
+<pre><code>foo
+
+
+
+bar
+</code></pre>
+<blockquote>
+<pre><code>foo
+
+
+
+bar
+</code></pre>
+</blockquote>
+<pre><code>foo
+
+
+
+bar
+</code></pre>
+<ol>
+<li><p>One</p>
+<pre><code>CodeA
+
+CodeB
+</code></pre></li>
+<li><p>Two</p>
+<pre><code>CodeA
+</code></pre></li>
+</ol>
+<ol>
+<li>One</li>
+</ol>
diff --git a/oldtests/Code/BlankLines.markdown b/oldtests/Code/BlankLines.markdown
new file mode 100644
index 0000000..b0d5a0c
--- /dev/null
+++ b/oldtests/Code/BlankLines.markdown
@@ -0,0 +1,28 @@
+ foo
+
+
+
+ bar
+> foo
+>
+>
+>
+> bar
+ foo
+
+
+
+ bar
+
+1. One
+
+ CodeA
+
+ CodeB
+
+2. Two
+
+ CodeA
+
+
+1. One
diff --git a/oldtests/Code/BlankLinesAtEnd.html b/oldtests/Code/BlankLinesAtEnd.html
new file mode 100644
index 0000000..ac803d9
--- /dev/null
+++ b/oldtests/Code/BlankLinesAtEnd.html
@@ -0,0 +1,14 @@
+<ul>
+<li><p>List</p>
+<pre><code>code
+</code></pre></li>
+</ul>
+<ul>
+<li>one</li>
+<li>two</li>
+</ul>
+<ul>
+<li><p>one
+not code</p></li>
+<li><p>two</p></li>
+</ul>
diff --git a/oldtests/Code/BlankLinesAtEnd.markdown b/oldtests/Code/BlankLinesAtEnd.markdown
new file mode 100644
index 0000000..55879ae
--- /dev/null
+++ b/oldtests/Code/BlankLinesAtEnd.markdown
@@ -0,0 +1,14 @@
+* List
+
+ code
+
+
+ * one
+ * two
+
+
+
+* one
+ not code
+
+* two
diff --git a/oldtests/Code/FenceMatching.html b/oldtests/Code/FenceMatching.html
new file mode 100644
index 0000000..4c7468e
--- /dev/null
+++ b/oldtests/Code/FenceMatching.html
@@ -0,0 +1,8 @@
+<pre class="abc"><code>```
+</code></pre>
+<pre class="blah"><code>
+`````
+
+````
+
+</code></pre>
diff --git a/oldtests/Code/FenceMatching.markdown b/oldtests/Code/FenceMatching.markdown
new file mode 100644
index 0000000..d86169a
--- /dev/null
+++ b/oldtests/Code/FenceMatching.markdown
@@ -0,0 +1,10 @@
+````abc
+```
+````
+``````blah
+
+`````
+
+````
+
+```````````
diff --git a/oldtests/Code/FencedCodeBlocks.html b/oldtests/Code/FencedCodeBlocks.html
new file mode 100644
index 0000000..4813d72
--- /dev/null
+++ b/oldtests/Code/FencedCodeBlocks.html
@@ -0,0 +1,24 @@
+<p>This is a fenced code block:</p>
+<pre class="haskell"><code>pairs :: [(Int,Char)]
+pairs = [(x,y) | x &lt;- [0..10], y &lt;- ['a'..'z']]
+</code></pre>
+<p>Here is one with tildes:</p>
+<pre class="haskell"><code>pairs :: [(Int,Char)]
+pairs = [(x,y) | x &lt;- [0..10], y &lt;- ['a'..'z']]
+</code></pre>
+<p>More metadata:</p>
+<pre class="haskell"><code>pairs :: [(Int,Char)]
+pairs = [(x,y) | x &lt;- [0..10], y &lt;- ['a'..'z']]
+</code></pre>
+<p>More backticks:</p>
+<pre class="haskell"><code>pairs :: [(Int,Char)]
+pairs = [(x,y) | x &lt;- [0..10], y &lt;- ['a'..'z']]
+
+backticks :: String
+backticks = &quot;`````&quot;
+</code></pre>
+<p>Without an end:</p>
+<pre><code>code with
+no end
+
+</code></pre>
diff --git a/oldtests/Code/FencedCodeBlocks.markdown b/oldtests/Code/FencedCodeBlocks.markdown
new file mode 100644
index 0000000..6ccc6be
--- /dev/null
+++ b/oldtests/Code/FencedCodeBlocks.markdown
@@ -0,0 +1,35 @@
+This is a fenced code block:
+```haskell
+pairs :: [(Int,Char)]
+pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
+```
+Here is one with tildes:
+
+~~~ haskell
+pairs :: [(Int,Char)]
+pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
+~~~
+
+More metadata:
+
+```haskell numberLines start=50
+pairs :: [(Int,Char)]
+pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
+```
+
+More backticks:
+
+```````` haskell
+pairs :: [(Int,Char)]
+pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
+
+backticks :: String
+backticks = "`````"
+`````````````
+
+Without an end:
+
+```
+code with
+no end
+
diff --git a/oldtests/Code/IndentedCodeBlocks.html b/oldtests/Code/IndentedCodeBlocks.html
new file mode 100644
index 0000000..0b9b7e7
--- /dev/null
+++ b/oldtests/Code/IndentedCodeBlocks.html
@@ -0,0 +1,22 @@
+<p>Indented code with two space indent in first and last line:</p>
+<pre><code> two spaces *hello*
+{ more }
+
+ and
+</code></pre>
+<p>Indented code requires a leading/trailing blank line:
+quick-command --option &quot;$*&quot;</p>
+<p>Indented code does not require a trailing blank line:</p>
+<pre><code>code
+</code></pre>
+<p>and not code.</p>
+<p>Code in blockquote:</p>
+<blockquote>
+<pre><code>code
+</code></pre>
+</blockquote>
+<p>Code in list:</p>
+<ol>
+<li><pre><code>code
+</code></pre></li>
+</ol>
diff --git a/oldtests/Code/IndentedCodeBlocks.markdown b/oldtests/Code/IndentedCodeBlocks.markdown
new file mode 100644
index 0000000..2a99db0
--- /dev/null
+++ b/oldtests/Code/IndentedCodeBlocks.markdown
@@ -0,0 +1,22 @@
+Indented code with two space indent in first and last line:
+
+ two spaces *hello*
+ { more }
+
+ and
+
+Indented code requires a leading/trailing blank line:
+ quick-command --option "$*"
+
+Indented code does not require a trailing blank line:
+
+ code
+and not code.
+
+Code in blockquote:
+
+> code
+
+Code in list:
+
+1. code
diff --git a/oldtests/Code/IndentedFences.html b/oldtests/Code/IndentedFences.html
new file mode 100644
index 0000000..66e76da
--- /dev/null
+++ b/oldtests/Code/IndentedFences.html
@@ -0,0 +1,20 @@
+<pre><code>a
+</code></pre>
+<pre><code>z
+</code></pre>
+<pre><code>a
+a
+a
+ a
+</code></pre>
+<ul>
+<li><p>foo</p>
+<pre><code> Hello
+
+World
+</code></pre></li>
+</ul>
+<blockquote>
+<pre><code>a
+</code></pre>
+</blockquote>
diff --git a/oldtests/Code/IndentedFences.markdown b/oldtests/Code/IndentedFences.markdown
new file mode 100644
index 0000000..098545f
--- /dev/null
+++ b/oldtests/Code/IndentedFences.markdown
@@ -0,0 +1,26 @@
+ ```
+ a
+ ```
+
+ ```
+z
+```
+
+ ```
+a
+ a
+ a
+ a
+ ```
+
+* foo
+
+ ```
+ Hello
+
+ World
+ ```
+
+> ```
+>a
+>```
diff --git a/oldtests/Code/IndentedInLists.html b/oldtests/Code/IndentedInLists.html
new file mode 100644
index 0000000..76ed424
--- /dev/null
+++ b/oldtests/Code/IndentedInLists.html
@@ -0,0 +1,22 @@
+<ul>
+<li><pre><code>code starts here
+</code></pre></li>
+</ul>
+<ol>
+<li><p>foo</p>
+<pre><code>code starts here
+</code></pre></li>
+<li><p>foo</p>
+<pre><code>code starts here
+</code></pre></li>
+</ol>
+<ul>
+<li><p>foo</p>
+<pre><code>code starts here
+</code></pre>
+<ul>
+<li><p>foo</p>
+<pre><code>code starts here
+</code></pre></li>
+</ul></li>
+</ul>
diff --git a/oldtests/Code/IndentedInLists.markdown b/oldtests/Code/IndentedInLists.markdown
new file mode 100644
index 0000000..54e1af1
--- /dev/null
+++ b/oldtests/Code/IndentedInLists.markdown
@@ -0,0 +1,17 @@
+- code starts here
+
+1. foo
+
+ code starts here
+
+2. foo
+
+ code starts here
+
+- foo
+
+ code starts here
+
+ - foo
+
+ code starts here
diff --git a/oldtests/Code/Inline.html b/oldtests/Code/Inline.html
new file mode 100644
index 0000000..9c52790
--- /dev/null
+++ b/oldtests/Code/Inline.html
@@ -0,0 +1,13 @@
+<p>All of these are equivalent:</p>
+<ul>
+<li><code>*hi*</code></li>
+<li><code>*hi*</code></li>
+<li><code>*hi*</code></li>
+<li><code>*hi*</code></li>
+<li><code>*hi*</code></li>
+</ul>
+<p>Backticks in code spans:</p>
+<ul>
+<li><code>``code``</code></li>
+<li><code>``code``</code></li>
+</ul>
diff --git a/oldtests/Code/Inline.markdown b/oldtests/Code/Inline.markdown
new file mode 100644
index 0000000..38e5b0c
--- /dev/null
+++ b/oldtests/Code/Inline.markdown
@@ -0,0 +1,13 @@
+All of these are equivalent:
+
+- `*hi*`
+- ` *hi* `
+- ``*hi* ``
+- ````*hi*````
+- `*hi*
+ `
+
+Backticks in code spans:
+
+- ``` ``code`` ```
+- ` ``code`` `
diff --git a/oldtests/Code/ListBreakAfter.html b/oldtests/Code/ListBreakAfter.html
new file mode 100644
index 0000000..29d6d5e
--- /dev/null
+++ b/oldtests/Code/ListBreakAfter.html
@@ -0,0 +1,30 @@
+<ul>
+<li><p>foo</p>
+<ul>
+<li><p>bar</p>
+<pre><code>code1
+code2
+</code></pre>
+<p>code?</p></li>
+</ul></li>
+<li><p>foo</p>
+<ul>
+<li><p>bar</p>
+<pre><code>code1
+code2
+</code></pre></li>
+</ul></li>
+</ul>
+<pre><code>code?
+</code></pre>
+<ul>
+<li>foo
+<ul>
+<li><p>bar</p>
+<pre><code>code1
+code2
+</code></pre></li>
+</ul></li>
+</ul>
+<pre><code>code?
+</code></pre>
diff --git a/oldtests/Code/ListBreakAfter.markdown b/oldtests/Code/ListBreakAfter.markdown
new file mode 100644
index 0000000..4fa79f1
--- /dev/null
+++ b/oldtests/Code/ListBreakAfter.markdown
@@ -0,0 +1,26 @@
+* foo
+ * bar
+
+ code1
+ code2
+
+ code?
+
+* foo
+ * bar
+
+ code1
+ code2
+
+
+ code?
+
+* foo
+ * bar
+
+ code1
+ code2
+
+
+
+ code?
diff --git a/oldtests/Code/WhiteLines.html b/oldtests/Code/WhiteLines.html
new file mode 100644
index 0000000..7fa137f
--- /dev/null
+++ b/oldtests/Code/WhiteLines.html
@@ -0,0 +1,7 @@
+<pre><code>ABC
+
+
+
+DEF
+</code></pre>
+<p>GHI</p>
diff --git a/oldtests/Code/WhiteLines.markdown b/oldtests/Code/WhiteLines.markdown
new file mode 100644
index 0000000..ea17af7
--- /dev/null
+++ b/oldtests/Code/WhiteLines.markdown
@@ -0,0 +1,9 @@
+ ABC
+
+
+
+ DEF
+
+
+
+GHI
diff --git a/oldtests/Emphasis/Escapes.html b/oldtests/Emphasis/Escapes.html
new file mode 100644
index 0000000..17c9e2d
--- /dev/null
+++ b/oldtests/Emphasis/Escapes.html
@@ -0,0 +1 @@
+<p><em>hi* there</em></p>
diff --git a/oldtests/Emphasis/Escapes.markdown b/oldtests/Emphasis/Escapes.markdown
new file mode 100644
index 0000000..4f14698
--- /dev/null
+++ b/oldtests/Emphasis/Escapes.markdown
@@ -0,0 +1 @@
+*hi\* there* \ No newline at end of file
diff --git a/oldtests/Emphasis/NestedEmphAndStrong.html b/oldtests/Emphasis/NestedEmphAndStrong.html
new file mode 100644
index 0000000..b41b527
--- /dev/null
+++ b/oldtests/Emphasis/NestedEmphAndStrong.html
@@ -0,0 +1,66 @@
+<ol>
+<li><strong><em>test test</em></strong></li>
+<li><strong><em>test test</em></strong></li>
+<li><em>test <strong>test</strong></em></li>
+<li><strong>test <em>test</em></strong></li>
+<li><strong><em>test</em> test</strong></li>
+<li><em><strong>test</strong> test</em></li>
+<li><strong><em>test</em> test</strong></li>
+<li><strong>test <em>test</em></strong></li>
+<li><em>test <strong>test</strong></em></li>
+<li><em>test <strong>test</strong></em></li>
+<li><strong>test <em>test</em></strong></li>
+<li><strong><em>test</em> test</strong></li>
+<li><em><strong>test</strong> test</em></li>
+<li><strong><em>test</em> test</strong></li>
+<li><strong>test <em>test</em></strong></li>
+<li><em>test <strong>test</strong></em></li>
+</ol>
+<p>Incorrect nesting:</p>
+<ol>
+<li>*test <strong>test* test</strong></li>
+<li>_test <strong>test_ test</strong></li>
+<li>**test <em>test</em>* test*</li>
+<li>__test␣<em>test</em>_␣test_</li>
+<li><em>test <em>test</em> test</em></li>
+<li><em>test <em>test</em> test</em></li>
+<li><strong>test <strong>test</strong> test</strong></li>
+<li><strong>test <strong>test</strong> test</strong></li>
+</ol>
+<p>No emphasis:</p>
+<ol>
+<li>test* test *test</li>
+<li>test** test **test</li>
+<li>test_ test _test</li>
+<li>test__ test __test</li>
+</ol>
+<p>Middle-word emphasis (asterisks):</p>
+<ol>
+<li><em>a</em>b</li>
+<li>a<em>b</em></li>
+<li>a<em>b</em>c</li>
+<li><strong>a</strong>b</li>
+<li>a<strong>b</strong></li>
+<li>a<strong>b</strong>c</li>
+</ol>
+<p>Middle-word emphasis (underscore):</p>
+<ol>
+<li>_a_b</li>
+<li>a_b_</li>
+<li>a_b_c</li>
+<li>__a__b</li>
+<li>a__b__</li>
+<li>a__b__c</li>
+<li>my_precious_file.txt</li>
+</ol>
+<p>Tricky Cases:</p>
+<ol>
+<li>E**. <strong>Test</strong> TestTestTest</li>
+<li>E**. <strong>Test</strong> Test Test Test</li>
+</ol>
+<p>Overlong emphasis:</p>
+<p>Name: ____________<br />
+Organization: ____<br />
+Region/Country: __</p>
+<p>_____Cut here_____</p>
+<p>____Cut here____</p>
diff --git a/oldtests/Emphasis/NestedEmphAndStrong.markdown b/oldtests/Emphasis/NestedEmphAndStrong.markdown
new file mode 100644
index 0000000..ec7da25
--- /dev/null
+++ b/oldtests/Emphasis/NestedEmphAndStrong.markdown
@@ -0,0 +1,69 @@
+1. ***test test***
+2. ___test test___
+3. *test **test***
+4. **test *test***
+5. ***test* test**
+6. ***test** test*
+7. ***test* test**
+8. **test *test***
+9. *test **test***
+10. _test __test___
+11. __test _test___
+12. ___test_ test__
+13. ___test__ test_
+14. ___test_ test__
+15. __test _test___
+16. _test __test___
+
+Incorrect nesting:
+
+1. *test **test* test**
+2. _test __test_ test__
+3. **test *test** test*
+4. __test _test__ test_
+5. *test *test* test*
+6. _test _test_ test_
+7. **test **test** test**
+8. __test __test__ test__
+
+No emphasis:
+
+1. test* test *test
+2. test** test **test
+3. test_ test _test
+4. test__ test __test
+
+Middle-word emphasis (asterisks):
+
+1. *a*b
+2. a*b*
+3. a*b*c
+4. **a**b
+5. a**b**
+6. a**b**c
+
+Middle-word emphasis (underscore):
+
+1. _a_b
+2. a_b_
+3. a_b_c
+4. __a__b
+5. a__b__
+6. a__b__c
+7. my_precious_file.txt
+
+Tricky Cases:
+
+1. E**. **Test** TestTestTest
+2. E**. **Test** Test Test Test
+
+Overlong emphasis:
+
+Name: ____________
+Organization: ____
+Region/Country: __
+
+_____Cut here_____
+
+____Cut here____
+
diff --git a/oldtests/Emphasis/Pathological.html b/oldtests/Emphasis/Pathological.html
new file mode 100644
index 0000000..37eb9fa
--- /dev/null
+++ b/oldtests/Emphasis/Pathological.html
@@ -0,0 +1,24 @@
+<p>This input can take a long time to parse in some implementations.</p>
+<p>*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+aaaaa</p>
+<p><em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a<strong>a<em>a</em><em>a</em>a</strong>a<em>a</em><em>a</em>a**</p>
diff --git a/oldtests/Emphasis/Pathological.markdown b/oldtests/Emphasis/Pathological.markdown
new file mode 100644
index 0000000..5deb95e
--- /dev/null
+++ b/oldtests/Emphasis/Pathological.markdown
@@ -0,0 +1,26 @@
+This input can take a long time to parse in some implementations.
+
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+*a
+aaaaa
+
+*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**
diff --git a/oldtests/Emphasis/Punctuation.html b/oldtests/Emphasis/Punctuation.html
new file mode 100644
index 0000000..6061b81
--- /dev/null
+++ b/oldtests/Emphasis/Punctuation.html
@@ -0,0 +1,10 @@
+<p>Here is a _ that is <em>cool</em>.</p>
+<p><em>Foo.</em></p>
+<p><strong>Foo.</strong></p>
+<p><strong><em>Foo.</em></strong></p>
+<p><em>Foo</em>.</p>
+<p><strong>Foo</strong>.</p>
+<p><strong><em>Foo</em></strong>.</p>
+<p><em>Foo</em>. <em>Foo</em>? <em>Foo</em>! <em>Foo</em>: <em>Foo</em>; (<em>Foo</em>)</p>
+<p><strong>Foo</strong>. <strong>Foo</strong>? <strong>Foo</strong>! <strong>Foo</strong>: <strong>Foo</strong>; (<strong>Foo</strong>)</p>
+<p><strong><em>Foo</em></strong>. <strong><em>Foo</em></strong>? <strong><em>Foo</em></strong>! <strong><em>Foo</em></strong>: <strong><em>Foo</em></strong>; (<strong><em>Foo</em></strong>)</p>
diff --git a/oldtests/Emphasis/Punctuation.markdown b/oldtests/Emphasis/Punctuation.markdown
new file mode 100644
index 0000000..e3f23b8
--- /dev/null
+++ b/oldtests/Emphasis/Punctuation.markdown
@@ -0,0 +1,19 @@
+Here is a _ that is _cool_.
+
+_Foo._
+
+__Foo.__
+
+___Foo.___
+
+_Foo_.
+
+__Foo__.
+
+___Foo___.
+
+_Foo_. _Foo_? _Foo_! _Foo_: _Foo_; (_Foo_)
+
+__Foo__. __Foo__? __Foo__! __Foo__: __Foo__; (__Foo__)
+
+___Foo___. ___Foo___? ___Foo___! ___Foo___: ___Foo___; (___Foo___)
diff --git a/oldtests/HTML/Blocks.html b/oldtests/HTML/Blocks.html
new file mode 100644
index 0000000..dc80335
--- /dev/null
+++ b/oldtests/HTML/Blocks.html
@@ -0,0 +1,18 @@
+<div>
+ <div>
+ *raw html*
+ </div>
+</div>
+<div>
+<div>
+<p><em>this is markdown</em></p>
+</div>
+</div>
+<!-- comment
+here -->
+<!--
+<p><em>commented out markdown</em></p>
+-->
+<div>
+* raw html with trailing space
+</div>
diff --git a/oldtests/HTML/Blocks.markdown b/oldtests/HTML/Blocks.markdown
new file mode 100644
index 0000000..a83fa66
--- /dev/null
+++ b/oldtests/HTML/Blocks.markdown
@@ -0,0 +1,26 @@
+<div>
+ <div>
+ *raw html*
+ </div>
+</div>
+
+<div>
+<div>
+
+*this is markdown*
+
+</div>
+</div>
+
+<!-- comment
+here -->
+
+<!--
+
+*commented out markdown*
+
+-->
+
+<div>
+* raw html with trailing space
+</div>
diff --git a/oldtests/HTML/Inline.html b/oldtests/HTML/Inline.html
new file mode 100644
index 0000000..94d40ac
--- /dev/null
+++ b/oldtests/HTML/Inline.html
@@ -0,0 +1,8 @@
+<p><span>hi</span>
+<span><code>hi</code></span>
+<span class="foo bar"
+title='whatever' blue=yes/>
+Hello <!-- this
+is a comment --> there.
+A line<br />break.
+<not a tag></p>
diff --git a/oldtests/HTML/Inline.markdown b/oldtests/HTML/Inline.markdown
new file mode 100644
index 0000000..2259421
--- /dev/null
+++ b/oldtests/HTML/Inline.markdown
@@ -0,0 +1,8 @@
+<span>hi</span>
+<span>`hi`</span>
+<span class="foo bar"
+title='whatever' blue=yes/>
+Hello <!-- this
+is a comment --> there.
+A line<br />break.
+<not a tag>
diff --git a/oldtests/HTML/UppercaseTags.html b/oldtests/HTML/UppercaseTags.html
new file mode 100644
index 0000000..8d2d828
--- /dev/null
+++ b/oldtests/HTML/UppercaseTags.html
@@ -0,0 +1,4 @@
+<DIV>
+this is a block
+</DIV>
+<p>Here is some <I>inline</I> html.</p>
diff --git a/oldtests/HTML/UppercaseTags.markdown b/oldtests/HTML/UppercaseTags.markdown
new file mode 100644
index 0000000..b476ffb
--- /dev/null
+++ b/oldtests/HTML/UppercaseTags.markdown
@@ -0,0 +1,5 @@
+<DIV>
+this is a block
+</DIV>
+
+Here is some <I>inline</I> html. \ No newline at end of file
diff --git a/oldtests/Headers/ATX.html b/oldtests/Headers/ATX.html
new file mode 100644
index 0000000..f375b98
--- /dev/null
+++ b/oldtests/Headers/ATX.html
@@ -0,0 +1,14 @@
+<h1>One</h1>
+<h2>Two</h2>
+<h3>Three</h3>
+<h4>Four</h4>
+<h5>Five</h5>
+<h6>Six</h6>
+<p>####### Seven</p>
+<h3>Three with</h3>
+<h2>Spacing doesn't matter</h2>
+<h2>Escaped final #</h2>
+<p>## Not a header</p>
+<p>#5 not a header</p>
+<h2></h2>
+<p>(empty header)</p>
diff --git a/oldtests/Headers/ATX.markdown b/oldtests/Headers/ATX.markdown
new file mode 100644
index 0000000..f687aa5
--- /dev/null
+++ b/oldtests/Headers/ATX.markdown
@@ -0,0 +1,20 @@
+# One
+## Two
+### Three
+#### Four
+##### Five
+
+###### Six
+
+####### Seven
+
+### Three with ###
+## Spacing doesn't matter ##
+## Escaped final \##
+
+\## Not a header
+
+#5 not a header
+
+##
+(empty header)
diff --git a/oldtests/Headers/Setext.html b/oldtests/Headers/Setext.html
new file mode 100644
index 0000000..787fb02
--- /dev/null
+++ b/oldtests/Headers/Setext.html
@@ -0,0 +1,9 @@
+<h1>Level one</h1>
+<h2>Two</h2>
+<p>In a paragraph</p>
+<h2>Level two</h2>
+<p>more text</p>
+<p>======
+no empty headers</p>
+<p>not a header</p>
+<hr />
diff --git a/oldtests/Headers/Setext.markdown b/oldtests/Headers/Setext.markdown
new file mode 100644
index 0000000..da0c7e2
--- /dev/null
+++ b/oldtests/Headers/Setext.markdown
@@ -0,0 +1,17 @@
+Level one
+=========
+
+Two
+---
+
+In a paragraph
+
+Level two
+---------
+more text
+
+======
+no empty headers
+
+not a header
+------------ -----
diff --git a/oldtests/Links/AngleBrackets.html b/oldtests/Links/AngleBrackets.html
new file mode 100644
index 0000000..21ac00d
--- /dev/null
+++ b/oldtests/Links/AngleBrackets.html
@@ -0,0 +1,3 @@
+<p><a href="?}]*+|&amp;)">silly URL with angle brackets</a>.</p>
+<p><a href="url://with spaces" title="title">link</a>.</p>
+<p><a href="url with spaces" title="title">link</a>.</p>
diff --git a/oldtests/Links/AngleBrackets.markdown b/oldtests/Links/AngleBrackets.markdown
new file mode 100644
index 0000000..c2e06ff
--- /dev/null
+++ b/oldtests/Links/AngleBrackets.markdown
@@ -0,0 +1,7 @@
+[silly URL with angle brackets](<?}]*+|&)>).
+
+[link](<url://with spaces> "title").
+
+[link][].
+
+[link]: <url with spaces> "title"
diff --git a/oldtests/Links/AutoLinks.html b/oldtests/Links/AutoLinks.html
new file mode 100644
index 0000000..092353f
--- /dev/null
+++ b/oldtests/Links/AutoLinks.html
@@ -0,0 +1,7 @@
+<p><a href="http://google.com?query=blah&amp;time=15">http://google.com?query=blah&amp;time=15</a>
+<a href="mailto:someone.else@somedomain.com">someone.else@somedomain.com</a>
+<a href="ftp://old.ftp.server.edu">ftp://old.ftp.server.edu</a>
+<a href="git://some.git.repo/project.git">git://some.git.repo/project.git</a>
+<not autolink>
+&lt;http://not.an autolink&gt;
+&lt;relative/not/autolink&gt;</p>
diff --git a/oldtests/Links/AutoLinks.markdown b/oldtests/Links/AutoLinks.markdown
new file mode 100644
index 0000000..bf95b8d
--- /dev/null
+++ b/oldtests/Links/AutoLinks.markdown
@@ -0,0 +1,7 @@
+<http://google.com?query=blah&time=15>
+<someone.else@somedomain.com>
+<ftp://old.ftp.server.edu>
+<git://some.git.repo/project.git>
+<not autolink>
+<http://not.an autolink>
+<relative/not/autolink>
diff --git a/oldtests/Links/BackticksInLinks.html b/oldtests/Links/BackticksInLinks.html
new file mode 100644
index 0000000..ff70383
--- /dev/null
+++ b/oldtests/Links/BackticksInLinks.html
@@ -0,0 +1 @@
+<p><a href="/url">the right bracket character (<code>]</code>)</a></p>
diff --git a/oldtests/Links/BackticksInLinks.markdown b/oldtests/Links/BackticksInLinks.markdown
new file mode 100644
index 0000000..539fd52
--- /dev/null
+++ b/oldtests/Links/BackticksInLinks.markdown
@@ -0,0 +1 @@
+[the right bracket character (`]`)](/url)
diff --git a/oldtests/Links/CaseInsensitiveReferences.html b/oldtests/Links/CaseInsensitiveReferences.html
new file mode 100644
index 0000000..afe4557
--- /dev/null
+++ b/oldtests/Links/CaseInsensitiveReferences.html
@@ -0,0 +1 @@
+<p><a href="/url">Толпой</a> is a Russian word.</p>
diff --git a/oldtests/Links/CaseInsensitiveReferences.markdown b/oldtests/Links/CaseInsensitiveReferences.markdown
new file mode 100644
index 0000000..f9653b9
--- /dev/null
+++ b/oldtests/Links/CaseInsensitiveReferences.markdown
@@ -0,0 +1,3 @@
+[Толпой] is a Russian word.
+
+[ТОЛПОЙ]: /url
diff --git a/oldtests/Links/Entities.html b/oldtests/Links/Entities.html
new file mode 100644
index 0000000..252dadb
--- /dev/null
+++ b/oldtests/Links/Entities.html
@@ -0,0 +1,2 @@
+<p><a href="http://g&ouml;&ouml;gle.com">http://g&ouml;&ouml;gle.com</a></p>
+<p><a href="/url" title="g&ouml;&ouml;gle &amp; yahoo">hi</a></p>
diff --git a/oldtests/Links/Entities.markdown b/oldtests/Links/Entities.markdown
new file mode 100644
index 0000000..d81ee36
--- /dev/null
+++ b/oldtests/Links/Entities.markdown
@@ -0,0 +1,3 @@
+<http://g&ouml;&ouml;gle.com>
+
+[hi](/url "g&ouml;&ouml;gle & yahoo")
diff --git a/oldtests/Links/InlineLinks.html b/oldtests/Links/InlineLinks.html
new file mode 100644
index 0000000..ae33f33
--- /dev/null
+++ b/oldtests/Links/InlineLinks.html
@@ -0,0 +1,10 @@
+<ol>
+<li><a href="/url">link</a></li>
+<li><a href="/url" title="title">link</a></li>
+<li><a href="/url" title="title">link</a></li>
+<li><a href="/url with spaces" title="title
+with linebreak">link <em>with
+linebreak</em></a>.</li>
+<li><a href="/url(withparens)" title="and single quoted title">link</a></li>
+<li>[not a link] (/url)</li>
+</ol>
diff --git a/oldtests/Links/InlineLinks.markdown b/oldtests/Links/InlineLinks.markdown
new file mode 100644
index 0000000..a822c4d
--- /dev/null
+++ b/oldtests/Links/InlineLinks.markdown
@@ -0,0 +1,9 @@
+1. [link](/url)
+2. [link](/url "title")
+3. [link](/url
+ "title")
+4. [link *with
+linebreak*](</url with spaces> "title
+with linebreak").
+5. [link](/url(withparens) 'and single quoted title')
+6. [not a link] (/url)
diff --git a/oldtests/Links/ParensInURLs.html b/oldtests/Links/ParensInURLs.html
new file mode 100644
index 0000000..9cd6de7
--- /dev/null
+++ b/oldtests/Links/ParensInURLs.html
@@ -0,0 +1,6 @@
+<p><a href="/url(test)" title="title">Inline link 1 with parens</a>.</p>
+<p><a href="/url(test)" title="title">Inline link 2 with parens</a>.</p>
+<p><a href="/url(test)" title="title">Inline link 3 with non-escaped parens</a>.</p>
+<p><a href="/url(test)" title="title">Inline link 4 with non-escaped parens</a>.</p>
+<p><a href="/url(test)" title="title">Reference link 1 with parens</a>.</p>
+<p><a href="/url(test)" title="title">Reference link 2 with parens</a>.</p>
diff --git a/oldtests/Links/ParensInURLs.markdown b/oldtests/Links/ParensInURLs.markdown
new file mode 100644
index 0000000..bb7be4f
--- /dev/null
+++ b/oldtests/Links/ParensInURLs.markdown
@@ -0,0 +1,14 @@
+[Inline link 1 with parens](/url\(test\) "title").
+
+[Inline link 2 with parens](</url\(test\)> "title").
+
+[Inline link 3 with non-escaped parens](/url(test) "title").
+
+[Inline link 4 with non-escaped parens](</url(test)> "title").
+
+[Reference link 1 with parens][1].
+
+[Reference link 2 with parens][2].
+
+ [1]: /url(test) "title"
+ [2]: </url(test)> "title"
diff --git a/oldtests/Links/ReferenceLinks.html b/oldtests/Links/ReferenceLinks.html
new file mode 100644
index 0000000..397cdb2
--- /dev/null
+++ b/oldtests/Links/ReferenceLinks.html
@@ -0,0 +1,7 @@
+<ol>
+<li><p><a href="/url" title="even in a list item">Link references</a> can be defined anywhere.</p></li>
+</ol>
+<blockquote>
+<p><a href="/foo" title="can break
+lines">another</a> one</p>
+</blockquote>
diff --git a/oldtests/Links/ReferenceLinks.markdown b/oldtests/Links/ReferenceLinks.markdown
new file mode 100644
index 0000000..ebcf5a9
--- /dev/null
+++ b/oldtests/Links/ReferenceLinks.markdown
@@ -0,0 +1,10 @@
+1. [Link references] can be defined anywhere.
+
+ [Link references]: /url
+ (even in a list item)
+
+> [another] one
+>
+> [another]:
+> /foo "can break
+> lines"
diff --git a/oldtests/Lists/CodeBlocksInLists.html b/oldtests/Lists/CodeBlocksInLists.html
new file mode 100644
index 0000000..fcd3e2a
--- /dev/null
+++ b/oldtests/Lists/CodeBlocksInLists.html
@@ -0,0 +1,14 @@
+<ol>
+<li><p>list item
+code</p></li>
+<li><p>list item</p>
+<pre><code>code
+</code></pre></li>
+<li><pre><code>code
+</code></pre></li>
+<li><pre><code>code
+</code></pre></li>
+<li><pre><code>code
+code
+</code></pre></li>
+</ol>
diff --git a/oldtests/Lists/CodeBlocksInLists.markdown b/oldtests/Lists/CodeBlocksInLists.markdown
new file mode 100644
index 0000000..7730808
--- /dev/null
+++ b/oldtests/Lists/CodeBlocksInLists.markdown
@@ -0,0 +1,18 @@
+1. list item
+ code
+
+2. list item
+ ~~~
+ code
+ ~~~
+
+3. ~~~
+ code
+ ~~~
+
+4. ~~~
+ code
+ ~~~
+
+5. code
+ code
diff --git a/oldtests/Lists/ConsecutiveLists.html b/oldtests/Lists/ConsecutiveLists.html
new file mode 100644
index 0000000..f8f9098
--- /dev/null
+++ b/oldtests/Lists/ConsecutiveLists.html
@@ -0,0 +1,20 @@
+<ul>
+<li>one</li>
+<li>one</li>
+</ul>
+<ul>
+<li>two</li>
+<li>two</li>
+</ul>
+<ul>
+<li>three</li>
+<li>three</li>
+</ul>
+<ol>
+<li>four</li>
+<li>four</li>
+</ol>
+<ol>
+<li>five</li>
+<li>five</li>
+</ol>
diff --git a/oldtests/Lists/ConsecutiveLists.markdown b/oldtests/Lists/ConsecutiveLists.markdown
new file mode 100644
index 0000000..c4faa54
--- /dev/null
+++ b/oldtests/Lists/ConsecutiveLists.markdown
@@ -0,0 +1,10 @@
+* one
+* one
++ two
++ two
+- three
+- three
+1. four
+1. four
+1) five
+1) five
diff --git a/oldtests/Lists/EmptyListItem.html b/oldtests/Lists/EmptyListItem.html
new file mode 100644
index 0000000..2c23fe1
--- /dev/null
+++ b/oldtests/Lists/EmptyListItem.html
@@ -0,0 +1,10 @@
+<ul>
+<li>one</li>
+<li></li>
+<li>three</li>
+</ul>
+<ol>
+<li>one</li>
+<li></li>
+<li>three</li>
+</ol>
diff --git a/oldtests/Lists/EmptyListItem.markdown b/oldtests/Lists/EmptyListItem.markdown
new file mode 100644
index 0000000..d30cbc3
--- /dev/null
+++ b/oldtests/Lists/EmptyListItem.markdown
@@ -0,0 +1,7 @@
+- one
+-
+- three
+
+1. one
+2.
+3. three
diff --git a/oldtests/Lists/InBlockquote.html b/oldtests/Lists/InBlockquote.html
new file mode 100644
index 0000000..da233e8
--- /dev/null
+++ b/oldtests/Lists/InBlockquote.html
@@ -0,0 +1,22 @@
+<blockquote>
+<ul>
+<li>tight</li>
+<li>tight</li>
+</ul>
+</blockquote>
+<blockquote>
+<ul>
+<li><p>loose</p></li>
+<li><p>loose</p></li>
+</ul>
+</blockquote>
+<blockquote>
+<ul>
+<li>one-item list</li>
+</ul>
+</blockquote>
+<blockquote>
+<ul>
+<li>one-item list</li>
+</ul>
+</blockquote>
diff --git a/oldtests/Lists/InBlockquote.markdown b/oldtests/Lists/InBlockquote.markdown
new file mode 100644
index 0000000..511563b
--- /dev/null
+++ b/oldtests/Lists/InBlockquote.markdown
@@ -0,0 +1,12 @@
+> - tight
+> - tight
+
+
+> - loose
+>
+> - loose
+
+
+> - one-item list
+
+> - one-item list
diff --git a/oldtests/Lists/Indents.html b/oldtests/Lists/Indents.html
new file mode 100644
index 0000000..a11a5a6
--- /dev/null
+++ b/oldtests/Lists/Indents.html
@@ -0,0 +1,22 @@
+<blockquote>
+<ul>
+<li><p>foo</p>
+<p>bar</p></li>
+</ul>
+</blockquote>
+<ul>
+<li>one</li>
+<li>two</li>
+</ul>
+<ul>
+<li>one</li>
+<li>two</li>
+<li>three</li>
+</ul>
+<ul>
+<li>one
+<ul>
+<li>two</li>
+</ul></li>
+<li>three</li>
+</ul>
diff --git a/oldtests/Lists/Indents.markdown b/oldtests/Lists/Indents.markdown
new file mode 100644
index 0000000..293d112
--- /dev/null
+++ b/oldtests/Lists/Indents.markdown
@@ -0,0 +1,17 @@
+ > * foo
+>
+> bar
+
+
+ - one
+ - two
+
+
+- one
+ - two
+- three
+
+
+- one
+ - two
+- three
diff --git a/oldtests/Lists/ListsAndHRs.html b/oldtests/Lists/ListsAndHRs.html
new file mode 100644
index 0000000..40826f7
--- /dev/null
+++ b/oldtests/Lists/ListsAndHRs.html
@@ -0,0 +1,7 @@
+<ul>
+<li>item 1
+<ul>
+<li>item 2</li>
+</ul></li>
+</ul>
+<hr />
diff --git a/oldtests/Lists/ListsAndHRs.markdown b/oldtests/Lists/ListsAndHRs.markdown
new file mode 100644
index 0000000..19c07e7
--- /dev/null
+++ b/oldtests/Lists/ListsAndHRs.markdown
@@ -0,0 +1,3 @@
+* item 1
+ * item 2
+* * * * *
diff --git a/oldtests/Lists/ListsAndSetextHeaders.html b/oldtests/Lists/ListsAndSetextHeaders.html
new file mode 100644
index 0000000..c6af9eb
--- /dev/null
+++ b/oldtests/Lists/ListsAndSetextHeaders.html
@@ -0,0 +1,6 @@
+<ol>
+<li>item</li>
+<li>item
+Not header</li>
+</ol>
+<hr />
diff --git a/oldtests/Lists/ListsAndSetextHeaders.markdown b/oldtests/Lists/ListsAndSetextHeaders.markdown
new file mode 100644
index 0000000..acfa655
--- /dev/null
+++ b/oldtests/Lists/ListsAndSetextHeaders.markdown
@@ -0,0 +1,4 @@
+1. item
+2. item
+Not header
+----------
diff --git a/oldtests/Lists/MultipleBlankLines.html b/oldtests/Lists/MultipleBlankLines.html
new file mode 100644
index 0000000..d894db1
--- /dev/null
+++ b/oldtests/Lists/MultipleBlankLines.html
@@ -0,0 +1,56 @@
+<ol>
+<li><p>First Item</p>
+<ul>
+<li><p>one</p>
+<ul>
+<li>two</li>
+</ul></li>
+<li><p>one</p>
+<ul>
+<li>two</li>
+</ul></li>
+</ul></li>
+<li><p>Second Item</p>
+<ul>
+<li>one
+<ul>
+<li>two</li>
+</ul></li>
+</ul></li>
+</ol>
+<ul>
+<li>one
+<ul>
+<li>two</li>
+</ul></li>
+</ul>
+<ol>
+<li><p>Third Item</p>
+<ul>
+<li>one
+<ul>
+<li>two</li>
+</ul></li>
+</ul></li>
+</ol>
+<ul>
+<li>one
+<ul>
+<li>two</li>
+</ul></li>
+</ul>
+<ol>
+<li><p>Fourth Item</p>
+<ul>
+<li>one
+<ul>
+<li>two</li>
+</ul></li>
+</ul></li>
+</ol>
+<ul>
+<li>one
+<ul>
+<li>two</li>
+</ul></li>
+</ul>
diff --git a/oldtests/Lists/MultipleBlankLines.markdown b/oldtests/Lists/MultipleBlankLines.markdown
new file mode 100644
index 0000000..e24a4f2
--- /dev/null
+++ b/oldtests/Lists/MultipleBlankLines.markdown
@@ -0,0 +1,37 @@
+1. First Item
+
+ * one
+ * two
+
+ * one
+ * two
+
+1. Second Item
+
+ * one
+ * two
+
+
+ * one
+ * two
+
+1. Third Item
+
+ * one
+ * two
+
+
+
+ * one
+ * two
+
+1. Fourth Item
+
+ * one
+ * two
+
+
+
+
+ * one
+ * two
diff --git a/oldtests/Lists/Start.html b/oldtests/Lists/Start.html
new file mode 100644
index 0000000..46e0550
--- /dev/null
+++ b/oldtests/Lists/Start.html
@@ -0,0 +1,11 @@
+<ol start="4">
+<li>this list starts with 4.</li>
+<li>and continues</li>
+<li>the continuation number is irrelevant.</li>
+</ol>
+<ol start="2001">
+<li>a space odyssey</li>
+</ol>
+<ol>
+<li>standard lists get no start attribute</li>
+</ol>
diff --git a/oldtests/Lists/Start.markdown b/oldtests/Lists/Start.markdown
new file mode 100644
index 0000000..175226f
--- /dev/null
+++ b/oldtests/Lists/Start.markdown
@@ -0,0 +1,7 @@
+4. this list starts with 4.
+5. and continues
+1. the continuation number is irrelevant.
+
+2001) a space odyssey
+
+1. standard lists get no start attribute
diff --git a/oldtests/Lists/Sublists.html b/oldtests/Lists/Sublists.html
new file mode 100644
index 0000000..af62915
--- /dev/null
+++ b/oldtests/Lists/Sublists.html
@@ -0,0 +1,49 @@
+<p>Four levels:</p>
+<ul>
+<li>one
+<ul>
+<li>two
+<ul>
+<li>three
+<ul>
+<li>four</li>
+</ul></li>
+</ul></li>
+</ul></li>
+</ul>
+<ol>
+<li>one
+<ol>
+<li>two
+<ol>
+<li>three
+<ol>
+<li>four</li>
+</ol></li>
+</ol></li>
+</ol></li>
+</ol>
+<ol>
+<li>one
+<ul>
+<li>two
+<ol>
+<li>three
+<ul>
+<li>four</li>
+</ul></li>
+</ol></li>
+</ul></li>
+</ol>
+<ul>
+<li>one
+<ul>
+<li>two
+<ol>
+<li>three
+<ol>
+<li>four</li>
+</ol></li>
+</ol></li>
+</ul></li>
+</ul>
diff --git a/oldtests/Lists/Sublists.markdown b/oldtests/Lists/Sublists.markdown
new file mode 100644
index 0000000..9eced9e
--- /dev/null
+++ b/oldtests/Lists/Sublists.markdown
@@ -0,0 +1,24 @@
+Four levels:
+
+- one
+ - two
+ - three
+ - four
+
+
+1. one
+ 1. two
+ 1. three
+ 1. four
+
+
+1) one
+ - two
+ 1) three
+ - four
+
+
+- one
+ - two
+ 1. three
+ 1) four
diff --git a/oldtests/Lists/TightAndLoose.html b/oldtests/Lists/TightAndLoose.html
new file mode 100644
index 0000000..7792ebb
--- /dev/null
+++ b/oldtests/Lists/TightAndLoose.html
@@ -0,0 +1,49 @@
+<ol>
+<li>tight</li>
+</ol>
+<ul>
+<li>tight</li>
+<li>list</li>
+</ul>
+<ul>
+<li><p>loose</p></li>
+<li><p>list</p></li>
+</ul>
+<ol>
+<li>tight</li>
+<li>list</li>
+</ol>
+<ol>
+<li><p>loose</p></li>
+<li><p>list</p></li>
+</ol>
+<ol>
+<li><p>loose</p>
+<ul>
+<li>sublist</li>
+</ul></li>
+</ol>
+<ol>
+<li>tight
+<ul>
+<li>sublist</li>
+</ul></li>
+</ol>
+<ul>
+<li>tight
+<blockquote>
+<p>blockquote
+and</p>
+</blockquote>
+<pre><code>code
+</code></pre></li>
+<li>tight</li>
+</ul>
+<ul>
+<li>tight
+<ul>
+<li><p>with loose</p></li>
+<li><p>sublist</p></li>
+</ul></li>
+<li>tight</li>
+</ul>
diff --git a/oldtests/Lists/TightAndLoose.markdown b/oldtests/Lists/TightAndLoose.markdown
new file mode 100644
index 0000000..263a34c
--- /dev/null
+++ b/oldtests/Lists/TightAndLoose.markdown
@@ -0,0 +1,45 @@
+1. tight
+
+
+- tight
+- list
+
+
+- loose
+
+- list
+
+
+1. tight
+2. list
+
+
+1. loose
+
+2. list
+
+
+1. loose
+
+ - sublist
+
+
+
+1. tight
+ - sublist
+
+
+- tight
+ > blockquote
+ and
+ ```
+ code
+ ```
+- tight
+
+
+- tight
+ - with loose
+
+ - sublist
+- tight
diff --git a/oldtests/Lists/TightLooseBlockquote.html b/oldtests/Lists/TightLooseBlockquote.html
new file mode 100644
index 0000000..7e78214
--- /dev/null
+++ b/oldtests/Lists/TightLooseBlockquote.html
@@ -0,0 +1,32 @@
+<ul>
+<li>tight I
+<blockquote>
+<p>bq</p>
+</blockquote></li>
+<li>tight I</li>
+</ul>
+<ul>
+<li>tight II
+<blockquote>
+<p>bq</p>
+</blockquote>
+foo</li>
+<li>tight II</li>
+</ul>
+<ol>
+<li>Blank lines in bq don't break list
+<blockquote>
+<p>bq</p>
+</blockquote></li>
+<li>Should say (2) in output</li>
+</ol>
+<ul>
+<li>Blank lines in bq don't break LI
+<ul>
+<li>item A
+<blockquote>
+<p>bq</p>
+</blockquote></li>
+<li>item B</li>
+</ul></li>
+</ul>
diff --git a/oldtests/Lists/TightLooseBlockquote.markdown b/oldtests/Lists/TightLooseBlockquote.markdown
new file mode 100644
index 0000000..08200cc
--- /dev/null
+++ b/oldtests/Lists/TightLooseBlockquote.markdown
@@ -0,0 +1,25 @@
+* tight I
+ > bq
+ >
+* tight I
+
+
+* tight II
+ > bq
+ >
+ foo
+* tight II
+
+1. Blank lines in bq don't break list
+ > bq
+ >
+ >
+1. Should say (2) in output
+
+* Blank lines in bq don't break LI
+ * item A
+ > bq
+ >
+ >
+ * item B
+ \ No newline at end of file
diff --git a/oldtests/Lists/TightLooseMore.html b/oldtests/Lists/TightLooseMore.html
new file mode 100644
index 0000000..f26f457
--- /dev/null
+++ b/oldtests/Lists/TightLooseMore.html
@@ -0,0 +1,7 @@
+<ul>
+<li><p>foo</p>
+<ul>
+<li>bar</li>
+</ul>
+<p>blah</p></li>
+</ul>
diff --git a/oldtests/Lists/TightLooseMore.markdown b/oldtests/Lists/TightLooseMore.markdown
new file mode 100644
index 0000000..7ace63f
--- /dev/null
+++ b/oldtests/Lists/TightLooseMore.markdown
@@ -0,0 +1,4 @@
+* foo
+ * bar
+
+ blah \ No newline at end of file
diff --git a/oldtests/Lists/TwoBlankLinesEndList.html b/oldtests/Lists/TwoBlankLinesEndList.html
new file mode 100644
index 0000000..629add1
--- /dev/null
+++ b/oldtests/Lists/TwoBlankLinesEndList.html
@@ -0,0 +1,21 @@
+<ol>
+<li><p>one</p></li>
+<li><p>two</p></li>
+</ol>
+<ol>
+<li>new list</li>
+</ol>
+<blockquote>
+<ul>
+<li><p>one</p></li>
+<li><p>two</p></li>
+</ul>
+<ul>
+<li>new list</li>
+</ul>
+</blockquote>
+<ol>
+<li>one</li>
+</ol>
+<pre><code>code
+</code></pre>
diff --git a/oldtests/Lists/TwoBlankLinesEndList.markdown b/oldtests/Lists/TwoBlankLinesEndList.markdown
new file mode 100644
index 0000000..2984a19
--- /dev/null
+++ b/oldtests/Lists/TwoBlankLinesEndList.markdown
@@ -0,0 +1,20 @@
+1. one
+
+2. two
+
+
+1. new list
+
+
+> - one
+>
+> - two
+>
+>
+> - new list
+
+
+1. one
+
+
+ code
diff --git a/oldtests/Makefile b/oldtests/Makefile
new file mode 100644
index 0000000..c8a30bd
--- /dev/null
+++ b/oldtests/Makefile
@@ -0,0 +1,55 @@
+SHELL=/bin/bash
+TESTDIR ?= *
+PATT ?= .
+TESTS=$(shell ls $(TESTDIR)/*.markdown | grep $(PATT))
+DIFFS=$(patsubst %.markdown,%.diff,$(TESTS))
+PROG ?= ../stmd
+FILTER ?= perl -pe 's/ /␣/g'
+TIDYCMD ?= tidy -asxhtml -utf8 --show-body-only yes --show-warnings no -quiet
+DETAILS ?= 1
+
+# Check to see if echo supports -e option to allow backslash escapes
+ifeq ($(shell echo -e),-e)
+ECHO=echo
+else
+ECHO=echo -e
+endif
+
+all: $(DIFFS)
+ PASS=0;TESTS=0; \
+ for f in $(DIFFS); do \
+ let TESTS=TESTS+1; \
+ [ -s $$f ] || let PASS=PASS+1; \
+ done; \
+ $(ECHO) "\033[1m$$PASS of $$TESTS tests passed.\033[0m"; \
+ if [ $$TESTS -eq $$PASS ]; then exit 0; else exit 1; fi
+
+%.actual.html: %.markdown
+ifeq ($(TIDY),1)
+ -cat $< | $(PROG) | $(TIDYCMD) > $@
+else
+ -cat $< | $(PROG) > $@
+endif
+
+%.expected.html: %.html
+ifeq ($(TIDY),1)
+ -$(TIDYCMD) $< > $@
+else
+ cp $< $@
+endif
+
+%.diff: %.expected.html %.actual.html
+ diff --unified=1 <(cat $(word 1,$^) | $(FILTER)) <(cat $(word 2,$^) | $(FILTER)) > $@ ; \
+ if [ -s $@ ]; then \
+ $(ECHO) "\033[1;31m✘ $(patsubst %.diff,%,$@)\033[0m"; \
+ if [ $(DETAILS) == "1" ]; then \
+ $(ECHO) "\033[0;36m" ; cat $@; $(ECHO) "\033[0m"; \
+ fi \
+ else \
+ $(ECHO) "\033[1;32m✓ $(patsubst %.diff,%,$@)\033[0m"; \
+ fi
+
+.PHONY: all clean
+
+clean:
+ -@rm */*.{diff,actual.html,expected.html}
diff --git a/oldtests/Misc/BackslashEscapes.html b/oldtests/Misc/BackslashEscapes.html
new file mode 100644
index 0000000..3eb2aed
--- /dev/null
+++ b/oldtests/Misc/BackslashEscapes.html
@@ -0,0 +1,14 @@
+<p>*not emphasized*
+\<em>emphasis</em>
+**not bold**
+&lt;br/&gt; not a tag
+[link](/foo) not a link
+<a href="/foo)" title="title&quot;">link</a>
+`not code`</p>
+<p>1. not a list item</p>
+<p>* not a list.</p>
+<p># Not a header</p>
+<p>[foo]: /url &quot;not a reference&quot;</p>
+<p>$ ^ ; can be escaped.
+\a \b \T cannot.
+unicode letters and symbols cannot: \π \‥.</p>
diff --git a/oldtests/Misc/BackslashEscapes.markdown b/oldtests/Misc/BackslashEscapes.markdown
new file mode 100644
index 0000000..23496dc
--- /dev/null
+++ b/oldtests/Misc/BackslashEscapes.markdown
@@ -0,0 +1,19 @@
+\*not emphasized*
+\\*emphasis*
+\*\*not bold**
+\<br/> not a tag
+\[link](/foo) not a link
+[link](/foo\) "title\"")
+\`not code`
+
+1\. not a list item
+
+\* not a list.
+
+\# Not a header
+
+\[foo]: /url "not a reference"
+
+\$ \^ \; can be escaped.
+\a \b \T cannot.
+unicode letters and symbols cannot: \π \‥.
diff --git a/oldtests/Misc/Laziness.html b/oldtests/Misc/Laziness.html
new file mode 100644
index 0000000..e130eb5
--- /dev/null
+++ b/oldtests/Misc/Laziness.html
@@ -0,0 +1,22 @@
+<blockquote>
+<ol>
+<li>one
+two</li>
+</ol>
+</blockquote>
+<p>Laziness only affects paragraph continuations:</p>
+<blockquote>
+<pre><code>code
+</code></pre>
+</blockquote>
+<pre><code>not same code block
+</code></pre>
+<ol>
+<li>hello</li>
+</ol>
+<hr />
+<blockquote>
+<pre><code></code></pre>
+</blockquote>
+<p>code</p>
+<pre><code></code></pre>
diff --git a/oldtests/Misc/Laziness.markdown b/oldtests/Misc/Laziness.markdown
new file mode 100644
index 0000000..2c32870
--- /dev/null
+++ b/oldtests/Misc/Laziness.markdown
@@ -0,0 +1,14 @@
+> 1. one
+> two
+
+Laziness only affects paragraph continuations:
+
+> code
+ not same code block
+
+1. hello
+-----
+
+> ```
+code
+```
diff --git a/oldtests/Misc/LineBreaks.html b/oldtests/Misc/LineBreaks.html
new file mode 100644
index 0000000..2d85e85
--- /dev/null
+++ b/oldtests/Misc/LineBreaks.html
@@ -0,0 +1,11 @@
+<p>Two spaces<br />
+break a line. Or more than two<br />
+and spaces in the following line are absorbed.</p>
+<p>You can also break lines with<br />
+a backslash.</p>
+<p>Two spaces at the end of a paragraph are
+not a line break.</p>
+<p>A backslash at the end of a paragraph is
+not a line break.\</p>
+<h2>Similarly with setext headers</h2>
+<h2>And with backslashes\</h2>
diff --git a/oldtests/Misc/LineBreaks.markdown b/oldtests/Misc/LineBreaks.markdown
new file mode 100644
index 0000000..3632dcb
--- /dev/null
+++ b/oldtests/Misc/LineBreaks.markdown
@@ -0,0 +1,18 @@
+Two spaces
+break a line. Or more than two
+ and spaces in the following line are absorbed.
+
+You can also break lines with\
+a backslash.
+
+Two spaces at the end of a paragraph are
+not a line break.
+
+A backslash at the end of a paragraph is
+not a line break.\
+
+Similarly with setext headers
+-------------------------------
+
+And with backslashes\
+---------------------
diff --git a/oldtests/Misc/Transitions.html b/oldtests/Misc/Transitions.html
new file mode 100644
index 0000000..fceff9f
--- /dev/null
+++ b/oldtests/Misc/Transitions.html
@@ -0,0 +1,26 @@
+<blockquote>
+<p>blockquote</p>
+<blockquote>
+<p>blockquote</p>
+</blockquote>
+</blockquote>
+<ol>
+<li>list</li>
+<li>list
+<ul>
+<li>sublist</li>
+</ul></li>
+</ol>
+<hr />
+<p>paragraph</p>
+<h2>header</h2>
+<h3>header</h3>
+<pre><code>code
+</code></pre>
+<pre><code>code
+</code></pre>
+<div>
+ <div>
+# not a header
+ </div>
+</div>
diff --git a/oldtests/Misc/Transitions.markdown b/oldtests/Misc/Transitions.markdown
new file mode 100644
index 0000000..5f3a9d3
--- /dev/null
+++ b/oldtests/Misc/Transitions.markdown
@@ -0,0 +1,20 @@
+> blockquote
+> > blockquote
+1. list
+2. list
+ - sublist
+* * * * *
+paragraph
+
+header
+------
+### header
+ code
+```
+code
+```
+<div>
+ <div>
+# not a header
+ </div>
+</div>
diff --git a/oldtests/Original/Amps_and_angle_encoding.html b/oldtests/Original/Amps_and_angle_encoding.html
new file mode 100644
index 0000000..fc1b2c3
--- /dev/null
+++ b/oldtests/Original/Amps_and_angle_encoding.html
@@ -0,0 +1,9 @@
+<p>AT&amp;T has an ampersand in their name.</p>
+<p>AT&amp;T is another way to write it.</p>
+<p>This &amp; that.</p>
+<p>4 &lt; 5.</p>
+<p>6 &gt; 5.</p>
+<p>Here's a <a href="http://example.com/?foo=1&amp;bar=2">link</a> with an ampersand in the URL.</p>
+<p>Here's a link with an amersand in the link text: <a href="http://att.com/" title="AT&amp;T">AT&amp;T</a>.</p>
+<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.</p>
+<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.</p>
diff --git a/oldtests/Original/Amps_and_angle_encoding.markdown b/oldtests/Original/Amps_and_angle_encoding.markdown
new file mode 100644
index 0000000..0e9527f
--- /dev/null
+++ b/oldtests/Original/Amps_and_angle_encoding.markdown
@@ -0,0 +1,21 @@
+AT&T has an ampersand in their name.
+
+AT&amp;T is another way to write it.
+
+This & that.
+
+4 < 5.
+
+6 > 5.
+
+Here's a [link] [1] with an ampersand in the URL.
+
+Here's a link with an amersand in the link text: [AT&T] [2].
+
+Here's an inline [link](/script?foo=1&bar=2).
+
+Here's an inline [link](</script?foo=1&bar=2>).
+
+
+[1]: http://example.com/?foo=1&bar=2
+[2]: http://att.com/ "AT&T" \ No newline at end of file
diff --git a/oldtests/Original/Auto_links.html b/oldtests/Original/Auto_links.html
new file mode 100644
index 0000000..f517fe6
--- /dev/null
+++ b/oldtests/Original/Auto_links.html
@@ -0,0 +1,13 @@
+<p>Link: <a href="http://example.com/">http://example.com/</a>.</p>
+<p>With an ampersand: <a href="http://example.com/?foo=1&amp;bar=2">http://example.com/?foo=1&amp;bar=2</a></p>
+<ul>
+<li>In a list?</li>
+<li><a href="http://example.com/">http://example.com/</a></li>
+<li>It should.</li>
+</ul>
+<blockquote>
+<p>Blockquoted: <a href="http://example.com/">http://example.com/</a></p>
+</blockquote>
+<p>Auto-links should not occur here: <code>&lt;http://example.com/&gt;</code></p>
+<pre><code>or here: &lt;http://example.com/&gt;
+</code></pre>
diff --git a/oldtests/Original/Auto_links.markdown b/oldtests/Original/Auto_links.markdown
new file mode 100644
index 0000000..abbc488
--- /dev/null
+++ b/oldtests/Original/Auto_links.markdown
@@ -0,0 +1,13 @@
+Link: <http://example.com/>.
+
+With an ampersand: <http://example.com/?foo=1&bar=2>
+
+* In a list?
+* <http://example.com/>
+* It should.
+
+> Blockquoted: <http://example.com/>
+
+Auto-links should not occur here: `<http://example.com/>`
+
+ or here: <http://example.com/> \ No newline at end of file
diff --git a/oldtests/Original/Backslash_escapes.html b/oldtests/Original/Backslash_escapes.html
new file mode 100644
index 0000000..9a83379
--- /dev/null
+++ b/oldtests/Original/Backslash_escapes.html
@@ -0,0 +1,75 @@
+<p>These should all get escaped:</p>
+<p>Backslash: \</p>
+<p>Backtick: `</p>
+<p>Asterisk: *</p>
+<p>Underscore: _</p>
+<p>Left brace: {</p>
+<p>Right brace: }</p>
+<p>Left bracket: [</p>
+<p>Right bracket: ]</p>
+<p>Left paren: (</p>
+<p>Right paren: )</p>
+<p>Greater-than: &gt;</p>
+<p>Hash: #</p>
+<p>Period: .</p>
+<p>Bang: !</p>
+<p>Plus: +</p>
+<p>Minus: -</p>
+<p>These should not, because they occur within a code block:</p>
+<pre><code>Backslash: \\
+
+Backtick: \`
+
+Asterisk: \*
+
+Underscore: \_
+
+Left brace: \{
+
+Right brace: \}
+
+Left bracket: \[
+
+Right bracket: \]
+
+Left paren: \(
+
+Right paren: \)
+
+Greater-than: \&gt;
+
+Hash: \#
+
+Period: \.
+
+Bang: \!
+
+Plus: \+
+
+Minus: \-
+</code></pre>
+<p>Nor should these, which occur in code spans:</p>
+<p>Backslash: <code>\\</code></p>
+<p>Backtick: <code>\`</code></p>
+<p>Asterisk: <code>\*</code></p>
+<p>Underscore: <code>\_</code></p>
+<p>Left brace: <code>\{</code></p>
+<p>Right brace: <code>\}</code></p>
+<p>Left bracket: <code>\[</code></p>
+<p>Right bracket: <code>\]</code></p>
+<p>Left paren: <code>\(</code></p>
+<p>Right paren: <code>\)</code></p>
+<p>Greater-than: <code>\&gt;</code></p>
+<p>Hash: <code>\#</code></p>
+<p>Period: <code>\.</code></p>
+<p>Bang: <code>\!</code></p>
+<p>Plus: <code>\+</code></p>
+<p>Minus: <code>\-</code></p>
+<p>These should get escaped, even though they're matching pairs for
+other Markdown constructs:</p>
+<p>*asterisks*</p>
+<p>_underscores_</p>
+<p>`backticks`</p>
+<p>This is a code span with a literal backslash-backtick sequence: <code>\`</code></p>
+<p>This is a tag with unescaped backticks <span attr='`ticks`'>bar</span>.</p>
+<p>This is a tag with backslashes <span attr='\\backslashes\\'>bar</span>.</p>
diff --git a/oldtests/Original/Backslash_escapes.markdown b/oldtests/Original/Backslash_escapes.markdown
new file mode 100644
index 0000000..5b014cb
--- /dev/null
+++ b/oldtests/Original/Backslash_escapes.markdown
@@ -0,0 +1,120 @@
+These should all get escaped:
+
+Backslash: \\
+
+Backtick: \`
+
+Asterisk: \*
+
+Underscore: \_
+
+Left brace: \{
+
+Right brace: \}
+
+Left bracket: \[
+
+Right bracket: \]
+
+Left paren: \(
+
+Right paren: \)
+
+Greater-than: \>
+
+Hash: \#
+
+Period: \.
+
+Bang: \!
+
+Plus: \+
+
+Minus: \-
+
+
+
+These should not, because they occur within a code block:
+
+ Backslash: \\
+
+ Backtick: \`
+
+ Asterisk: \*
+
+ Underscore: \_
+
+ Left brace: \{
+
+ Right brace: \}
+
+ Left bracket: \[
+
+ Right bracket: \]
+
+ Left paren: \(
+
+ Right paren: \)
+
+ Greater-than: \>
+
+ Hash: \#
+
+ Period: \.
+
+ Bang: \!
+
+ Plus: \+
+
+ Minus: \-
+
+
+Nor should these, which occur in code spans:
+
+Backslash: `\\`
+
+Backtick: `` \` ``
+
+Asterisk: `\*`
+
+Underscore: `\_`
+
+Left brace: `\{`
+
+Right brace: `\}`
+
+Left bracket: `\[`
+
+Right bracket: `\]`
+
+Left paren: `\(`
+
+Right paren: `\)`
+
+Greater-than: `\>`
+
+Hash: `\#`
+
+Period: `\.`
+
+Bang: `\!`
+
+Plus: `\+`
+
+Minus: `\-`
+
+
+These should get escaped, even though they're matching pairs for
+other Markdown constructs:
+
+\*asterisks\*
+
+\_underscores\_
+
+\`backticks\`
+
+This is a code span with a literal backslash-backtick sequence: `` \` ``
+
+This is a tag with unescaped backticks <span attr='`ticks`'>bar</span>.
+
+This is a tag with backslashes <span attr='\\backslashes\\'>bar</span>.
diff --git a/oldtests/Original/Blockquotes_with_code_blocks.html b/oldtests/Original/Blockquotes_with_code_blocks.html
new file mode 100644
index 0000000..fd1cb1b
--- /dev/null
+++ b/oldtests/Original/Blockquotes_with_code_blocks.html
@@ -0,0 +1,12 @@
+<blockquote>
+<p>Example:</p>
+<pre><code>sub status {
+ print &quot;working&quot;;
+}
+</code></pre>
+<p>Or:</p>
+<pre><code>sub status {
+ return &quot;working&quot;;
+}
+</code></pre>
+</blockquote>
diff --git a/oldtests/Original/Blockquotes_with_code_blocks.markdown b/oldtests/Original/Blockquotes_with_code_blocks.markdown
new file mode 100644
index 0000000..c31d171
--- /dev/null
+++ b/oldtests/Original/Blockquotes_with_code_blocks.markdown
@@ -0,0 +1,11 @@
+> Example:
+>
+> sub status {
+> print "working";
+> }
+>
+> Or:
+>
+> sub status {
+> return "working";
+> }
diff --git a/oldtests/Original/Code_Blocks.html b/oldtests/Original/Code_Blocks.html
new file mode 100644
index 0000000..7d89615
--- /dev/null
+++ b/oldtests/Original/Code_Blocks.html
@@ -0,0 +1,12 @@
+<pre><code>code block on the first line
+</code></pre>
+<p>Regular text.</p>
+<pre><code>code block indented by spaces
+</code></pre>
+<p>Regular text.</p>
+<pre><code>the lines in this block
+all contain trailing spaces
+</code></pre>
+<p>Regular Text.</p>
+<pre><code>code block on the last line
+</code></pre>
diff --git a/oldtests/Original/Code_Blocks.markdown b/oldtests/Original/Code_Blocks.markdown
new file mode 100644
index 0000000..b54b092
--- /dev/null
+++ b/oldtests/Original/Code_Blocks.markdown
@@ -0,0 +1,14 @@
+ code block on the first line
+
+Regular text.
+
+ code block indented by spaces
+
+Regular text.
+
+ the lines in this block
+ all contain trailing spaces
+
+Regular Text.
+
+ code block on the last line \ No newline at end of file
diff --git a/oldtests/Original/Code_Spans.html b/oldtests/Original/Code_Spans.html
new file mode 100644
index 0000000..27acea1
--- /dev/null
+++ b/oldtests/Original/Code_Spans.html
@@ -0,0 +1,3 @@
+<p><code>&lt;test a=&quot;</code> content of attribute <code>&quot;&gt;</code></p>
+<p>Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span></p>
+<p>Here's how you put <code>`backticks`</code> in a code span.</p>
diff --git a/oldtests/Original/Code_Spans.markdown b/oldtests/Original/Code_Spans.markdown
new file mode 100644
index 0000000..5c229c7
--- /dev/null
+++ b/oldtests/Original/Code_Spans.markdown
@@ -0,0 +1,5 @@
+`<test a="` content of attribute `">`
+
+Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span>
+
+Here's how you put `` `backticks` `` in a code span. \ No newline at end of file
diff --git a/oldtests/Original/Horizontal_rules.html b/oldtests/Original/Horizontal_rules.html
new file mode 100644
index 0000000..a89efdb
--- /dev/null
+++ b/oldtests/Original/Horizontal_rules.html
@@ -0,0 +1,39 @@
+<p>Dashes:</p>
+<hr />
+<hr />
+<hr />
+<hr />
+<pre><code>---
+</code></pre>
+<hr />
+<hr />
+<hr />
+<hr />
+<pre><code>- - -
+</code></pre>
+<p>Asterisks:</p>
+<hr />
+<hr />
+<hr />
+<hr />
+<pre><code>***
+</code></pre>
+<hr />
+<hr />
+<hr />
+<hr />
+<pre><code>* * *
+</code></pre>
+<p>Underscores:</p>
+<hr />
+<hr />
+<hr />
+<hr />
+<pre><code>___
+</code></pre>
+<hr />
+<hr />
+<hr />
+<hr />
+<pre><code>_ _ _
+</code></pre>
diff --git a/oldtests/Original/Horizontal_rules.markdown b/oldtests/Original/Horizontal_rules.markdown
new file mode 100644
index 0000000..1594bda
--- /dev/null
+++ b/oldtests/Original/Horizontal_rules.markdown
@@ -0,0 +1,67 @@
+Dashes:
+
+---
+
+ ---
+
+ ---
+
+ ---
+
+ ---
+
+- - -
+
+ - - -
+
+ - - -
+
+ - - -
+
+ - - -
+
+
+Asterisks:
+
+***
+
+ ***
+
+ ***
+
+ ***
+
+ ***
+
+* * *
+
+ * * *
+
+ * * *
+
+ * * *
+
+ * * *
+
+
+Underscores:
+
+___
+
+ ___
+
+ ___
+
+ ___
+
+ ___
+
+_ _ _
+
+ _ _ _
+
+ _ _ _
+
+ _ _ _
+
+ _ _ _
diff --git a/oldtests/Original/Images.html b/oldtests/Original/Images.html
new file mode 100644
index 0000000..bd5a7e0
--- /dev/null
+++ b/oldtests/Original/Images.html
@@ -0,0 +1,11 @@
+<p><img src="/path/to/img.jpg" alt="Alt text" /></p>
+<p><img src="/path/to/img.jpg" alt="Alt text" title="Optional title" /></p>
+<p>Inline within a paragraph: <a href="/url/">alt text</a>.</p>
+<p><img src="/url/" alt="alt text" title="title preceded by two spaces" /></p>
+<p><img src="/url/" alt="alt text" title="title has spaces afterward" /></p>
+<p><img src="/url/" alt="alt text" /></p>
+<p><img src="/url/" alt="alt text" title="with a title" />.</p>
+<p><img src="" alt="Empty" /></p>
+<p><img src="http://example.com/(parens).jpg" alt="this is a stupid URL" /></p>
+<p><img src="/url/" alt="alt text" /></p>
+<p><img src="/url/" alt="alt text" title="Title here" /></p>
diff --git a/oldtests/Original/Images.markdown b/oldtests/Original/Images.markdown
new file mode 100644
index 0000000..5707590
--- /dev/null
+++ b/oldtests/Original/Images.markdown
@@ -0,0 +1,26 @@
+![Alt text](/path/to/img.jpg)
+
+![Alt text](/path/to/img.jpg "Optional title")
+
+Inline within a paragraph: [alt text](/url/).
+
+![alt text](/url/ "title preceded by two spaces")
+
+![alt text](/url/ "title has spaces afterward" )
+
+![alt text](</url/>)
+
+![alt text](</url/> "with a title").
+
+![Empty]()
+
+![this is a stupid URL](http://example.com/(parens).jpg)
+
+
+![alt text][foo]
+
+ [foo]: /url/
+
+![alt text][bar]
+
+ [bar]: /url/ "Title here" \ No newline at end of file
diff --git a/oldtests/Original/Inline_HTML_Advanced.html b/oldtests/Original/Inline_HTML_Advanced.html
new file mode 100644
index 0000000..631c135
--- /dev/null
+++ b/oldtests/Original/Inline_HTML_Advanced.html
@@ -0,0 +1,23 @@
+<p>Simple block on one line:</p>
+<div>foo</div>
+<p>And nested without indentation:</p>
+<div>
+<div>
+<div>
+foo
+</div>
+<div style=">"/>
+</div>
+<div>bar</div>
+</div>
+<p>And with attributes:</p>
+<div>
+ <div id="foo">
+ </div>
+</div>
+<p>This was broken in 1.0.2b7:</p>
+<div class="inlinepage">
+<div class="toggleableend">
+foo
+</div>
+</div>
diff --git a/oldtests/Original/Inline_HTML_Advanced.markdown b/oldtests/Original/Inline_HTML_Advanced.markdown
new file mode 100644
index 0000000..3633f81
--- /dev/null
+++ b/oldtests/Original/Inline_HTML_Advanced.markdown
@@ -0,0 +1,30 @@
+Simple block on one line:
+
+<div>foo</div>
+
+And nested without indentation:
+
+<div>
+<div>
+<div>
+foo
+</div>
+<div style=">"/>
+</div>
+<div>bar</div>
+</div>
+
+And with attributes:
+
+<div>
+ <div id="foo">
+ </div>
+</div>
+
+This was broken in 1.0.2b7:
+
+<div class="inlinepage">
+<div class="toggleableend">
+foo
+</div>
+</div>
diff --git a/oldtests/Original/Inline_HTML_Simple.html b/oldtests/Original/Inline_HTML_Simple.html
new file mode 100644
index 0000000..923a18c
--- /dev/null
+++ b/oldtests/Original/Inline_HTML_Simple.html
@@ -0,0 +1,45 @@
+<p>Here's a simple block:</p>
+<div>
+ foo
+</div>
+<p>This should be a code block, though:</p>
+<pre><code>&lt;div&gt;
+ foo
+&lt;/div&gt;
+</code></pre>
+<p>As should this:</p>
+<pre><code>&lt;div&gt;foo&lt;/div&gt;
+</code></pre>
+<p>Now, nested:</p>
+<div>
+ <div>
+ <div>
+ foo
+ </div>
+ </div>
+</div>
+<p>This should just be an HTML comment:</p>
+<!-- Comment -->
+<p>Multiline:</p>
+<!--
+Blah
+Blah
+-->
+<p>Code block:</p>
+<pre><code>&lt;!-- Comment --&gt;
+</code></pre>
+<p>Just plain comment, with trailing spaces on the line:</p>
+<!-- foo -->
+<p>Code:</p>
+<pre><code>&lt;hr /&gt;
+</code></pre>
+<p>Hr's:</p>
+<hr>
+<hr/>
+<hr />
+<hr>
+<hr/>
+<hr />
+<hr class="foo" id="bar" />
+<hr class="foo" id="bar"/>
+<hr class="foo" id="bar" >
diff --git a/oldtests/Original/Inline_HTML_Simple.markdown b/oldtests/Original/Inline_HTML_Simple.markdown
new file mode 100644
index 0000000..14aa2dc
--- /dev/null
+++ b/oldtests/Original/Inline_HTML_Simple.markdown
@@ -0,0 +1,69 @@
+Here's a simple block:
+
+<div>
+ foo
+</div>
+
+This should be a code block, though:
+
+ <div>
+ foo
+ </div>
+
+As should this:
+
+ <div>foo</div>
+
+Now, nested:
+
+<div>
+ <div>
+ <div>
+ foo
+ </div>
+ </div>
+</div>
+
+This should just be an HTML comment:
+
+<!-- Comment -->
+
+Multiline:
+
+<!--
+Blah
+Blah
+-->
+
+Code block:
+
+ <!-- Comment -->
+
+Just plain comment, with trailing spaces on the line:
+
+<!-- foo -->
+
+Code:
+
+ <hr />
+
+Hr's:
+
+<hr>
+
+<hr/>
+
+<hr />
+
+<hr>
+
+<hr/>
+
+<hr />
+
+<hr class="foo" id="bar" />
+
+<hr class="foo" id="bar"/>
+
+<hr class="foo" id="bar" >
+
diff --git a/oldtests/Original/Inline_HTML_comments.html b/oldtests/Original/Inline_HTML_comments.html
new file mode 100644
index 0000000..ebc4818
--- /dev/null
+++ b/oldtests/Original/Inline_HTML_comments.html
@@ -0,0 +1,8 @@
+<p>Paragraph one.</p>
+<!-- This is a simple comment -->
+<!--
+ This is another comment.
+-->
+<p>Paragraph two.</p>
+<!-- one comment block -- -- with two comments -->
+<p>The end.</p>
diff --git a/oldtests/Original/Inline_HTML_comments.markdown b/oldtests/Original/Inline_HTML_comments.markdown
new file mode 100644
index 0000000..41d830d
--- /dev/null
+++ b/oldtests/Original/Inline_HTML_comments.markdown
@@ -0,0 +1,13 @@
+Paragraph one.
+
+<!-- This is a simple comment -->
+
+<!--
+ This is another comment.
+-->
+
+Paragraph two.
+
+<!-- one comment block -- -- with two comments -->
+
+The end.
diff --git a/oldtests/Original/Links_inline_style.html b/oldtests/Original/Links_inline_style.html
new file mode 100644
index 0000000..feb4637
--- /dev/null
+++ b/oldtests/Original/Links_inline_style.html
@@ -0,0 +1,12 @@
+<p>Just a <a href="/url/">URL</a>.</p>
+<p><a href="/url/" title="title">URL and title</a>.</p>
+<p><a href="/url/" title="title preceded by two spaces">URL and title</a>.</p>
+<p><a href="/url/" title="title preceded by a tab">URL and title</a>.</p>
+<p><a href="/url/" title="title has spaces afterward">URL and title</a>.</p>
+<p><a href="/url/">URL wrapped in angle brackets</a>.</p>
+<p><a href="/url/" title="Here's the title">URL w/ angle brackets + title</a>.</p>
+<p><a href="">Empty</a>.</p>
+<p><a href="http://en.wikipedia.org/wiki/WIMP_(computing)">With parens in the URL</a></p>
+<p>(With outer parens and <a href="/foo(bar)">parens in url</a>)</p>
+<p><a href="/foo(bar)" title="and a title">With parens in the URL</a></p>
+<p>(With outer parens and <a href="/foo(bar)" title="and a title">parens in url</a>)</p>
diff --git a/oldtests/Original/Links_inline_style.markdown b/oldtests/Original/Links_inline_style.markdown
new file mode 100644
index 0000000..aba9658
--- /dev/null
+++ b/oldtests/Original/Links_inline_style.markdown
@@ -0,0 +1,24 @@
+Just a [URL](/url/).
+
+[URL and title](/url/ "title").
+
+[URL and title](/url/ "title preceded by two spaces").
+
+[URL and title](/url/ "title preceded by a tab").
+
+[URL and title](/url/ "title has spaces afterward" ).
+
+[URL wrapped in angle brackets](</url/>).
+
+[URL w/ angle brackets + title](</url/> "Here's the title").
+
+[Empty]().
+
+[With parens in the URL](http://en.wikipedia.org/wiki/WIMP_(computing))
+
+(With outer parens and [parens in url](/foo(bar)))
+
+
+[With parens in the URL](/foo(bar) "and a title")
+
+(With outer parens and [parens in url](/foo(bar) "and a title"))
diff --git a/oldtests/Original/Links_reference_style.html b/oldtests/Original/Links_reference_style.html
new file mode 100644
index 0000000..6d78b96
--- /dev/null
+++ b/oldtests/Original/Links_reference_style.html
@@ -0,0 +1,28 @@
+<p>Foo <a href="/url/" title="Title">bar</a>.</p>
+<p>Foo <a href="/url/" title="Title">bar</a>.</p>
+<p>Foo <a href="/url/" title="Title">bar</a>.</p>
+<p>With <a href="/url/">embedded [brackets]</a>.</p>
+<p>Indented <a href="/url">once</a>.</p>
+<p>Indented <a href="/url">twice</a>.</p>
+<p>Indented <a href="/url">thrice</a>.</p>
+<p>Indented [four][] times.</p>
+<pre><code>[four]: /url
+</code></pre>
+<hr />
+<p><a href="foo">this</a> should work</p>
+<p>So should <a href="foo">this</a>.</p>
+<p>And <a href="foo">this</a>.</p>
+<p>And <a href="foo">this</a>.</p>
+<p>And <a href="foo">this</a>.</p>
+<p>But not [that] [].</p>
+<p>Nor [that][].</p>
+<p>Nor [that].</p>
+<p>[Something in brackets like <a href="foo">this</a> should work]</p>
+<p>[Same with <a href="foo">this</a>.]</p>
+<p>In this case, <a href="/somethingelse/">this</a> points to something else.</p>
+<p>Backslashing should suppress [this] and [this].</p>
+<hr />
+<p>Here's one where the <a href="/url/">link
+breaks</a> across lines.</p>
+<p>Here's another where the <a href="/url/">link
+breaks</a> across lines, but with a line-ending space.</p>
diff --git a/oldtests/Original/Links_reference_style.markdown b/oldtests/Original/Links_reference_style.markdown
new file mode 100644
index 0000000..341ec88
--- /dev/null
+++ b/oldtests/Original/Links_reference_style.markdown
@@ -0,0 +1,71 @@
+Foo [bar] [1].
+
+Foo [bar][1].
+
+Foo [bar]
+[1].
+
+[1]: /url/ "Title"
+
+
+With [embedded [brackets]] [b].
+
+
+Indented [once][].
+
+Indented [twice][].
+
+Indented [thrice][].
+
+Indented [four][] times.
+
+ [once]: /url
+
+ [twice]: /url
+
+ [thrice]: /url
+
+ [four]: /url
+
+
+[b]: /url/
+
+* * *
+
+[this] [this] should work
+
+So should [this][this].
+
+And [this] [].
+
+And [this][].
+
+And [this].
+
+But not [that] [].
+
+Nor [that][].
+
+Nor [that].
+
+[Something in brackets like [this][] should work]
+
+[Same with [this].]
+
+In this case, [this](/somethingelse/) points to something else.
+
+Backslashing should suppress \[this] and [this\].
+
+[this]: foo
+
+
+* * *
+
+Here's one where the [link
+breaks] across lines.
+
+Here's another where the [link
+breaks] across lines, but with a line-ending space.
+
+
+[link breaks]: /url/
diff --git a/oldtests/Original/Links_shortcut_references.html b/oldtests/Original/Links_shortcut_references.html
new file mode 100644
index 0000000..8163ade
--- /dev/null
+++ b/oldtests/Original/Links_shortcut_references.html
@@ -0,0 +1,6 @@
+<p>This is the <a href="/simple">simple case</a>.</p>
+<p>This one has a <a href="/foo">line
+break</a>.</p>
+<p>This one has a <a href="/foo">line
+break</a> with a line-ending space.</p>
+<p><a href="/that">this</a> and the <a href="/other">other</a></p>
diff --git a/oldtests/Original/Links_shortcut_references.markdown b/oldtests/Original/Links_shortcut_references.markdown
new file mode 100644
index 0000000..8c44c98
--- /dev/null
+++ b/oldtests/Original/Links_shortcut_references.markdown
@@ -0,0 +1,20 @@
+This is the [simple case].
+
+[simple case]: /simple
+
+
+
+This one has a [line
+break].
+
+This one has a [line
+break] with a line-ending space.
+
+[line break]: /foo
+
+
+[this] [that] and the [other]
+
+[this]: /this
+[that]: /that
+[other]: /other
diff --git a/oldtests/Original/Literal_quotes_in_titles.html b/oldtests/Original/Literal_quotes_in_titles.html
new file mode 100644
index 0000000..62e8641
--- /dev/null
+++ b/oldtests/Original/Literal_quotes_in_titles.html
@@ -0,0 +1,2 @@
+<p>Foo <a href="/url/" title="Title with &quot;quotes&quot; inside">bar</a>.</p>
+<p>Foo <a href="/url/" title="Title with &quot;quotes&quot; inside">bar</a>.</p>
diff --git a/oldtests/Original/Literal_quotes_in_titles.markdown b/oldtests/Original/Literal_quotes_in_titles.markdown
new file mode 100644
index 0000000..29d0e42
--- /dev/null
+++ b/oldtests/Original/Literal_quotes_in_titles.markdown
@@ -0,0 +1,7 @@
+Foo [bar][].
+
+Foo [bar](/url/ "Title with "quotes" inside").
+
+
+ [bar]: /url/ "Title with "quotes" inside"
+
diff --git a/oldtests/Original/Markdown_Documentation_Basics.html b/oldtests/Original/Markdown_Documentation_Basics.html
new file mode 100644
index 0000000..0dee67f
--- /dev/null
+++ b/oldtests/Original/Markdown_Documentation_Basics.html
@@ -0,0 +1,242 @@
+<h1>Markdown: Basics</h1>
+<ul id="ProjectSubmenu">
+ <li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li>
+ <li><a class="selected" title="Markdown Basics">Basics</a></li>
+ <li><a href="/projects/markdown/syntax" title="Markdown Syntax Documentation">Syntax</a></li>
+ <li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li>
+ <li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li>
+</ul>
+<h2>Getting the Gist of Markdown's Formatting Syntax</h2>
+<p>This page offers a brief overview of what it's like to use Markdown.
+The <a href="/projects/markdown/syntax" title="Markdown Syntax">syntax page</a> provides complete, detailed documentation for
+every feature, but Markdown should be very easy to pick up simply by
+looking at a few examples of it in action. The examples on this page
+are written in a before/after style, showing example syntax and the
+HTML output produced by Markdown.</p>
+<p>It's also helpful to simply try Markdown out; the <a href="/projects/markdown/dingus" title="Markdown Dingus">Dingus</a> is a
+web application that allows you type your own Markdown-formatted text
+and translate it to XHTML.</p>
+<p><strong>Note:</strong> This document is itself written using Markdown; you
+can <a href="/projects/markdown/basics.text">see the source for it by adding '.text' to the URL</a>.</p>
+<h2>Paragraphs, Headers, Blockquotes</h2>
+<p>A paragraph is simply one or more consecutive lines of text, separated
+by one or more blank lines. (A blank line is any line that looks like a
+blank line -- a line containing nothing spaces or tabs is considered
+blank.) Normal paragraphs should not be intended with spaces or tabs.</p>
+<p>Markdown offers two styles of headers: <em>Setext</em> and <em>atx</em>.
+Setext-style headers for <code>&lt;h1&gt;</code> and <code>&lt;h2&gt;</code> are created by
+&quot;underlining&quot; with equal signs (<code>=</code>) and hyphens (<code>-</code>), respectively.
+To create an atx-style header, you put 1-6 hash marks (<code>#</code>) at the
+beginning of the line -- the number of hashes equals the resulting
+HTML header level.</p>
+<p>Blockquotes are indicated using email-style '<code>&gt;</code>' angle brackets.</p>
+<p>Markdown:</p>
+<pre><code>A First Level Header
+====================
+
+A Second Level Header
+---------------------
+
+Now is the time for all good men to come to
+the aid of their country. This is just a
+regular paragraph.
+
+The quick brown fox jumped over the lazy
+dog's back.
+
+### Header 3
+
+&gt; This is a blockquote.
+&gt;
+&gt; This is the second paragraph in the blockquote.
+&gt;
+&gt; ## This is an H2 in a blockquote
+</code></pre>
+<p>Output:</p>
+<pre><code>&lt;h1&gt;A First Level Header&lt;/h1&gt;
+
+&lt;h2&gt;A Second Level Header&lt;/h2&gt;
+
+&lt;p&gt;Now is the time for all good men to come to
+the aid of their country. This is just a
+regular paragraph.&lt;/p&gt;
+
+&lt;p&gt;The quick brown fox jumped over the lazy
+dog's back.&lt;/p&gt;
+
+&lt;h3&gt;Header 3&lt;/h3&gt;
+
+&lt;blockquote&gt;
+ &lt;p&gt;This is a blockquote.&lt;/p&gt;
+
+ &lt;p&gt;This is the second paragraph in the blockquote.&lt;/p&gt;
+
+ &lt;h2&gt;This is an H2 in a blockquote&lt;/h2&gt;
+&lt;/blockquote&gt;
+</code></pre>
+<h3>Phrase Emphasis</h3>
+<p>Markdown uses asterisks and underscores to indicate spans of emphasis.</p>
+<p>Markdown:</p>
+<pre><code>Some of these words *are emphasized*.
+Some of these words _are emphasized also_.
+
+Use two asterisks for **strong emphasis**.
+Or, if you prefer, __use two underscores instead__.
+</code></pre>
+<p>Output:</p>
+<pre><code>&lt;p&gt;Some of these words &lt;em&gt;are emphasized&lt;/em&gt;.
+Some of these words &lt;em&gt;are emphasized also&lt;/em&gt;.&lt;/p&gt;
+
+&lt;p&gt;Use two asterisks for &lt;strong&gt;strong emphasis&lt;/strong&gt;.
+Or, if you prefer, &lt;strong&gt;use two underscores instead&lt;/strong&gt;.&lt;/p&gt;
+</code></pre>
+<h2>Lists</h2>
+<p>Unordered (bulleted) lists use asterisks, pluses, and hyphens (<code>*</code>,
+<code>+</code>, and <code>-</code>) as list markers. These three markers are
+interchangable; this:</p>
+<pre><code>* Candy.
+* Gum.
+* Booze.
+</code></pre>
+<p>this:</p>
+<pre><code>+ Candy.
++ Gum.
++ Booze.
+</code></pre>
+<p>and this:</p>
+<pre><code>- Candy.
+- Gum.
+- Booze.
+</code></pre>
+<p>all produce the same output:</p>
+<pre><code>&lt;ul&gt;
+&lt;li&gt;Candy.&lt;/li&gt;
+&lt;li&gt;Gum.&lt;/li&gt;
+&lt;li&gt;Booze.&lt;/li&gt;
+&lt;/ul&gt;
+</code></pre>
+<p>Ordered (numbered) lists use regular numbers, followed by periods, as
+list markers:</p>
+<pre><code>1. Red
+2. Green
+3. Blue
+</code></pre>
+<p>Output:</p>
+<pre><code>&lt;ol&gt;
+&lt;li&gt;Red&lt;/li&gt;
+&lt;li&gt;Green&lt;/li&gt;
+&lt;li&gt;Blue&lt;/li&gt;
+&lt;/ol&gt;
+</code></pre>
+<p>If you put blank lines between items, you'll get <code>&lt;p&gt;</code> tags for the
+list item text. You can create multi-paragraph list items by indenting
+the paragraphs by 4 spaces or 1 tab:</p>
+<pre><code>* A list item.
+
+ With multiple paragraphs.
+
+* Another item in the list.
+</code></pre>
+<p>Output:</p>
+<pre><code>&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;A list item.&lt;/p&gt;
+&lt;p&gt;With multiple paragraphs.&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Another item in the list.&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;
+</code></pre>
+<h3>Links</h3>
+<p>Markdown supports two styles for creating links: <em>inline</em> and
+<em>reference</em>. With both styles, you use square brackets to delimit the
+text you want to turn into a link.</p>
+<p>Inline-style links use parentheses immediately after the link text.
+For example:</p>
+<pre><code>This is an [example link](http://example.com/).
+</code></pre>
+<p>Output:</p>
+<pre><code>&lt;p&gt;This is an &lt;a href=&quot;http://example.com/&quot;&gt;
+example link&lt;/a&gt;.&lt;/p&gt;
+</code></pre>
+<p>Optionally, you may include a title attribute in the parentheses:</p>
+<pre><code>This is an [example link](http://example.com/ &quot;With a Title&quot;).
+</code></pre>
+<p>Output:</p>
+<pre><code>&lt;p&gt;This is an &lt;a href=&quot;http://example.com/&quot; title=&quot;With a Title&quot;&gt;
+example link&lt;/a&gt;.&lt;/p&gt;
+</code></pre>
+<p>Reference-style links allow you to refer to your links by names, which
+you define elsewhere in your document:</p>
+<pre><code>I get 10 times more traffic from [Google][1] than from
+[Yahoo][2] or [MSN][3].
+
+[1]: http://google.com/ &quot;Google&quot;
+[2]: http://search.yahoo.com/ &quot;Yahoo Search&quot;
+[3]: http://search.msn.com/ &quot;MSN Search&quot;
+</code></pre>
+<p>Output:</p>
+<pre><code>&lt;p&gt;I get 10 times more traffic from &lt;a href=&quot;http://google.com/&quot;
+title=&quot;Google&quot;&gt;Google&lt;/a&gt; than from &lt;a href=&quot;http://search.yahoo.com/&quot;
+title=&quot;Yahoo Search&quot;&gt;Yahoo&lt;/a&gt; or &lt;a href=&quot;http://search.msn.com/&quot;
+title=&quot;MSN Search&quot;&gt;MSN&lt;/a&gt;.&lt;/p&gt;
+</code></pre>
+<p>The title attribute is optional. Link names may contain letters,
+numbers and spaces, but are <em>not</em> case sensitive:</p>
+<pre><code>I start my morning with a cup of coffee and
+[The New York Times][NY Times].
+
+[ny times]: http://www.nytimes.com/
+</code></pre>
+<p>Output:</p>
+<pre><code>&lt;p&gt;I start my morning with a cup of coffee and
+&lt;a href=&quot;http://www.nytimes.com/&quot;&gt;The New York Times&lt;/a&gt;.&lt;/p&gt;
+</code></pre>
+<h3>Images</h3>
+<p>Image syntax is very much like link syntax.</p>
+<p>Inline (titles are optional):</p>
+<pre><code>![alt text](/path/to/img.jpg &quot;Title&quot;)
+</code></pre>
+<p>Reference-style:</p>
+<pre><code>![alt text][id]
+
+[id]: /path/to/img.jpg &quot;Title&quot;
+</code></pre>
+<p>Both of the above examples produce the same output:</p>
+<pre><code>&lt;img src=&quot;/path/to/img.jpg&quot; alt=&quot;alt text&quot; title=&quot;Title&quot; /&gt;
+</code></pre>
+<h3>Code</h3>
+<p>In a regular paragraph, you can create code span by wrapping text in
+backtick quotes. Any ampersands (<code>&amp;</code>) and angle brackets (<code>&lt;</code> or
+<code>&gt;</code>) will automatically be translated into HTML entities. This makes
+it easy to use Markdown to write about HTML example code:</p>
+<pre><code>I strongly recommend against using any `&lt;blink&gt;` tags.
+
+I wish SmartyPants used named entities like `&amp;mdash;`
+instead of decimal-encoded entites like `&amp;#8212;`.
+</code></pre>
+<p>Output:</p>
+<pre><code>&lt;p&gt;I strongly recommend against using any
+&lt;code&gt;&amp;lt;blink&amp;gt;&lt;/code&gt; tags.&lt;/p&gt;
+
+&lt;p&gt;I wish SmartyPants used named entities like
+&lt;code&gt;&amp;amp;mdash;&lt;/code&gt; instead of decimal-encoded
+entites like &lt;code&gt;&amp;amp;#8212;&lt;/code&gt;.&lt;/p&gt;
+</code></pre>
+<p>To specify an entire block of pre-formatted code, indent every line of
+the block by 4 spaces or 1 tab. Just like with code spans, <code>&amp;</code>, <code>&lt;</code>,
+and <code>&gt;</code> characters will be escaped automatically.</p>
+<p>Markdown:</p>
+<pre><code>If you want your page to validate under XHTML 1.0 Strict,
+you've got to put paragraph tags in your blockquotes:
+
+ &lt;blockquote&gt;
+ &lt;p&gt;For example.&lt;/p&gt;
+ &lt;/blockquote&gt;
+</code></pre>
+<p>Output:</p>
+<pre><code>&lt;p&gt;If you want your page to validate under XHTML 1.0 Strict,
+you've got to put paragraph tags in your blockquotes:&lt;/p&gt;
+
+&lt;pre&gt;&lt;code&gt;&amp;lt;blockquote&amp;gt;
+ &amp;lt;p&amp;gt;For example.&amp;lt;/p&amp;gt;
+&amp;lt;/blockquote&amp;gt;
+&lt;/code&gt;&lt;/pre&gt;
+</code></pre>
diff --git a/oldtests/Original/Markdown_Documentation_Basics.markdown b/oldtests/Original/Markdown_Documentation_Basics.markdown
new file mode 100644
index 0000000..24eba65
--- /dev/null
+++ b/oldtests/Original/Markdown_Documentation_Basics.markdown
@@ -0,0 +1,306 @@
+Markdown: Basics
+================
+
+<ul id="ProjectSubmenu">
+ <li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li>
+ <li><a class="selected" title="Markdown Basics">Basics</a></li>
+ <li><a href="/projects/markdown/syntax" title="Markdown Syntax Documentation">Syntax</a></li>
+ <li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li>
+ <li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li>
+</ul>
+
+
+Getting the Gist of Markdown's Formatting Syntax
+------------------------------------------------
+
+This page offers a brief overview of what it's like to use Markdown.
+The [syntax page] [s] provides complete, detailed documentation for
+every feature, but Markdown should be very easy to pick up simply by
+looking at a few examples of it in action. The examples on this page
+are written in a before/after style, showing example syntax and the
+HTML output produced by Markdown.
+
+It's also helpful to simply try Markdown out; the [Dingus] [d] is a
+web application that allows you type your own Markdown-formatted text
+and translate it to XHTML.
+
+**Note:** This document is itself written using Markdown; you
+can [see the source for it by adding '.text' to the URL] [src].
+
+ [s]: /projects/markdown/syntax "Markdown Syntax"
+ [d]: /projects/markdown/dingus "Markdown Dingus"
+ [src]: /projects/markdown/basics.text
+
+
+## Paragraphs, Headers, Blockquotes ##
+
+A paragraph is simply one or more consecutive lines of text, separated
+by one or more blank lines. (A blank line is any line that looks like a
+blank line -- a line containing nothing spaces or tabs is considered
+blank.) Normal paragraphs should not be intended with spaces or tabs.
+
+Markdown offers two styles of headers: *Setext* and *atx*.
+Setext-style headers for `<h1>` and `<h2>` are created by
+"underlining" with equal signs (`=`) and hyphens (`-`), respectively.
+To create an atx-style header, you put 1-6 hash marks (`#`) at the
+beginning of the line -- the number of hashes equals the resulting
+HTML header level.
+
+Blockquotes are indicated using email-style '`>`' angle brackets.
+
+Markdown:
+
+ A First Level Header
+ ====================
+
+ A Second Level Header
+ ---------------------
+
+ Now is the time for all good men to come to
+ the aid of their country. This is just a
+ regular paragraph.
+
+ The quick brown fox jumped over the lazy
+ dog's back.
+
+ ### Header 3
+
+ > This is a blockquote.
+ >
+ > This is the second paragraph in the blockquote.
+ >
+ > ## This is an H2 in a blockquote
+
+
+Output:
+
+ <h1>A First Level Header</h1>
+
+ <h2>A Second Level Header</h2>
+
+ <p>Now is the time for all good men to come to
+ the aid of their country. This is just a
+ regular paragraph.</p>
+
+ <p>The quick brown fox jumped over the lazy
+ dog's back.</p>
+
+ <h3>Header 3</h3>
+
+ <blockquote>
+ <p>This is a blockquote.</p>
+
+ <p>This is the second paragraph in the blockquote.</p>
+
+ <h2>This is an H2 in a blockquote</h2>
+ </blockquote>
+
+
+
+### Phrase Emphasis ###
+
+Markdown uses asterisks and underscores to indicate spans of emphasis.
+
+Markdown:
+
+ Some of these words *are emphasized*.
+ Some of these words _are emphasized also_.
+
+ Use two asterisks for **strong emphasis**.
+ Or, if you prefer, __use two underscores instead__.
+
+Output:
+
+ <p>Some of these words <em>are emphasized</em>.
+ Some of these words <em>are emphasized also</em>.</p>
+
+ <p>Use two asterisks for <strong>strong emphasis</strong>.
+ Or, if you prefer, <strong>use two underscores instead</strong>.</p>
+
+
+
+## Lists ##
+
+Unordered (bulleted) lists use asterisks, pluses, and hyphens (`*`,
+`+`, and `-`) as list markers. These three markers are
+interchangable; this:
+
+ * Candy.
+ * Gum.
+ * Booze.
+
+this:
+
+ + Candy.
+ + Gum.
+ + Booze.
+
+and this:
+
+ - Candy.
+ - Gum.
+ - Booze.
+
+all produce the same output:
+
+ <ul>
+ <li>Candy.</li>
+ <li>Gum.</li>
+ <li>Booze.</li>
+ </ul>
+
+Ordered (numbered) lists use regular numbers, followed by periods, as
+list markers:
+
+ 1. Red
+ 2. Green
+ 3. Blue
+
+Output:
+
+ <ol>
+ <li>Red</li>
+ <li>Green</li>
+ <li>Blue</li>
+ </ol>
+
+If you put blank lines between items, you'll get `<p>` tags for the
+list item text. You can create multi-paragraph list items by indenting
+the paragraphs by 4 spaces or 1 tab:
+
+ * A list item.
+
+ With multiple paragraphs.
+
+ * Another item in the list.
+
+Output:
+
+ <ul>
+ <li><p>A list item.</p>
+ <p>With multiple paragraphs.</p></li>
+ <li><p>Another item in the list.</p></li>
+ </ul>
+
+
+
+### Links ###
+
+Markdown supports two styles for creating links: *inline* and
+*reference*. With both styles, you use square brackets to delimit the
+text you want to turn into a link.
+
+Inline-style links use parentheses immediately after the link text.
+For example:
+
+ This is an [example link](http://example.com/).
+
+Output:
+
+ <p>This is an <a href="http://example.com/">
+ example link</a>.</p>
+
+Optionally, you may include a title attribute in the parentheses:
+
+ This is an [example link](http://example.com/ "With a Title").
+
+Output:
+
+ <p>This is an <a href="http://example.com/" title="With a Title">
+ example link</a>.</p>
+
+Reference-style links allow you to refer to your links by names, which
+you define elsewhere in your document:
+
+ I get 10 times more traffic from [Google][1] than from
+ [Yahoo][2] or [MSN][3].
+
+ [1]: http://google.com/ "Google"
+ [2]: http://search.yahoo.com/ "Yahoo Search"
+ [3]: http://search.msn.com/ "MSN Search"
+
+Output:
+
+ <p>I get 10 times more traffic from <a href="http://google.com/"
+ title="Google">Google</a> than from <a href="http://search.yahoo.com/"
+ title="Yahoo Search">Yahoo</a> or <a href="http://search.msn.com/"
+ title="MSN Search">MSN</a>.</p>
+
+The title attribute is optional. Link names may contain letters,
+numbers and spaces, but are *not* case sensitive:
+
+ I start my morning with a cup of coffee and
+ [The New York Times][NY Times].
+
+ [ny times]: http://www.nytimes.com/
+
+Output:
+
+ <p>I start my morning with a cup of coffee and
+ <a href="http://www.nytimes.com/">The New York Times</a>.</p>
+
+
+### Images ###
+
+Image syntax is very much like link syntax.
+
+Inline (titles are optional):
+
+ ![alt text](/path/to/img.jpg "Title")
+
+Reference-style:
+
+ ![alt text][id]
+
+ [id]: /path/to/img.jpg "Title"
+
+Both of the above examples produce the same output:
+
+ <img src="/path/to/img.jpg" alt="alt text" title="Title" />
+
+
+
+### Code ###
+
+In a regular paragraph, you can create code span by wrapping text in
+backtick quotes. Any ampersands (`&`) and angle brackets (`<` or
+`>`) will automatically be translated into HTML entities. This makes
+it easy to use Markdown to write about HTML example code:
+
+ I strongly recommend against using any `<blink>` tags.
+
+ I wish SmartyPants used named entities like `&mdash;`
+ instead of decimal-encoded entites like `&#8212;`.
+
+Output:
+
+ <p>I strongly recommend against using any
+ <code>&lt;blink&gt;</code> tags.</p>
+
+ <p>I wish SmartyPants used named entities like
+ <code>&amp;mdash;</code> instead of decimal-encoded
+ entites like <code>&amp;#8212;</code>.</p>
+
+
+To specify an entire block of pre-formatted code, indent every line of
+the block by 4 spaces or 1 tab. Just like with code spans, `&`, `<`,
+and `>` characters will be escaped automatically.
+
+Markdown:
+
+ If you want your page to validate under XHTML 1.0 Strict,
+ you've got to put paragraph tags in your blockquotes:
+
+ <blockquote>
+ <p>For example.</p>
+ </blockquote>
+
+Output:
+
+ <p>If you want your page to validate under XHTML 1.0 Strict,
+ you've got to put paragraph tags in your blockquotes:</p>
+
+ <pre><code>&lt;blockquote&gt;
+ &lt;p&gt;For example.&lt;/p&gt;
+ &lt;/blockquote&gt;
+ </code></pre>
diff --git a/oldtests/Original/Markdown_Documentation_Syntax.html b/oldtests/Original/Markdown_Documentation_Syntax.html
new file mode 100644
index 0000000..f379dcf
--- /dev/null
+++ b/oldtests/Original/Markdown_Documentation_Syntax.html
@@ -0,0 +1,708 @@
+<h1>Markdown: Syntax</h1>
+<ul id="ProjectSubmenu">
+ <li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li>
+ <li><a href="/projects/markdown/basics" title="Markdown Basics">Basics</a></li>
+ <li><a class="selected" title="Markdown Syntax Documentation">Syntax</a></li>
+ <li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li>
+ <li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li>
+</ul>
+<ul>
+<li><a href="#overview">Overview</a>
+<ul>
+<li><a href="#philosophy">Philosophy</a></li>
+<li><a href="#html">Inline HTML</a></li>
+<li><a href="#autoescape">Automatic Escaping for Special Characters</a></li>
+</ul></li>
+<li><a href="#block">Block Elements</a>
+<ul>
+<li><a href="#p">Paragraphs and Line Breaks</a></li>
+<li><a href="#header">Headers</a></li>
+<li><a href="#blockquote">Blockquotes</a></li>
+<li><a href="#list">Lists</a></li>
+<li><a href="#precode">Code Blocks</a></li>
+<li><a href="#hr">Horizontal Rules</a></li>
+</ul></li>
+<li><a href="#span">Span Elements</a>
+<ul>
+<li><a href="#link">Links</a></li>
+<li><a href="#em">Emphasis</a></li>
+<li><a href="#code">Code</a></li>
+<li><a href="#img">Images</a></li>
+</ul></li>
+<li><a href="#misc">Miscellaneous</a>
+<ul>
+<li><a href="#backslash">Backslash Escapes</a></li>
+<li><a href="#autolink">Automatic Links</a></li>
+</ul></li>
+</ul>
+<p><strong>Note:</strong> This document is itself written using Markdown; you
+can <a href="/projects/markdown/syntax.text">see the source for it by adding '.text' to the URL</a>.</p>
+<hr />
+<h2 id="overview">Overview</h2>
+<h3 id="philosophy">Philosophy</h3>
+<p>Markdown is intended to be as easy-to-read and easy-to-write as is feasible.</p>
+<p>Readability, however, is emphasized above all else. A Markdown-formatted
+document should be publishable as-is, as plain text, without looking
+like it's been marked up with tags or formatting instructions. While
+Markdown's syntax has been influenced by several existing text-to-HTML
+filters -- including <a href="http://docutils.sourceforge.net/mirror/setext.html">Setext</a>, <a href="http://www.aaronsw.com/2002/atx/">atx</a>, <a href="http://textism.com/tools/textile/">Textile</a>, <a href="http://docutils.sourceforge.net/rst.html">reStructuredText</a>,
+<a href="http://www.triptico.com/software/grutatxt.html">Grutatext</a>, and <a href="http://ettext.taint.org/doc/">EtText</a> -- the single biggest source of
+inspiration for Markdown's syntax is the format of plain text email.</p>
+<p>To this end, Markdown's syntax is comprised entirely of punctuation
+characters, which punctuation characters have been carefully chosen so
+as to look like what they mean. E.g., asterisks around a word actually
+look like *emphasis*. Markdown lists look like, well, lists. Even
+blockquotes look like quoted passages of text, assuming you've ever
+used email.</p>
+<h3 id="html">Inline HTML</h3>
+<p>Markdown's syntax is intended for one purpose: to be used as a
+format for <em>writing</em> for the web.</p>
+<p>Markdown is not a replacement for HTML, or even close to it. Its
+syntax is very small, corresponding only to a very small subset of
+HTML tags. The idea is <em>not</em> to create a syntax that makes it easier
+to insert HTML tags. In my opinion, HTML tags are already easy to
+insert. The idea for Markdown is to make it easy to read, write, and
+edit prose. HTML is a <em>publishing</em> format; Markdown is a <em>writing</em>
+format. Thus, Markdown's formatting syntax only addresses issues that
+can be conveyed in plain text.</p>
+<p>For any markup that is not covered by Markdown's syntax, you simply
+use HTML itself. There's no need to preface it or delimit it to
+indicate that you're switching from Markdown to HTML; you just use
+the tags.</p>
+<p>The only restrictions are that block-level HTML elements -- e.g. <code>&lt;div&gt;</code>,
+<code>&lt;table&gt;</code>, <code>&lt;pre&gt;</code>, <code>&lt;p&gt;</code>, etc. -- must be separated from surrounding
+content by blank lines, and the start and end tags of the block should
+not be indented with tabs or spaces. Markdown is smart enough not
+to add extra (unwanted) <code>&lt;p&gt;</code> tags around HTML block-level tags.</p>
+<p>For example, to add an HTML table to a Markdown article:</p>
+<pre><code>This is a regular paragraph.
+
+&lt;table&gt;
+ &lt;tr&gt;
+ &lt;td&gt;Foo&lt;/td&gt;
+ &lt;/tr&gt;
+&lt;/table&gt;
+
+This is another regular paragraph.
+</code></pre>
+<p>Note that Markdown formatting syntax is not processed within block-level
+HTML tags. E.g., you can't use Markdown-style <code>*emphasis*</code> inside an
+HTML block.</p>
+<p>Span-level HTML tags -- e.g. <code>&lt;span&gt;</code>, <code>&lt;cite&gt;</code>, or <code>&lt;del&gt;</code> -- can be
+used anywhere in a Markdown paragraph, list item, or header. If you
+want, you can even use HTML tags instead of Markdown formatting; e.g. if
+you'd prefer to use HTML <code>&lt;a&gt;</code> or <code>&lt;img&gt;</code> tags instead of Markdown's
+link or image syntax, go right ahead.</p>
+<p>Unlike block-level HTML tags, Markdown syntax <em>is</em> processed within
+span-level tags.</p>
+<h3 id="autoescape">Automatic Escaping for Special Characters</h3>
+<p>In HTML, there are two characters that demand special treatment: <code>&lt;</code>
+and <code>&amp;</code>. Left angle brackets are used to start tags; ampersands are
+used to denote HTML entities. If you want to use them as literal
+characters, you must escape them as entities, e.g. <code>&amp;lt;</code>, and
+<code>&amp;amp;</code>.</p>
+<p>Ampersands in particular are bedeviling for web writers. If you want to
+write about 'AT&amp;T', you need to write '<code>AT&amp;amp;T</code>'. You even need to
+escape ampersands within URLs. Thus, if you want to link to:</p>
+<pre><code>http://images.google.com/images?num=30&amp;q=larry+bird
+</code></pre>
+<p>you need to encode the URL as:</p>
+<pre><code>http://images.google.com/images?num=30&amp;amp;q=larry+bird
+</code></pre>
+<p>in your anchor tag <code>href</code> attribute. Needless to say, this is easy to
+forget, and is probably the single most common source of HTML validation
+errors in otherwise well-marked-up web sites.</p>
+<p>Markdown allows you to use these characters naturally, taking care of
+all the necessary escaping for you. If you use an ampersand as part of
+an HTML entity, it remains unchanged; otherwise it will be translated
+into <code>&amp;amp;</code>.</p>
+<p>So, if you want to include a copyright symbol in your article, you can write:</p>
+<pre><code>&amp;copy;
+</code></pre>
+<p>and Markdown will leave it alone. But if you write:</p>
+<pre><code>AT&amp;T
+</code></pre>
+<p>Markdown will translate it to:</p>
+<pre><code>AT&amp;amp;T
+</code></pre>
+<p>Similarly, because Markdown supports <a href="#html">inline HTML</a>, if you use
+angle brackets as delimiters for HTML tags, Markdown will treat them as
+such. But if you write:</p>
+<pre><code>4 &lt; 5
+</code></pre>
+<p>Markdown will translate it to:</p>
+<pre><code>4 &amp;lt; 5
+</code></pre>
+<p>However, inside Markdown code spans and blocks, angle brackets and
+ampersands are <em>always</em> encoded automatically. This makes it easy to use
+Markdown to write about HTML code. (As opposed to raw HTML, which is a
+terrible format for writing about HTML syntax, because every single <code>&lt;</code>
+and <code>&amp;</code> in your example code needs to be escaped.)</p>
+<hr />
+<h2 id="block">Block Elements</h2>
+<h3 id="p">Paragraphs and Line Breaks</h3>
+<p>A paragraph is simply one or more consecutive lines of text, separated
+by one or more blank lines. (A blank line is any line that looks like a
+blank line -- a line containing nothing but spaces or tabs is considered
+blank.) Normal paragraphs should not be intended with spaces or tabs.</p>
+<p>The implication of the &quot;one or more consecutive lines of text&quot; rule is
+that Markdown supports &quot;hard-wrapped&quot; text paragraphs. This differs
+significantly from most other text-to-HTML formatters (including Movable
+Type's &quot;Convert Line Breaks&quot; option) which translate every line break
+character in a paragraph into a <code>&lt;br /&gt;</code> tag.</p>
+<p>When you <em>do</em> want to insert a <code>&lt;br /&gt;</code> break tag using Markdown, you
+end a line with two or more spaces, then type return.</p>
+<p>Yes, this takes a tad more effort to create a <code>&lt;br /&gt;</code>, but a simplistic
+&quot;every line break is a <code>&lt;br /&gt;</code>&quot; rule wouldn't work for Markdown.
+Markdown's email-style <a href="#blockquote">blockquoting</a> and multi-paragraph <a href="#list">list items</a>
+work best -- and look better -- when you format them with hard breaks.</p>
+<h3 id="header">Headers</h3>
+<p>Markdown supports two styles of headers, <a href="http://docutils.sourceforge.net/mirror/setext.html">Setext</a> and <a href="http://www.aaronsw.com/2002/atx/">atx</a>.</p>
+<p>Setext-style headers are &quot;underlined&quot; using equal signs (for first-level
+headers) and dashes (for second-level headers). For example:</p>
+<pre><code>This is an H1
+=============
+
+This is an H2
+-------------
+</code></pre>
+<p>Any number of underlining <code>=</code>'s or <code>-</code>'s will work.</p>
+<p>Atx-style headers use 1-6 hash characters at the start of the line,
+corresponding to header levels 1-6. For example:</p>
+<pre><code># This is an H1
+
+## This is an H2
+
+###### This is an H6
+</code></pre>
+<p>Optionally, you may &quot;close&quot; atx-style headers. This is purely
+cosmetic -- you can use this if you think it looks better. The
+closing hashes don't even need to match the number of hashes
+used to open the header. (The number of opening hashes
+determines the header level.) :</p>
+<pre><code># This is an H1 #
+
+## This is an H2 ##
+
+### This is an H3 ######
+</code></pre>
+<h3 id="blockquote">Blockquotes</h3>
+<p>Markdown uses email-style <code>&gt;</code> characters for blockquoting. If you're
+familiar with quoting passages of text in an email message, then you
+know how to create a blockquote in Markdown. It looks best if you hard
+wrap the text and put a <code>&gt;</code> before every line:</p>
+<pre><code>&gt; This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
+&gt; consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
+&gt; Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
+&gt;
+&gt; Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
+&gt; id sem consectetuer libero luctus adipiscing.
+</code></pre>
+<p>Markdown allows you to be lazy and only put the <code>&gt;</code> before the first
+line of a hard-wrapped paragraph:</p>
+<pre><code>&gt; This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
+consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
+Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
+
+&gt; Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
+id sem consectetuer libero luctus adipiscing.
+</code></pre>
+<p>Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by
+adding additional levels of <code>&gt;</code>:</p>
+<pre><code>&gt; This is the first level of quoting.
+&gt;
+&gt; &gt; This is nested blockquote.
+&gt;
+&gt; Back to the first level.
+</code></pre>
+<p>Blockquotes can contain other Markdown elements, including headers, lists,
+and code blocks:</p>
+<pre><code>&gt; ## This is a header.
+&gt;
+&gt; 1. This is the first list item.
+&gt; 2. This is the second list item.
+&gt;
+&gt; Here's some example code:
+&gt;
+&gt; return shell_exec(&quot;echo $input | $markdown_script&quot;);
+</code></pre>
+<p>Any decent text editor should make email-style quoting easy. For
+example, with BBEdit, you can make a selection and choose Increase
+Quote Level from the Text menu.</p>
+<h3 id="list">Lists</h3>
+<p>Markdown supports ordered (numbered) and unordered (bulleted) lists.</p>
+<p>Unordered lists use asterisks, pluses, and hyphens -- interchangably
+-- as list markers:</p>
+<pre><code>* Red
+* Green
+* Blue
+</code></pre>
+<p>is equivalent to:</p>
+<pre><code>+ Red
++ Green
++ Blue
+</code></pre>
+<p>and:</p>
+<pre><code>- Red
+- Green
+- Blue
+</code></pre>
+<p>Ordered lists use numbers followed by periods:</p>
+<pre><code>1. Bird
+2. McHale
+3. Parish
+</code></pre>
+<p>It's important to note that the actual numbers you use to mark the
+list have no effect on the HTML output Markdown produces. The HTML
+Markdown produces from the above list is:</p>
+<pre><code>&lt;ol&gt;
+&lt;li&gt;Bird&lt;/li&gt;
+&lt;li&gt;McHale&lt;/li&gt;
+&lt;li&gt;Parish&lt;/li&gt;
+&lt;/ol&gt;
+</code></pre>
+<p>If you instead wrote the list in Markdown like this:</p>
+<pre><code>1. Bird
+1. McHale
+1. Parish
+</code></pre>
+<p>or even:</p>
+<pre><code>3. Bird
+1. McHale
+8. Parish
+</code></pre>
+<p>you'd get the exact same HTML output. The point is, if you want to,
+you can use ordinal numbers in your ordered Markdown lists, so that
+the numbers in your source match the numbers in your published HTML.
+But if you want to be lazy, you don't have to.</p>
+<p>If you do use lazy list numbering, however, you should still start the
+list with the number 1. At some point in the future, Markdown may support
+starting ordered lists at an arbitrary number.</p>
+<p>List markers typically start at the left margin, but may be indented by
+up to three spaces. List markers must be followed by one or more spaces
+or a tab.</p>
+<p>To make lists look nice, you can wrap items with hanging indents:</p>
+<pre><code>* Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
+ Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
+ viverra nec, fringilla in, laoreet vitae, risus.
+* Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
+ Suspendisse id sem consectetuer libero luctus adipiscing.
+</code></pre>
+<p>But if you want to be lazy, you don't have to:</p>
+<pre><code>* Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
+Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
+viverra nec, fringilla in, laoreet vitae, risus.
+* Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
+Suspendisse id sem consectetuer libero luctus adipiscing.
+</code></pre>
+<p>If list items are separated by blank lines, Markdown will wrap the
+items in <code>&lt;p&gt;</code> tags in the HTML output. For example, this input:</p>
+<pre><code>* Bird
+* Magic
+</code></pre>
+<p>will turn into:</p>
+<pre><code>&lt;ul&gt;
+&lt;li&gt;Bird&lt;/li&gt;
+&lt;li&gt;Magic&lt;/li&gt;
+&lt;/ul&gt;
+</code></pre>
+<p>But this:</p>
+<pre><code>* Bird
+
+* Magic
+</code></pre>
+<p>will turn into:</p>
+<pre><code>&lt;ul&gt;
+&lt;li&gt;&lt;p&gt;Bird&lt;/p&gt;&lt;/li&gt;
+&lt;li&gt;&lt;p&gt;Magic&lt;/p&gt;&lt;/li&gt;
+&lt;/ul&gt;
+</code></pre>
+<p>List items may consist of multiple paragraphs. Each subsequent
+paragraph in a list item must be intended by either 4 spaces
+or one tab:</p>
+<pre><code>1. This is a list item with two paragraphs. Lorem ipsum dolor
+ sit amet, consectetuer adipiscing elit. Aliquam hendrerit
+ mi posuere lectus.
+
+ Vestibulum enim wisi, viverra nec, fringilla in, laoreet
+ vitae, risus. Donec sit amet nisl. Aliquam semper ipsum
+ sit amet velit.
+
+2. Suspendisse id sem consectetuer libero luctus adipiscing.
+</code></pre>
+<p>It looks nice if you indent every line of the subsequent
+paragraphs, but here again, Markdown will allow you to be
+lazy:</p>
+<pre><code>* This is a list item with two paragraphs.
+
+ This is the second paragraph in the list item. You're
+only required to indent the first line. Lorem ipsum dolor
+sit amet, consectetuer adipiscing elit.
+
+* Another item in the same list.
+</code></pre>
+<p>To put a blockquote within a list item, the blockquote's <code>&gt;</code>
+delimiters need to be indented:</p>
+<pre><code>* A list item with a blockquote:
+
+ &gt; This is a blockquote
+ &gt; inside a list item.
+</code></pre>
+<p>To put a code block within a list item, the code block needs
+to be indented <em>twice</em> -- 8 spaces or two tabs:</p>
+<pre><code>* A list item with a code block:
+
+ &lt;code goes here&gt;
+</code></pre>
+<p>It's worth noting that it's possible to trigger an ordered list by
+accident, by writing something like this:</p>
+<pre><code>1986. What a great season.
+</code></pre>
+<p>In other words, a <em>number-period-space</em> sequence at the beginning of a
+line. To avoid this, you can backslash-escape the period:</p>
+<pre><code>1986\. What a great season.
+</code></pre>
+<h3 id="precode">Code Blocks</h3>
+<p>Pre-formatted code blocks are used for writing about programming or
+markup source code. Rather than forming normal paragraphs, the lines
+of a code block are interpreted literally. Markdown wraps a code block
+in both <code>&lt;pre&gt;</code> and <code>&lt;code&gt;</code> tags.</p>
+<p>To produce a code block in Markdown, simply indent every line of the
+block by at least 4 spaces or 1 tab. For example, given this input:</p>
+<pre><code>This is a normal paragraph:
+
+ This is a code block.
+</code></pre>
+<p>Markdown will generate:</p>
+<pre><code>&lt;p&gt;This is a normal paragraph:&lt;/p&gt;
+
+&lt;pre&gt;&lt;code&gt;This is a code block.
+&lt;/code&gt;&lt;/pre&gt;
+</code></pre>
+<p>One level of indentation -- 4 spaces or 1 tab -- is removed from each
+line of the code block. For example, this:</p>
+<pre><code>Here is an example of AppleScript:
+
+ tell application &quot;Foo&quot;
+ beep
+ end tell
+</code></pre>
+<p>will turn into:</p>
+<pre><code>&lt;p&gt;Here is an example of AppleScript:&lt;/p&gt;
+
+&lt;pre&gt;&lt;code&gt;tell application &quot;Foo&quot;
+ beep
+end tell
+&lt;/code&gt;&lt;/pre&gt;
+</code></pre>
+<p>A code block continues until it reaches a line that is not indented
+(or the end of the article).</p>
+<p>Within a code block, ampersands (<code>&amp;</code>) and angle brackets (<code>&lt;</code> and <code>&gt;</code>)
+are automatically converted into HTML entities. This makes it very
+easy to include example HTML source code using Markdown -- just paste
+it and indent it, and Markdown will handle the hassle of encoding the
+ampersands and angle brackets. For example, this:</p>
+<pre><code> &lt;div class=&quot;footer&quot;&gt;
+ &amp;copy; 2004 Foo Corporation
+ &lt;/div&gt;
+</code></pre>
+<p>will turn into:</p>
+<pre><code>&lt;pre&gt;&lt;code&gt;&amp;lt;div class=&quot;footer&quot;&amp;gt;
+ &amp;amp;copy; 2004 Foo Corporation
+&amp;lt;/div&amp;gt;
+&lt;/code&gt;&lt;/pre&gt;
+</code></pre>
+<p>Regular Markdown syntax is not processed within code blocks. E.g.,
+asterisks are just literal asterisks within a code block. This means
+it's also easy to use Markdown to write about Markdown's own syntax.</p>
+<h3 id="hr">Horizontal Rules</h3>
+<p>You can produce a horizontal rule tag (<code>&lt;hr /&gt;</code>) by placing three or
+more hyphens, asterisks, or underscores on a line by themselves. If you
+wish, you may use spaces between the hyphens or asterisks. Each of the
+following lines will produce a horizontal rule:</p>
+<pre><code>* * *
+
+***
+
+*****
+
+- - -
+
+---------------------------------------
+
+_ _ _
+</code></pre>
+<hr />
+<h2 id="span">Span Elements</h2>
+<h3 id="link">Links</h3>
+<p>Markdown supports two style of links: <em>inline</em> and <em>reference</em>.</p>
+<p>In both styles, the link text is delimited by [square brackets].</p>
+<p>To create an inline link, use a set of regular parentheses immediately
+after the link text's closing square bracket. Inside the parentheses,
+put the URL where you want the link to point, along with an <em>optional</em>
+title for the link, surrounded in quotes. For example:</p>
+<pre><code>This is [an example](http://example.com/ &quot;Title&quot;) inline link.
+
+[This link](http://example.net/) has no title attribute.
+</code></pre>
+<p>Will produce:</p>
+<pre><code>&lt;p&gt;This is &lt;a href=&quot;http://example.com/&quot; title=&quot;Title&quot;&gt;
+an example&lt;/a&gt; inline link.&lt;/p&gt;
+
+&lt;p&gt;&lt;a href=&quot;http://example.net/&quot;&gt;This link&lt;/a&gt; has no
+title attribute.&lt;/p&gt;
+</code></pre>
+<p>If you're referring to a local resource on the same server, you can
+use relative paths:</p>
+<pre><code>See my [About](/about/) page for details.
+</code></pre>
+<p>Reference-style links use a second set of square brackets, inside
+which you place a label of your choosing to identify the link:</p>
+<pre><code>This is [an example][id] reference-style link.
+</code></pre>
+<p>You can optionally use a space to separate the sets of brackets:</p>
+<pre><code>This is [an example] [id] reference-style link.
+</code></pre>
+<p>Then, anywhere in the document, you define your link label like this,
+on a line by itself:</p>
+<pre><code>[id]: http://example.com/ &quot;Optional Title Here&quot;
+</code></pre>
+<p>That is:</p>
+<ul>
+<li>Square brackets containing the link identifier (optionally
+indented from the left margin using up to three spaces);</li>
+<li>followed by a colon;</li>
+<li>followed by one or more spaces (or tabs);</li>
+<li>followed by the URL for the link;</li>
+<li>optionally followed by a title attribute for the link, enclosed
+in double or single quotes.</li>
+</ul>
+<p>The link URL may, optionally, be surrounded by angle brackets:</p>
+<pre><code>[id]: &lt;http://example.com/&gt; &quot;Optional Title Here&quot;
+</code></pre>
+<p>You can put the title attribute on the next line and use extra spaces
+or tabs for padding, which tends to look better with longer URLs:</p>
+<pre><code>[id]: http://example.com/longish/path/to/resource/here
+ &quot;Optional Title Here&quot;
+</code></pre>
+<p>Link definitions are only used for creating links during Markdown
+processing, and are stripped from your document in the HTML output.</p>
+<p>Link definition names may constist of letters, numbers, spaces, and punctuation -- but they are <em>not</em> case sensitive. E.g. these two links:</p>
+<pre><code>[link text][a]
+[link text][A]
+</code></pre>
+<p>are equivalent.</p>
+<p>The <em>implicit link name</em> shortcut allows you to omit the name of the
+link, in which case the link text itself is used as the name.
+Just use an empty set of square brackets -- e.g., to link the word
+&quot;Google&quot; to the google.com web site, you could simply write:</p>
+<pre><code>[Google][]
+</code></pre>
+<p>And then define the link:</p>
+<pre><code>[Google]: http://google.com/
+</code></pre>
+<p>Because link names may contain spaces, this shortcut even works for
+multiple words in the link text:</p>
+<pre><code>Visit [Daring Fireball][] for more information.
+</code></pre>
+<p>And then define the link:</p>
+<pre><code>[Daring Fireball]: http://daringfireball.net/
+</code></pre>
+<p>Link definitions can be placed anywhere in your Markdown document. I
+tend to put them immediately after each paragraph in which they're
+used, but if you want, you can put them all at the end of your
+document, sort of like footnotes.</p>
+<p>Here's an example of reference links in action:</p>
+<pre><code>I get 10 times more traffic from [Google] [1] than from
+[Yahoo] [2] or [MSN] [3].
+
+ [1]: http://google.com/ &quot;Google&quot;
+ [2]: http://search.yahoo.com/ &quot;Yahoo Search&quot;
+ [3]: http://search.msn.com/ &quot;MSN Search&quot;
+</code></pre>
+<p>Using the implicit link name shortcut, you could instead write:</p>
+<pre><code>I get 10 times more traffic from [Google][] than from
+[Yahoo][] or [MSN][].
+
+ [google]: http://google.com/ &quot;Google&quot;
+ [yahoo]: http://search.yahoo.com/ &quot;Yahoo Search&quot;
+ [msn]: http://search.msn.com/ &quot;MSN Search&quot;
+</code></pre>
+<p>Both of the above examples will produce the following HTML output:</p>
+<pre><code>&lt;p&gt;I get 10 times more traffic from &lt;a href=&quot;http://google.com/&quot;
+title=&quot;Google&quot;&gt;Google&lt;/a&gt; than from
+&lt;a href=&quot;http://search.yahoo.com/&quot; title=&quot;Yahoo Search&quot;&gt;Yahoo&lt;/a&gt;
+or &lt;a href=&quot;http://search.msn.com/&quot; title=&quot;MSN Search&quot;&gt;MSN&lt;/a&gt;.&lt;/p&gt;
+</code></pre>
+<p>For comparison, here is the same paragraph written using
+Markdown's inline link style:</p>
+<pre><code>I get 10 times more traffic from [Google](http://google.com/ &quot;Google&quot;)
+than from [Yahoo](http://search.yahoo.com/ &quot;Yahoo Search&quot;) or
+[MSN](http://search.msn.com/ &quot;MSN Search&quot;).
+</code></pre>
+<p>The point of reference-style links is not that they're easier to
+write. The point is that with reference-style links, your document
+source is vastly more readable. Compare the above examples: using
+reference-style links, the paragraph itself is only 81 characters
+long; with inline-style links, it's 176 characters; and as raw HTML,
+it's 234 characters. In the raw HTML, there's more markup than there
+is text.</p>
+<p>With Markdown's reference-style links, a source document much more
+closely resembles the final output, as rendered in a browser. By
+allowing you to move the markup-related metadata out of the paragraph,
+you can add links without interrupting the narrative flow of your
+prose.</p>
+<h3 id="em">Emphasis</h3>
+<p>Markdown treats asterisks (<code>*</code>) and underscores (<code>_</code>) as indicators of
+emphasis. Text wrapped with one <code>*</code> or <code>_</code> will be wrapped with an
+HTML <code>&lt;em&gt;</code> tag; double <code>*</code>'s or <code>_</code>'s will be wrapped with an HTML
+<code>&lt;strong&gt;</code> tag. E.g., this input:</p>
+<pre><code>*single asterisks*
+
+_single underscores_
+
+**double asterisks**
+
+__double underscores__
+</code></pre>
+<p>will produce:</p>
+<pre><code>&lt;em&gt;single asterisks&lt;/em&gt;
+
+&lt;em&gt;single underscores&lt;/em&gt;
+
+&lt;strong&gt;double asterisks&lt;/strong&gt;
+
+&lt;strong&gt;double underscores&lt;/strong&gt;
+</code></pre>
+<p>You can use whichever style you prefer; the lone restriction is that
+the same character must be used to open and close an emphasis span.</p>
+<p>Emphasis can be used in the middle of a word:</p>
+<pre><code>un*fucking*believable
+</code></pre>
+<p>But if you surround an <code>*</code> or <code>_</code> with spaces, it'll be treated as a
+literal asterisk or underscore.</p>
+<p>To produce a literal asterisk or underscore at a position where it
+would otherwise be used as an emphasis delimiter, you can backslash
+escape it:</p>
+<pre><code>\*this text is surrounded by literal asterisks\*
+</code></pre>
+<h3 id="code">Code</h3>
+<p>To indicate a span of code, wrap it with backtick quotes (<code>`</code>).
+Unlike a pre-formatted code block, a code span indicates code within a
+normal paragraph. For example:</p>
+<pre><code>Use the `printf()` function.
+</code></pre>
+<p>will produce:</p>
+<pre><code>&lt;p&gt;Use the &lt;code&gt;printf()&lt;/code&gt; function.&lt;/p&gt;
+</code></pre>
+<p>To include a literal backtick character within a code span, you can use
+multiple backticks as the opening and closing delimiters:</p>
+<pre><code>``There is a literal backtick (`) here.``
+</code></pre>
+<p>which will produce this:</p>
+<pre><code>&lt;p&gt;&lt;code&gt;There is a literal backtick (`) here.&lt;/code&gt;&lt;/p&gt;
+</code></pre>
+<p>The backtick delimiters surrounding a code span may include spaces --
+one after the opening, one before the closing. This allows you to place
+literal backtick characters at the beginning or end of a code span:</p>
+<pre><code>A single backtick in a code span: `` ` ``
+
+A backtick-delimited string in a code span: `` `foo` ``
+</code></pre>
+<p>will produce:</p>
+<pre><code>&lt;p&gt;A single backtick in a code span: &lt;code&gt;`&lt;/code&gt;&lt;/p&gt;
+
+&lt;p&gt;A backtick-delimited string in a code span: &lt;code&gt;`foo`&lt;/code&gt;&lt;/p&gt;
+</code></pre>
+<p>With a code span, ampersands and angle brackets are encoded as HTML
+entities automatically, which makes it easy to include example HTML
+tags. Markdown will turn this:</p>
+<pre><code>Please don't use any `&lt;blink&gt;` tags.
+</code></pre>
+<p>into:</p>
+<pre><code>&lt;p&gt;Please don't use any &lt;code&gt;&amp;lt;blink&amp;gt;&lt;/code&gt; tags.&lt;/p&gt;
+</code></pre>
+<p>You can write this:</p>
+<pre><code>`&amp;#8212;` is the decimal-encoded equivalent of `&amp;mdash;`.
+</code></pre>
+<p>to produce:</p>
+<pre><code>&lt;p&gt;&lt;code&gt;&amp;amp;#8212;&lt;/code&gt; is the decimal-encoded
+equivalent of &lt;code&gt;&amp;amp;mdash;&lt;/code&gt;.&lt;/p&gt;
+</code></pre>
+<h3 id="img">Images</h3>
+<p>Admittedly, it's fairly difficult to devise a &quot;natural&quot; syntax for
+placing images into a plain text document format.</p>
+<p>Markdown uses an image syntax that is intended to resemble the syntax
+for links, allowing for two styles: <em>inline</em> and <em>reference</em>.</p>
+<p>Inline image syntax looks like this:</p>
+<pre><code>![Alt text](/path/to/img.jpg)
+
+![Alt text](/path/to/img.jpg &quot;Optional title&quot;)
+</code></pre>
+<p>That is:</p>
+<ul>
+<li>An exclamation mark: <code>!</code>;</li>
+<li>followed by a set of square brackets, containing the <code>alt</code>
+attribute text for the image;</li>
+<li>followed by a set of parentheses, containing the URL or path to
+the image, and an optional <code>title</code> attribute enclosed in double
+or single quotes.</li>
+</ul>
+<p>Reference-style image syntax looks like this:</p>
+<pre><code>![Alt text][id]
+</code></pre>
+<p>Where &quot;id&quot; is the name of a defined image reference. Image references
+are defined using syntax identical to link references:</p>
+<pre><code>[id]: url/to/image &quot;Optional title attribute&quot;
+</code></pre>
+<p>As of this writing, Markdown has no syntax for specifying the
+dimensions of an image; if this is important to you, you can simply
+use regular HTML <code>&lt;img&gt;</code> tags.</p>
+<hr />
+<h2 id="misc">Miscellaneous</h2>
+<h3 id="autolink">Automatic Links</h3>
+<p>Markdown supports a shortcut style for creating &quot;automatic&quot; links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:</p>
+<pre><code>&lt;http://example.com/&gt;
+</code></pre>
+<p>Markdown will turn this into:</p>
+<pre><code>&lt;a href=&quot;http://example.com/&quot;&gt;http://example.com/&lt;/a&gt;
+</code></pre>
+<p>Automatic links for email addresses work similarly, except that
+Markdown will also perform a bit of randomized decimal and hex
+entity-encoding to help obscure your address from address-harvesting
+spambots. For example, Markdown will turn this:</p>
+<pre><code>&lt;address@example.com&gt;
+</code></pre>
+<p>into something like this:</p>
+<pre><code>&lt;a href=&quot;&amp;#x6D;&amp;#x61;i&amp;#x6C;&amp;#x74;&amp;#x6F;:&amp;#x61;&amp;#x64;&amp;#x64;&amp;#x72;&amp;#x65;
+&amp;#115;&amp;#115;&amp;#64;&amp;#101;&amp;#120;&amp;#x61;&amp;#109;&amp;#x70;&amp;#x6C;e&amp;#x2E;&amp;#99;&amp;#111;
+&amp;#109;&quot;&gt;&amp;#x61;&amp;#x64;&amp;#x64;&amp;#x72;&amp;#x65;&amp;#115;&amp;#115;&amp;#64;&amp;#101;&amp;#120;&amp;#x61;
+&amp;#109;&amp;#x70;&amp;#x6C;e&amp;#x2E;&amp;#99;&amp;#111;&amp;#109;&lt;/a&gt;
+</code></pre>
+<p>which will render in a browser as a clickable link to &quot;address@example.com&quot;.</p>
+<p>(This sort of entity-encoding trick will indeed fool many, if not
+most, address-harvesting bots, but it definitely won't fool all of
+them. It's better than nothing, but an address published in this way
+will probably eventually start receiving spam.)</p>
+<h3 id="backslash">Backslash Escapes</h3>
+<p>Markdown allows you to use backslash escapes to generate literal
+characters which would otherwise have special meaning in Markdown's
+formatting syntax. For example, if you wanted to surround a word with
+literal asterisks (instead of an HTML <code>&lt;em&gt;</code> tag), you can use backslashes
+before the asterisks, like this:</p>
+<pre><code>\*literal asterisks\*
+</code></pre>
+<p>Markdown provides backslash escapes for the following characters:</p>
+<pre><code>\ backslash
+` backtick
+* asterisk
+_ underscore
+{} curly braces
+[] square brackets
+() parentheses
+# hash mark
++ plus sign
+- minus sign (hyphen)
+. dot
+! exclamation mark
+</code></pre>
diff --git a/oldtests/Original/Markdown_Documentation_Syntax.markdown b/oldtests/Original/Markdown_Documentation_Syntax.markdown
new file mode 100644
index 0000000..57360a1
--- /dev/null
+++ b/oldtests/Original/Markdown_Documentation_Syntax.markdown
@@ -0,0 +1,888 @@
+Markdown: Syntax
+================
+
+<ul id="ProjectSubmenu">
+ <li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li>
+ <li><a href="/projects/markdown/basics" title="Markdown Basics">Basics</a></li>
+ <li><a class="selected" title="Markdown Syntax Documentation">Syntax</a></li>
+ <li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li>
+ <li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li>
+</ul>
+
+
+* [Overview](#overview)
+ * [Philosophy](#philosophy)
+ * [Inline HTML](#html)
+ * [Automatic Escaping for Special Characters](#autoescape)
+* [Block Elements](#block)
+ * [Paragraphs and Line Breaks](#p)
+ * [Headers](#header)
+ * [Blockquotes](#blockquote)
+ * [Lists](#list)
+ * [Code Blocks](#precode)
+ * [Horizontal Rules](#hr)
+* [Span Elements](#span)
+ * [Links](#link)
+ * [Emphasis](#em)
+ * [Code](#code)
+ * [Images](#img)
+* [Miscellaneous](#misc)
+ * [Backslash Escapes](#backslash)
+ * [Automatic Links](#autolink)
+
+
+**Note:** This document is itself written using Markdown; you
+can [see the source for it by adding '.text' to the URL][src].
+
+ [src]: /projects/markdown/syntax.text
+
+* * *
+
+<h2 id="overview">Overview</h2>
+
+<h3 id="philosophy">Philosophy</h3>
+
+Markdown is intended to be as easy-to-read and easy-to-write as is feasible.
+
+Readability, however, is emphasized above all else. A Markdown-formatted
+document should be publishable as-is, as plain text, without looking
+like it's been marked up with tags or formatting instructions. While
+Markdown's syntax has been influenced by several existing text-to-HTML
+filters -- including [Setext] [1], [atx] [2], [Textile] [3], [reStructuredText] [4],
+[Grutatext] [5], and [EtText] [6] -- the single biggest source of
+inspiration for Markdown's syntax is the format of plain text email.
+
+ [1]: http://docutils.sourceforge.net/mirror/setext.html
+ [2]: http://www.aaronsw.com/2002/atx/
+ [3]: http://textism.com/tools/textile/
+ [4]: http://docutils.sourceforge.net/rst.html
+ [5]: http://www.triptico.com/software/grutatxt.html
+ [6]: http://ettext.taint.org/doc/
+
+To this end, Markdown's syntax is comprised entirely of punctuation
+characters, which punctuation characters have been carefully chosen so
+as to look like what they mean. E.g., asterisks around a word actually
+look like \*emphasis\*. Markdown lists look like, well, lists. Even
+blockquotes look like quoted passages of text, assuming you've ever
+used email.
+
+
+
+<h3 id="html">Inline HTML</h3>
+
+Markdown's syntax is intended for one purpose: to be used as a
+format for *writing* for the web.
+
+Markdown is not a replacement for HTML, or even close to it. Its
+syntax is very small, corresponding only to a very small subset of
+HTML tags. The idea is *not* to create a syntax that makes it easier
+to insert HTML tags. In my opinion, HTML tags are already easy to
+insert. The idea for Markdown is to make it easy to read, write, and
+edit prose. HTML is a *publishing* format; Markdown is a *writing*
+format. Thus, Markdown's formatting syntax only addresses issues that
+can be conveyed in plain text.
+
+For any markup that is not covered by Markdown's syntax, you simply
+use HTML itself. There's no need to preface it or delimit it to
+indicate that you're switching from Markdown to HTML; you just use
+the tags.
+
+The only restrictions are that block-level HTML elements -- e.g. `<div>`,
+`<table>`, `<pre>`, `<p>`, etc. -- must be separated from surrounding
+content by blank lines, and the start and end tags of the block should
+not be indented with tabs or spaces. Markdown is smart enough not
+to add extra (unwanted) `<p>` tags around HTML block-level tags.
+
+For example, to add an HTML table to a Markdown article:
+
+ This is a regular paragraph.
+
+ <table>
+ <tr>
+ <td>Foo</td>
+ </tr>
+ </table>
+
+ This is another regular paragraph.
+
+Note that Markdown formatting syntax is not processed within block-level
+HTML tags. E.g., you can't use Markdown-style `*emphasis*` inside an
+HTML block.
+
+Span-level HTML tags -- e.g. `<span>`, `<cite>`, or `<del>` -- can be
+used anywhere in a Markdown paragraph, list item, or header. If you
+want, you can even use HTML tags instead of Markdown formatting; e.g. if
+you'd prefer to use HTML `<a>` or `<img>` tags instead of Markdown's
+link or image syntax, go right ahead.
+
+Unlike block-level HTML tags, Markdown syntax *is* processed within
+span-level tags.
+
+
+<h3 id="autoescape">Automatic Escaping for Special Characters</h3>
+
+In HTML, there are two characters that demand special treatment: `<`
+and `&`. Left angle brackets are used to start tags; ampersands are
+used to denote HTML entities. If you want to use them as literal
+characters, you must escape them as entities, e.g. `&lt;`, and
+`&amp;`.
+
+Ampersands in particular are bedeviling for web writers. If you want to
+write about 'AT&T', you need to write '`AT&amp;T`'. You even need to
+escape ampersands within URLs. Thus, if you want to link to:
+
+ http://images.google.com/images?num=30&q=larry+bird
+
+you need to encode the URL as:
+
+ http://images.google.com/images?num=30&amp;q=larry+bird
+
+in your anchor tag `href` attribute. Needless to say, this is easy to
+forget, and is probably the single most common source of HTML validation
+errors in otherwise well-marked-up web sites.
+
+Markdown allows you to use these characters naturally, taking care of
+all the necessary escaping for you. If you use an ampersand as part of
+an HTML entity, it remains unchanged; otherwise it will be translated
+into `&amp;`.
+
+So, if you want to include a copyright symbol in your article, you can write:
+
+ &copy;
+
+and Markdown will leave it alone. But if you write:
+
+ AT&T
+
+Markdown will translate it to:
+
+ AT&amp;T
+
+Similarly, because Markdown supports [inline HTML](#html), if you use
+angle brackets as delimiters for HTML tags, Markdown will treat them as
+such. But if you write:
+
+ 4 < 5
+
+Markdown will translate it to:
+
+ 4 &lt; 5
+
+However, inside Markdown code spans and blocks, angle brackets and
+ampersands are *always* encoded automatically. This makes it easy to use
+Markdown to write about HTML code. (As opposed to raw HTML, which is a
+terrible format for writing about HTML syntax, because every single `<`
+and `&` in your example code needs to be escaped.)
+
+
+* * *
+
+
+<h2 id="block">Block Elements</h2>
+
+
+<h3 id="p">Paragraphs and Line Breaks</h3>
+
+A paragraph is simply one or more consecutive lines of text, separated
+by one or more blank lines. (A blank line is any line that looks like a
+blank line -- a line containing nothing but spaces or tabs is considered
+blank.) Normal paragraphs should not be indented with spaces or tabs.
+
+The implication of the "one or more consecutive lines of text" rule is
+that Markdown supports "hard-wrapped" text paragraphs. This differs
+significantly from most other text-to-HTML formatters (including Movable
+Type's "Convert Line Breaks" option) which translate every line break
+character in a paragraph into a `<br />` tag.
+
+When you *do* want to insert a `<br />` break tag using Markdown, you
+end a line with two or more spaces, then type return.
+
+Yes, this takes a tad more effort to create a `<br />`, but a simplistic
+"every line break is a `<br />`" rule wouldn't work for Markdown.
+Markdown's email-style [blockquoting][bq] and multi-paragraph [list items][l]
+work best -- and look better -- when you format them with hard breaks.
+
+ [bq]: #blockquote
+ [l]: #list
+
+
+
+<h3 id="header">Headers</h3>
+
+Markdown supports two styles of headers, [Setext] [1] and [atx] [2].
+
+Setext-style headers are "underlined" using equal signs (for first-level
+headers) and dashes (for second-level headers). For example:
+
+ This is an H1
+ =============
+
+ This is an H2
+ -------------
+
+Any number of underlining `=`'s or `-`'s will work.
+
+Atx-style headers use 1-6 hash characters at the start of the line,
+corresponding to header levels 1-6. For example:
+
+ # This is an H1
+
+ ## This is an H2
+
+ ###### This is an H6
+
+Optionally, you may "close" atx-style headers. This is purely
+cosmetic -- you can use this if you think it looks better. The
+closing hashes don't even need to match the number of hashes
+used to open the header. (The number of opening hashes
+determines the header level.) :
+
+ # This is an H1 #
+
+ ## This is an H2 ##
+
+ ### This is an H3 ######
+
+
+<h3 id="blockquote">Blockquotes</h3>
+
+Markdown uses email-style `>` characters for blockquoting. If you're
+familiar with quoting passages of text in an email message, then you
+know how to create a blockquote in Markdown. It looks best if you hard
+wrap the text and put a `>` before every line:
+
+ > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
+ > consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
+ > Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
+ >
+ > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
+ > id sem consectetuer libero luctus adipiscing.
+
+Markdown allows you to be lazy and only put the `>` before the first
+line of a hard-wrapped paragraph:
+
+ > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
+ consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
+ Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
+
+ > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
+ id sem consectetuer libero luctus adipiscing.
+
+Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by
+adding additional levels of `>`:
+
+ > This is the first level of quoting.
+ >
+ > > This is nested blockquote.
+ >
+ > Back to the first level.
+
+Blockquotes can contain other Markdown elements, including headers, lists,
+and code blocks:
+
+ > ## This is a header.
+ >
+ > 1. This is the first list item.
+ > 2. This is the second list item.
+ >
+ > Here's some example code:
+ >
+ > return shell_exec("echo $input | $markdown_script");
+
+Any decent text editor should make email-style quoting easy. For
+example, with BBEdit, you can make a selection and choose Increase
+Quote Level from the Text menu.
+
+
+<h3 id="list">Lists</h3>
+
+Markdown supports ordered (numbered) and unordered (bulleted) lists.
+
+Unordered lists use asterisks, pluses, and hyphens -- interchangeably
+-- as list markers:
+
+ * Red
+ * Green
+ * Blue
+
+is equivalent to:
+
+ + Red
+ + Green
+ + Blue
+
+and:
+
+ - Red
+ - Green
+ - Blue
+
+Ordered lists use numbers followed by periods:
+
+ 1. Bird
+ 2. McHale
+ 3. Parish
+
+It's important to note that the actual numbers you use to mark the
+list have no effect on the HTML output Markdown produces. The HTML
+Markdown produces from the above list is:
+
+ <ol>
+ <li>Bird</li>
+ <li>McHale</li>
+ <li>Parish</li>
+ </ol>
+
+If you instead wrote the list in Markdown like this:
+
+ 1. Bird
+ 1. McHale
+ 1. Parish
+
+or even:
+
+ 3. Bird
+ 1. McHale
+ 8. Parish
+
+you'd get the exact same HTML output. The point is, if you want to,
+you can use ordinal numbers in your ordered Markdown lists, so that
+the numbers in your source match the numbers in your published HTML.
+But if you want to be lazy, you don't have to.
+
+If you do use lazy list numbering, however, you should still start the
+list with the number 1. At some point in the future, Markdown may support
+starting ordered lists at an arbitrary number.
+
+List markers typically start at the left margin, but may be indented by
+up to three spaces. List markers must be followed by one or more spaces
+or a tab.
+
+To make lists look nice, you can wrap items with hanging indents:
+
+ * Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
+ Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
+ viverra nec, fringilla in, laoreet vitae, risus.
+ * Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
+ Suspendisse id sem consectetuer libero luctus adipiscing.
+
+But if you want to be lazy, you don't have to:
+
+ * Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
+ Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
+ viverra nec, fringilla in, laoreet vitae, risus.
+ * Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
+ Suspendisse id sem consectetuer libero luctus adipiscing.
+
+If list items are separated by blank lines, Markdown will wrap the
+items in `<p>` tags in the HTML output. For example, this input:
+
+ * Bird
+ * Magic
+
+will turn into:
+
+ <ul>
+ <li>Bird</li>
+ <li>Magic</li>
+ </ul>
+
+But this:
+
+ * Bird
+
+ * Magic
+
+will turn into:
+
+ <ul>
+ <li><p>Bird</p></li>
+ <li><p>Magic</p></li>
+ </ul>
+
+List items may consist of multiple paragraphs. Each subsequent
+paragraph in a list item must be indented by either 4 spaces
+or one tab:
+
+ 1. This is a list item with two paragraphs. Lorem ipsum dolor
+ sit amet, consectetuer adipiscing elit. Aliquam hendrerit
+ mi posuere lectus.
+
+ Vestibulum enim wisi, viverra nec, fringilla in, laoreet
+ vitae, risus. Donec sit amet nisl. Aliquam semper ipsum
+ sit amet velit.
+
+ 2. Suspendisse id sem consectetuer libero luctus adipiscing.
+
+It looks nice if you indent every line of the subsequent
+paragraphs, but here again, Markdown will allow you to be
+lazy:
+
+ * This is a list item with two paragraphs.
+
+ This is the second paragraph in the list item. You're
+ only required to indent the first line. Lorem ipsum dolor
+ sit amet, consectetuer adipiscing elit.
+
+ * Another item in the same list.
+
+To put a blockquote within a list item, the blockquote's `>`
+delimiters need to be indented:
+
+ * A list item with a blockquote:
+
+ > This is a blockquote
+ > inside a list item.
+
+To put a code block within a list item, the code block needs
+to be indented *twice* -- 8 spaces or two tabs:
+
+ * A list item with a code block:
+
+ <code goes here>
+
+
+It's worth noting that it's possible to trigger an ordered list by
+accident, by writing something like this:
+
+ 1986. What a great season.
+
+In other words, a *number-period-space* sequence at the beginning of a
+line. To avoid this, you can backslash-escape the period:
+
+ 1986\. What a great season.
+
+
+
+<h3 id="precode">Code Blocks</h3>
+
+Pre-formatted code blocks are used for writing about programming or
+markup source code. Rather than forming normal paragraphs, the lines
+of a code block are interpreted literally. Markdown wraps a code block
+in both `<pre>` and `<code>` tags.
+
+To produce a code block in Markdown, simply indent every line of the
+block by at least 4 spaces or 1 tab. For example, given this input:
+
+ This is a normal paragraph:
+
+ This is a code block.
+
+Markdown will generate:
+
+ <p>This is a normal paragraph:</p>
+
+ <pre><code>This is a code block.
+ </code></pre>
+
+One level of indentation -- 4 spaces or 1 tab -- is removed from each
+line of the code block. For example, this:
+
+ Here is an example of AppleScript:
+
+ tell application "Foo"
+ beep
+ end tell
+
+will turn into:
+
+ <p>Here is an example of AppleScript:</p>
+
+ <pre><code>tell application "Foo"
+ beep
+ end tell
+ </code></pre>
+
+A code block continues until it reaches a line that is not indented
+(or the end of the article).
+
+Within a code block, ampersands (`&`) and angle brackets (`<` and `>`)
+are automatically converted into HTML entities. This makes it very
+easy to include example HTML source code using Markdown -- just paste
+it and indent it, and Markdown will handle the hassle of encoding the
+ampersands and angle brackets. For example, this:
+
+ <div class="footer">
+ &copy; 2004 Foo Corporation
+ </div>
+
+will turn into:
+
+ <pre><code>&lt;div class="footer"&gt;
+ &amp;copy; 2004 Foo Corporation
+ &lt;/div&gt;
+ </code></pre>
+
+Regular Markdown syntax is not processed within code blocks. E.g.,
+asterisks are just literal asterisks within a code block. This means
+it's also easy to use Markdown to write about Markdown's own syntax.
+
+
+
+<h3 id="hr">Horizontal Rules</h3>
+
+You can produce a horizontal rule tag (`<hr />`) by placing three or
+more hyphens, asterisks, or underscores on a line by themselves. If you
+wish, you may use spaces between the hyphens or asterisks. Each of the
+following lines will produce a horizontal rule:
+
+ * * *
+
+ ***
+
+ *****
+
+ - - -
+
+ ---------------------------------------
+
+ _ _ _
+
+
+* * *
+
+<h2 id="span">Span Elements</h2>
+
+<h3 id="link">Links</h3>
+
+Markdown supports two styles of links: *inline* and *reference*.
+
+In both styles, the link text is delimited by [square brackets].
+
+To create an inline link, use a set of regular parentheses immediately
+after the link text's closing square bracket. Inside the parentheses,
+put the URL where you want the link to point, along with an *optional*
+title for the link, surrounded in quotes. For example:
+
+ This is [an example](http://example.com/ "Title") inline link.
+
+ [This link](http://example.net/) has no title attribute.
+
+Will produce:
+
+ <p>This is <a href="http://example.com/" title="Title">
+ an example</a> inline link.</p>
+
+ <p><a href="http://example.net/">This link</a> has no
+ title attribute.</p>
+
+If you're referring to a local resource on the same server, you can
+use relative paths:
+
+ See my [About](/about/) page for details.
+
+Reference-style links use a second set of square brackets, inside
+which you place a label of your choosing to identify the link:
+
+ This is [an example][id] reference-style link.
+
+You can optionally use a space to separate the sets of brackets:
+
+ This is [an example] [id] reference-style link.
+
+Then, anywhere in the document, you define your link label like this,
+on a line by itself:
+
+ [id]: http://example.com/ "Optional Title Here"
+
+That is:
+
+* Square brackets containing the link identifier (optionally
+ indented from the left margin using up to three spaces);
+* followed by a colon;
+* followed by one or more spaces (or tabs);
+* followed by the URL for the link;
+* optionally followed by a title attribute for the link, enclosed
+ in double or single quotes.
+
+The link URL may, optionally, be surrounded by angle brackets:
+
+ [id]: <http://example.com/> "Optional Title Here"
+
+You can put the title attribute on the next line and use extra spaces
+or tabs for padding, which tends to look better with longer URLs:
+
+ [id]: http://example.com/longish/path/to/resource/here
+ "Optional Title Here"
+
+Link definitions are only used for creating links during Markdown
+processing, and are stripped from your document in the HTML output.
+
+Link definition names may consist of letters, numbers, spaces, and punctuation -- but they are *not* case sensitive. E.g. these two links:
+
+ [link text][a]
+ [link text][A]
+
+are equivalent.
+
+The *implicit link name* shortcut allows you to omit the name of the
+link, in which case the link text itself is used as the name.
+Just use an empty set of square brackets -- e.g., to link the word
+"Google" to the google.com web site, you could simply write:
+
+ [Google][]
+
+And then define the link:
+
+ [Google]: http://google.com/
+
+Because link names may contain spaces, this shortcut even works for
+multiple words in the link text:
+
+ Visit [Daring Fireball][] for more information.
+
+And then define the link:
+
+ [Daring Fireball]: http://daringfireball.net/
+
+Link definitions can be placed anywhere in your Markdown document. I
+tend to put them immediately after each paragraph in which they're
+used, but if you want, you can put them all at the end of your
+document, sort of like footnotes.
+
+Here's an example of reference links in action:
+
+ I get 10 times more traffic from [Google] [1] than from
+ [Yahoo] [2] or [MSN] [3].
+
+ [1]: http://google.com/ "Google"
+ [2]: http://search.yahoo.com/ "Yahoo Search"
+ [3]: http://search.msn.com/ "MSN Search"
+
+Using the implicit link name shortcut, you could instead write:
+
+ I get 10 times more traffic from [Google][] than from
+ [Yahoo][] or [MSN][].
+
+ [google]: http://google.com/ "Google"
+ [yahoo]: http://search.yahoo.com/ "Yahoo Search"
+ [msn]: http://search.msn.com/ "MSN Search"
+
+Both of the above examples will produce the following HTML output:
+
+ <p>I get 10 times more traffic from <a href="http://google.com/"
+ title="Google">Google</a> than from
+ <a href="http://search.yahoo.com/" title="Yahoo Search">Yahoo</a>
+ or <a href="http://search.msn.com/" title="MSN Search">MSN</a>.</p>
+
+For comparison, here is the same paragraph written using
+Markdown's inline link style:
+
+ I get 10 times more traffic from [Google](http://google.com/ "Google")
+ than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or
+ [MSN](http://search.msn.com/ "MSN Search").
+
+The point of reference-style links is not that they're easier to
+write. The point is that with reference-style links, your document
+source is vastly more readable. Compare the above examples: using
+reference-style links, the paragraph itself is only 81 characters
+long; with inline-style links, it's 176 characters; and as raw HTML,
+it's 234 characters. In the raw HTML, there's more markup than there
+is text.
+
+With Markdown's reference-style links, a source document much more
+closely resembles the final output, as rendered in a browser. By
+allowing you to move the markup-related metadata out of the paragraph,
+you can add links without interrupting the narrative flow of your
+prose.
+
+
+<h3 id="em">Emphasis</h3>
+
+Markdown treats asterisks (`*`) and underscores (`_`) as indicators of
+emphasis. Text wrapped with one `*` or `_` will be wrapped with an
+HTML `<em>` tag; double `*`'s or `_`'s will be wrapped with an HTML
+`<strong>` tag. E.g., this input:
+
+ *single asterisks*
+
+ _single underscores_
+
+ **double asterisks**
+
+ __double underscores__
+
+will produce:
+
+ <em>single asterisks</em>
+
+ <em>single underscores</em>
+
+ <strong>double asterisks</strong>
+
+ <strong>double underscores</strong>
+
+You can use whichever style you prefer; the lone restriction is that
+the same character must be used to open and close an emphasis span.
+
+Emphasis can be used in the middle of a word:
+
+ un*fucking*believable
+
+But if you surround an `*` or `_` with spaces, it'll be treated as a
+literal asterisk or underscore.
+
+To produce a literal asterisk or underscore at a position where it
+would otherwise be used as an emphasis delimiter, you can backslash
+escape it:
+
+ \*this text is surrounded by literal asterisks\*
+
+
+
+<h3 id="code">Code</h3>
+
+To indicate a span of code, wrap it with backtick quotes (`` ` ``).
+Unlike a pre-formatted code block, a code span indicates code within a
+normal paragraph. For example:
+
+ Use the `printf()` function.
+
+will produce:
+
+ <p>Use the <code>printf()</code> function.</p>
+
+To include a literal backtick character within a code span, you can use
+multiple backticks as the opening and closing delimiters:
+
+ ``There is a literal backtick (`) here.``
+
+which will produce this:
+
+ <p><code>There is a literal backtick (`) here.</code></p>
+
+The backtick delimiters surrounding a code span may include spaces --
+one after the opening, one before the closing. This allows you to place
+literal backtick characters at the beginning or end of a code span:
+
+ A single backtick in a code span: `` ` ``
+
+ A backtick-delimited string in a code span: `` `foo` ``
+
+will produce:
+
+ <p>A single backtick in a code span: <code>`</code></p>
+
+ <p>A backtick-delimited string in a code span: <code>`foo`</code></p>
+
+With a code span, ampersands and angle brackets are encoded as HTML
+entities automatically, which makes it easy to include example HTML
+tags. Markdown will turn this:
+
+ Please don't use any `<blink>` tags.
+
+into:
+
+ <p>Please don't use any <code>&lt;blink&gt;</code> tags.</p>
+
+You can write this:
+
+ `&#8212;` is the decimal-encoded equivalent of `&mdash;`.
+
+to produce:
+
+ <p><code>&amp;#8212;</code> is the decimal-encoded
+ equivalent of <code>&amp;mdash;</code>.</p>
+
+
+
+<h3 id="img">Images</h3>
+
+Admittedly, it's fairly difficult to devise a "natural" syntax for
+placing images into a plain text document format.
+
+Markdown uses an image syntax that is intended to resemble the syntax
+for links, allowing for two styles: *inline* and *reference*.
+
+Inline image syntax looks like this:
+
+ ![Alt text](/path/to/img.jpg)
+
+ ![Alt text](/path/to/img.jpg "Optional title")
+
+That is:
+
+* An exclamation mark: `!`;
+* followed by a set of square brackets, containing the `alt`
+ attribute text for the image;
+* followed by a set of parentheses, containing the URL or path to
+ the image, and an optional `title` attribute enclosed in double
+ or single quotes.
+
+Reference-style image syntax looks like this:
+
+ ![Alt text][id]
+
+Where "id" is the name of a defined image reference. Image references
+are defined using syntax identical to link references:
+
+ [id]: url/to/image "Optional title attribute"
+
+As of this writing, Markdown has no syntax for specifying the
+dimensions of an image; if this is important to you, you can simply
+use regular HTML `<img>` tags.
+
+
+* * *
+
+
+<h2 id="misc">Miscellaneous</h2>
+
+<h3 id="autolink">Automatic Links</h3>
+
+Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:
+
+ <http://example.com/>
+
+Markdown will turn this into:
+
+ <a href="http://example.com/">http://example.com/</a>
+
+Automatic links for email addresses work similarly, except that
+Markdown will also perform a bit of randomized decimal and hex
+entity-encoding to help obscure your address from address-harvesting
+spambots. For example, Markdown will turn this:
+
+ <address@example.com>
+
+into something like this:
+
+ <a href="&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:&#x61;&#x64;&#x64;&#x72;&#x65;
+ &#115;&#115;&#64;&#101;&#120;&#x61;&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;
+ &#109;">&#x61;&#x64;&#x64;&#x72;&#x65;&#115;&#115;&#64;&#101;&#120;&#x61;
+ &#109;&#x70;&#x6C;e&#x2E;&#99;&#111;&#109;</a>
+
+which will render in a browser as a clickable link to "address@example.com".
+
+(This sort of entity-encoding trick will indeed fool many, if not
+most, address-harvesting bots, but it definitely won't fool all of
+them. It's better than nothing, but an address published in this way
+will probably eventually start receiving spam.)
+
+
+
+<h3 id="backslash">Backslash Escapes</h3>
+
+Markdown allows you to use backslash escapes to generate literal
+characters which would otherwise have special meaning in Markdown's
+formatting syntax. For example, if you wanted to surround a word with
+literal asterisks (instead of an HTML `<em>` tag), you can backslashes
+before the asterisks, like this:
+
+ \*literal asterisks\*
+
+Markdown provides backslash escapes for the following characters:
+
+ \ backslash
+ ` backtick
+ * asterisk
+ _ underscore
+ {} curly braces
+ [] square brackets
+ () parentheses
+ # hash mark
+ + plus sign
+ - minus sign (hyphen)
+ . dot
+ ! exclamation mark
+
diff --git a/oldtests/Original/Nested_blockquotes.html b/oldtests/Original/Nested_blockquotes.html
new file mode 100644
index 0000000..02efc59
--- /dev/null
+++ b/oldtests/Original/Nested_blockquotes.html
@@ -0,0 +1,7 @@
+<blockquote>
+<p>foo</p>
+<blockquote>
+<p>bar</p>
+</blockquote>
+<p>foo</p>
+</blockquote>
diff --git a/oldtests/Original/Nested_blockquotes.markdown b/oldtests/Original/Nested_blockquotes.markdown
new file mode 100644
index 0000000..ed3c624
--- /dev/null
+++ b/oldtests/Original/Nested_blockquotes.markdown
@@ -0,0 +1,5 @@
+> foo
+>
+> > bar
+>
+> foo
diff --git a/oldtests/Original/Ordered_and_unordered_lists.html b/oldtests/Original/Ordered_and_unordered_lists.html
new file mode 100644
index 0000000..78d752e
--- /dev/null
+++ b/oldtests/Original/Ordered_and_unordered_lists.html
@@ -0,0 +1,112 @@
+<h2>Unordered</h2>
+<p>Asterisks tight:</p>
+<ul>
+<li>asterisk 1</li>
+<li>asterisk 2</li>
+<li>asterisk 3</li>
+</ul>
+<p>Asterisks loose:</p>
+<ul>
+<li><p>asterisk 1</p></li>
+<li><p>asterisk 2</p></li>
+<li><p>asterisk 3</p></li>
+</ul>
+<hr />
+<p>Pluses tight:</p>
+<ul>
+<li>Plus 1</li>
+<li>Plus 2</li>
+<li>Plus 3</li>
+</ul>
+<p>Pluses loose:</p>
+<ul>
+<li><p>Plus 1</p></li>
+<li><p>Plus 2</p></li>
+<li><p>Plus 3</p></li>
+</ul>
+<hr />
+<p>Minuses tight:</p>
+<ul>
+<li>Minus 1</li>
+<li>Minus 2</li>
+<li>Minus 3</li>
+</ul>
+<p>Minuses loose:</p>
+<ul>
+<li><p>Minus 1</p></li>
+<li><p>Minus 2</p></li>
+<li><p>Minus 3</p></li>
+</ul>
+<h2>Ordered</h2>
+<p>Tight:</p>
+<ol>
+<li>First</li>
+<li>Second</li>
+<li>Third</li>
+</ol>
+<p>and:</p>
+<ol>
+<li>One</li>
+<li>Two</li>
+<li>Three</li>
+</ol>
+<p>Loose using tabs:</p>
+<ol>
+<li><p>First</p></li>
+<li><p>Second</p></li>
+<li><p>Third</p></li>
+</ol>
+<p>and using spaces:</p>
+<ol>
+<li><p>One</p></li>
+<li><p>Two</p></li>
+<li><p>Three</p></li>
+</ol>
+<p>Multiple paragraphs:</p>
+<ol>
+<li><p>Item 1, graf one.</p>
+<p>Item 2. graf two. The quick brown fox jumped over the lazy dog's
+back.</p></li>
+<li><p>Item 2.</p></li>
+<li><p>Item 3.</p></li>
+</ol>
+<h2>Nested</h2>
+<ul>
+<li>Tab
+<ul>
+<li>Tab
+<ul>
+<li>Tab</li>
+</ul></li>
+</ul></li>
+</ul>
+<p>Here's another:</p>
+<ol>
+<li>First</li>
+<li>Second:
+<ul>
+<li>Fee</li>
+<li>Fie</li>
+<li>Foe</li>
+</ul></li>
+<li>Third</li>
+</ol>
+<p>Same thing but with paragraphs:</p>
+<ol>
+<li><p>First</p></li>
+<li><p>Second:</p>
+<ul>
+<li>Fee</li>
+<li>Fie</li>
+<li>Foe</li>
+</ul></li>
+<li><p>Third</p></li>
+</ol>
+<p>This was an error in Markdown 1.0.1:</p>
+<ul>
+<li><p>this</p>
+<ul>
+<li>sub</li>
+</ul>
+<p>that</p></li>
+</ul>
diff --git a/oldtests/Original/Ordered_and_unordered_lists.markdown b/oldtests/Original/Ordered_and_unordered_lists.markdown
new file mode 100644
index 0000000..7f3b497
--- /dev/null
+++ b/oldtests/Original/Ordered_and_unordered_lists.markdown
@@ -0,0 +1,131 @@
+## Unordered
+
+Asterisks tight:
+
+* asterisk 1
+* asterisk 2
+* asterisk 3
+
+
+Asterisks loose:
+
+* asterisk 1
+
+* asterisk 2
+
+* asterisk 3
+
+* * *
+
+Pluses tight:
+
++ Plus 1
++ Plus 2
++ Plus 3
+
+
+Pluses loose:
+
++ Plus 1
+
++ Plus 2
+
++ Plus 3
+
+* * *
+
+
+Minuses tight:
+
+- Minus 1
+- Minus 2
+- Minus 3
+
+
+Minuses loose:
+
+- Minus 1
+
+- Minus 2
+
+- Minus 3
+
+
+## Ordered
+
+Tight:
+
+1. First
+2. Second
+3. Third
+
+and:
+
+1. One
+2. Two
+3. Three
+
+
+Loose using tabs:
+
+1. First
+
+2. Second
+
+3. Third
+
+and using spaces:
+
+1. One
+
+2. Two
+
+3. Three
+
+Multiple paragraphs:
+
+1. Item 1, graf one.
+
+ Item 2. graf two. The quick brown fox jumped over the lazy dog's
+ back.
+
+2. Item 2.
+
+3. Item 3.
+
+
+
+## Nested
+
+* Tab
+ * Tab
+ * Tab
+
+Here's another:
+
+1. First
+2. Second:
+ * Fee
+ * Fie
+ * Foe
+3. Third
+
+Same thing but with paragraphs:
+
+1. First
+
+2. Second:
+ * Fee
+ * Fie
+ * Foe
+
+3. Third
+
+
+This was an error in Markdown 1.0.1:
+
+* this
+
+ * sub
+
+ that
diff --git a/oldtests/Original/README b/oldtests/Original/README
new file mode 100644
index 0000000..5143258
--- /dev/null
+++ b/oldtests/Original/README
@@ -0,0 +1,15 @@
+These are from John Gruber's original markdown test suite, via
+Michel Fortin's mdtest.
+
+The html files have been modified slightly in ways that do not affect the
+semantics. For example, entities are used for quotes in text, and
+blank lines are omitted between block-level tags.
+
+Trailing blank spaces are removed from lines in raw HTML blocks.
+
+The one (insignificant) semantic change is switching the order
+of emph and strong tags in the output for ***strong and emph***.
+
+We have removed Hard-wrapped_paragraphs_with_list-like_lines tests,
+because the new implementation no longer requires a blank line
+before a list.
diff --git a/oldtests/Original/Strong_and_em_together.html b/oldtests/Original/Strong_and_em_together.html
new file mode 100644
index 0000000..2629594
--- /dev/null
+++ b/oldtests/Original/Strong_and_em_together.html
@@ -0,0 +1,4 @@
+<p><strong><em>This is strong and em.</em></strong></p>
+<p>So is <strong><em>this</em></strong> word.</p>
+<p><strong><em>This is strong and em.</em></strong></p>
+<p>So is <strong><em>this</em></strong> word.</p>
diff --git a/oldtests/Original/Strong_and_em_together.markdown b/oldtests/Original/Strong_and_em_together.markdown
new file mode 100644
index 0000000..95ee690
--- /dev/null
+++ b/oldtests/Original/Strong_and_em_together.markdown
@@ -0,0 +1,7 @@
+***This is strong and em.***
+
+So is ***this*** word.
+
+___This is strong and em.___
+
+So is ___this___ word.
diff --git a/oldtests/Original/Tabs.html b/oldtests/Original/Tabs.html
new file mode 100644
index 0000000..5389bdf
--- /dev/null
+++ b/oldtests/Original/Tabs.html
@@ -0,0 +1,19 @@
+<ul>
+<li><p>this is a list item
+indented with tabs</p></li>
+<li><p>this is a list item
+indented with spaces</p></li>
+</ul>
+<p>Code:</p>
+<pre><code>this code block is indented by one tab
+</code></pre>
+<p>And:</p>
+<pre><code> this code block is indented by two tabs
+</code></pre>
+<p>And:</p>
+<pre><code>+ this is an example list item
+ indented with tabs
+
++ this is an example list item
+ indented with spaces
+</code></pre>
diff --git a/oldtests/Original/Tabs.markdown b/oldtests/Original/Tabs.markdown
new file mode 100644
index 0000000..589d113
--- /dev/null
+++ b/oldtests/Original/Tabs.markdown
@@ -0,0 +1,21 @@
++ this is a list item
+ indented with tabs
+
++ this is a list item
+ indented with spaces
+
+Code:
+
+ this code block is indented by one tab
+
+And:
+
+ this code block is indented by two tabs
+
+And:
+
+ + this is an example list item
+ indented with tabs
+
+ + this is an example list item
+ indented with spaces
diff --git a/oldtests/Original/Tidyness.html b/oldtests/Original/Tidyness.html
new file mode 100644
index 0000000..f2a8ce7
--- /dev/null
+++ b/oldtests/Original/Tidyness.html
@@ -0,0 +1,8 @@
+<blockquote>
+<p>A list within a blockquote:</p>
+<ul>
+<li>asterisk 1</li>
+<li>asterisk 2</li>
+<li>asterisk 3</li>
+</ul>
+</blockquote>
diff --git a/oldtests/Original/Tidyness.markdown b/oldtests/Original/Tidyness.markdown
new file mode 100644
index 0000000..5f18b8d
--- /dev/null
+++ b/oldtests/Original/Tidyness.markdown
@@ -0,0 +1,5 @@
+> A list within a blockquote:
+>
+> * asterisk 1
+> * asterisk 2
+> * asterisk 3
diff --git a/oldtests/Tabs/TabConversionUnicode.html b/oldtests/Tabs/TabConversionUnicode.html
new file mode 100644
index 0000000..f596f6a
--- /dev/null
+++ b/oldtests/Tabs/TabConversionUnicode.html
@@ -0,0 +1 @@
+<p><code>То лпой</code> is a Russian word with a tab inside.</p>
diff --git a/oldtests/Tabs/TabConversionUnicode.markdown b/oldtests/Tabs/TabConversionUnicode.markdown
new file mode 100644
index 0000000..0bd7b52
--- /dev/null
+++ b/oldtests/Tabs/TabConversionUnicode.markdown
@@ -0,0 +1 @@
+`То лпой` is a Russian word with a tab inside.
diff --git a/runtests.pl b/runtests.pl
new file mode 100644
index 0000000..5facbe6
--- /dev/null
+++ b/runtests.pl
@@ -0,0 +1,159 @@
+#!/usr/bin/env perl
+use warnings;
+use strict;
+use Term::ANSIColor;
+use IO::Handle;
+use IPC::Open2;
+
+my $usage="runtests.pl PROGRAM SPEC\nSet ANSI_COLORS_DISABLED=1 if you redirect to a file.\nSet PATT='...' to restrict tests to sections matching a regex.\n";
+
+# Command line: the markdown program to test and the spec file.
+# PATT, read from the environment, optionally filters sections.
+my ($PROG, $SPEC) = @ARGV;
+my $PATT=$ENV{'PATT'};
+
+unless (defined $PROG && defined $SPEC) {
+    print STDERR $usage;
+    exit 1;
+}
+
+# Running totals, reported after the whole spec has been read.
+my ($passed, $failed, $skipped) = (0, 0, 0);
+
+# Markdown implementations vary on insignificant whitespace.
+# Some leave blanks between block elements, others don't.
+# tidy normalizes HTML so that expected and actual output can
+# be compared.  It takes one argument, the HTML as a string,
+# and returns the normalized string.  Lines inside <pre>
+# elements are passed through untouched.
+sub tidy
+{
+    my $inpre = 0;
+    my $out = "";
+    for (split /^/, $_[0]) {
+        my $opens = /<pre/;
+        my $closes = /<\/pre/;
+        if ($opens) {
+            $inpre = 1;
+        } elsif ($closes) {
+            $inpre = 0;
+        }
+        if ($inpre) {
+            $out .= $_;
+            # <pre>...</pre> on a single line: the element is
+            # already closed, so resume normalizing afterwards.
+            $inpre = 0 if $closes;
+            next;
+        }
+        # remove leading spaces
+        s/^ *//;
+        # remove trailing spaces
+        s/ *$//;
+        # collapse each run of consecutive spaces to one space
+        s/ {2,}/ /g;
+        # remove spaces before /> in every tag
+        s/ *\/>/\/>/g;
+        # skip blank line
+        next if /^$/;
+        $out .= $_;
+    }
+    return $out;
+}
+
+# Run one spec example through $PROG and compare the result.
+# Arguments: markdown source, expected HTML, test name (used
+# in the failure report).  Returns 1 on a pass, 0 on a failure.
+sub dotest
+{
+    my ($markdown, $html, $testname) = @_;
+    # We use → to indicate tab and ␣ space in the spec
+    for ($markdown, $html) {
+        s/→/\t/g;
+        s/␣/ /g;
+    }
+    my $pid = open2(my $out, my $in, $PROG);
+    print $in $markdown;
+    close $in;
+    my $actual = do { local $/; <$out>; };
+    close $out;
+    waitpid($pid, 0);    # reap the child; open2 does not
+    $html = &tidy($html);
+    $actual = &tidy($actual);
+    $actual =~ s/\&#39;/'/g;
+    if ($actual eq $html) {
+        print colored("✓", "green");
+        return 1;
+    }
+    print colored("\n✘ $testname", "red"), "\n";
+    print color "cyan";
+    print "=== markdown ===============\n";
+    print $markdown;
+    print "=== expected ===============\n";
+    print $html;
+    print "=== got ====================\n";
+    print $actual;
+    # restore the terminal's default colors
+    print color "reset";
+    return 0;
+}
+
+# Walk the spec line by line.  A line holding only "." advances
+# $stage: 0 = prose, 1 = example markdown, 2 = expected HTML.
+my $stage = 0;
+my $markdown = "";
+my $html = "";
+my $example = 0;
+my $linenum = 0;
+my $exampleline = 0;
+my @secnums = ();
+my $secheading = "";
+
+open(my $spec, '<', $SPEC) or die "Cannot open $SPEC: $!\n";
+while (<$spec>) {
+    $linenum++;
+    if (/^\.$/) {
+        $stage = ($stage + 1) % 3;
+        $exampleline = $linenum if $stage == 1;
+        if ($stage == 0) {
+            $example++;
+            if (!$PATT || $secheading =~ /$PATT/) {
+                if (&dotest($markdown, $html,
+                            "Example $example (line $exampleline)")) {
+                    $passed++;
+                } else {
+                    $failed++;
+                }
+            } else {
+                $skipped++;
+            }
+            $markdown = "";
+            $html = "";
+        }
+    } elsif ($stage == 0 && $_ =~ /^<!-- END TESTS -->/) {
+        last;
+    } elsif ($stage == 0 && $_ =~ /^(#+) +(.*)/) {
+        # Section header: keep a running number such as 2.1.3.
+        my $seclevel = length($1);
+        $secheading = $2;
+        if ($#secnums == $seclevel - 1) {
+            $secnums[$#secnums]++;
+        } elsif ($#secnums > $seclevel - 1) {
+            @secnums = @secnums[0..($seclevel - 1)];
+            $secnums[$#secnums]++;
+        } else {
+            push(@secnums, 1) while $#secnums < $seclevel - 1;
+        }
+        if (!$PATT || $secheading =~ /$PATT/) {
+            print ("\n", join(".", @secnums) . " " . $secheading, " ");
+        }
+    } elsif ($stage == 1) {
+        $markdown .= $_;
+    } elsif ($stage == 2) {
+        $html .= $_;
+    }
+}
+close $spec;
+
+print "\n";
+print STDERR colored("$passed tests passed, $failed failed, $skipped skipped.", "bold"), "\n";
+# Exit statuses wrap modulo 256; cap so 256 failures is not "success".
+exit($failed > 255 ? 255 : $failed);
diff --git a/spec.txt b/spec.txt
new file mode 100644
index 0000000..96721e6
--- /dev/null
+++ b/spec.txt
@@ -0,0 +1,6044 @@
+---
+title: Standard Markdown Spec
+author:
+- John MacFarlane
+version: 1
+date: 2014-07-21
+...
+
+# Introduction
+
+## What is markdown?
+
+Markdown is a plain text format for writing structured documents,
+based on conventions used for indicating formatting in email and
+usenet posts. It was developed in 2004 by John Gruber, who wrote
+the first markdown-to-HTML converter in perl, and it soon became
+widely used in websites. By 2014 there were dozens of
+implementations in many languages. Some of them extended basic
+markdown syntax with conventions for footnotes, definition lists,
+tables, and other constructs, and some allowed output not just in
+HTML but in LaTeX and many other formats.
+
+## Why is a spec needed?
+
+John Gruber's [canonical description of markdown's
+syntax](http://daringfireball.net/projects/markdown/syntax)
+does not specify the syntax unambiguously. Here are some examples of
+questions it does not answer:
+
+1. How much indentation is needed for a sublist? The spec says that
+ continuation paragraphs need to be indented four spaces, but is
+ not fully explicit about sublists. It is natural to think that
+ they, too, must be indented four spaces, but `Markdown.pl` does
+ not require that. This is hardly a "corner case," and divergences
+ between implementations on this issue often lead to surprises for
+ users in real documents. (See [this comment by John
+ Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).)
+
+2. Is a blank line needed before a block quote or header?
+ Most implementations do not require the blank line. However,
+ this can lead to unexpected results in hard-wrapped text, and
+ also to ambiguities in parsing (note that some implementations
+ put the header inside the blockquote, while others do not).
+ (John Gruber has also spoken [in favor of requiring the blank
+ lines](http://article.gmane.org/gmane.text.markdown.general/2146).)
+
+3. What is the exact rule for determining when list items get
+ wrapped in `<p>` tags? Can a list be partially "loose" and partially
+ "tight"? What should we do with a list like this?
+
+ ``` markdown
+ 1. one
+
+ 2. two
+ 3. three
+ ```
+
+ Or this?
+
+ ``` markdown
+ 1. one
+
+ - a
+
+ - b
+ 2. two
+ ```
+
+ (There are some relevant comments by John Gruber
+ [here](http://article.gmane.org/gmane.text.markdown.general/2554).)
+
+4. When list markers change from bullets to numbers, should we have
+ two lists or one?
+
+ ``` markdown
+ 1. fee
+ 2. fie
+ - foe
+ - fum
+ ```
+
+5. What are the precedence rules for the markers of inline structure?
+ For example, is the following a valid link, or does the code span
+ take precedence?
+
+ ``` markdown
+ [foo `](bar)`
+ ```
+
+6. What are the precedence rules for markers of emphasis and strong
+ emphasis? For example, how should the following be parsed?
+
+ ``` markdown
+ *foo *bar** baz*
+ ```
+
+7. Can list items include headers?
+
+ ``` markdown
+ - # Heading
+ ```
+
+8. Can link references be defined inside block quotes or list items?
+
+ ``` markdown
+ > Blockquote [foo].
+ >
+ > [foo]: /url
+ ```
+
+In the absence of a spec, early implementers consulted `Markdown.pl`
+to resolve these ambiguities. But `Markdown.pl` was quite buggy, and
+gave manifestly bad results in many cases, so it was not a
+satisfactory replacement for a spec.
+
+Because there is no unambiguous spec, implementations have diverged
+considerably. As a result, users are often surprised to find that
+a document that renders one way on one system (say, a github wiki)
+renders differently on another (say, converting to docbook using
+pandoc). To make matters worse, because nothing in markdown counts
+as a "syntax error," the divergence often isn't discovered right away.
+
+## About this document
+
+This document attempts to specify markdown syntax unambiguously.
+It contains many examples with side-by-side markdown and
+HTML. These are intended to double as conformance tests. An
+accompanying script `runtests.pl` can be used to run the tests
+against any markdown program:
+
+    perl runtests.pl PROGRAM spec.txt
+
+Since this document describes how markdown is to be parsed into
+an abstract syntax tree, it would have made sense to use an abstract
+representation of the syntax tree instead of HTML. But HTML is capable
+of representing the structural distinctions we need to make, and the
+choice of HTML for the tests makes it possible to run the tests against
+an implementation without writing an abstract syntax tree renderer.
+
+This document is generated from a text file, `spec.txt`, written
+in markdown with a small extension for the side-by-side tests.
+The script `spec2md.pl` can be used to turn `spec.txt` into pandoc
+markdown, which can then be converted into other formats.
+
+In the examples, the `→` character is used to represent tabs.
+
+# Preprocessing
+
+A [line](#line) <a id="line"/>
+is a sequence of one or more characters followed by a line
+ending (CR, LF, or CRLF, depending on the platform) or by the end of
+file.
+
+This spec does not specify an encoding; it thinks of lines as composed
+of characters rather than bytes. A conforming parser may be limited
+to a certain encoding.
+
+Tabs in lines are expanded to spaces, with a tab stop of 4 characters:
+
+.
+foo→baz→→bim
+.
+<p>foo baz bim</p>
+.
+
+.
+οὐ→χρῆν
+.
+<p>οὐ χρῆν</p>
+.
+
+Line endings are replaced by newline characters (LF).
+
+A line containing only spaces (after tab expansion) followed by
+a line ending is called a [blank line](#blank-line). <a
+id="blank-line"/>
+
+# Blocks and inlines
+
+We can think of a document as a sequence of [blocks](#block)<a
+id="block"/>---structural elements like paragraphs, block quotations,
+lists, headers, rules, and code blocks. Blocks can contain other
+blocks, or they can contain [inline](#inline)<a id="inline"/> content:
+words, spaces, links, emphasized text, images, and inline code.
+
+## Precedence
+
+Indicators of block structure always take precedence over indicators
+of inline structure. So, for example, the following is a list with
+two items, not a list with one item containing a code span:
+
+.
+- `one
+- two`
+.
+<ul>
+<li>`one</li>
+<li>two`</li>
+</ul>
+.
+
+This means that parsing can proceed in two steps: first, the block
+structure of the document can be discerned; second, text lines inside
+paragraphs, headers, and other block constructs can be parsed for inline
+structure. The second step requires information about link reference
+definitions that will be available only at the end of the first
+step. Note that the first step requires processing lines in sequence,
+but the second can be parallelized, since the inline parsing of
+one block element does not affect the inline parsing of any other.
+
+## Container blocks and leaf blocks
+
+We can divide blocks into two types:
+[container blocks](#container-block), <a id="container-block"/>
+which can contain other blocks, and [leaf blocks](#leaf-block),
+<a id="leaf-block"/> which cannot.
+
+# Leaf blocks
+
+This section describes the different kinds of leaf block that make up a
+markdown document.
+
+## Horizontal rules
+
+A line consisting of 0-3 spaces of indentation, followed by a sequence
+of three or more matching `-`, `_`, or `*` characters, each followed
+optionally by any number of spaces, forms a [horizontal
+rule](#horizontal-rule). <a id="horizontal-rule"/>
+
+.
+***
+---
+___
+.
+<hr />
+<hr />
+<hr />
+.
+
+Wrong characters:
+
+.
++++
+.
+<p>+++</p>
+.
+
+.
+===
+.
+<p>===</p>
+.
+
+Not enough characters:
+
+.
+--
+**
+__
+.
+<p>--
+**
+__</p>
+.
+
+One to three spaces indent are allowed:
+
+.
+ ***
+  ***
+   ***
+.
+<hr />
+<hr />
+<hr />
+.
+
+Four spaces is too many:
+
+.
+    ***
+.
+<pre><code>***
+</code></pre>
+.
+
+.
+Foo
+ ***
+.
+<p>Foo
+***</p>
+.
+
+More than three characters may be used:
+
+.
+_____________________________________
+.
+<hr />
+.
+
+Spaces are allowed between the characters:
+
+.
+ - - -
+.
+<hr />
+.
+
+.
+ ** * ** * ** * **
+.
+<hr />
+.
+
+.
+- - - -
+.
+<hr />
+.
+
+Spaces are allowed at the end:
+
+.
+- - - -
+.
+<hr />
+.
+
+However, no other characters may occur at the end or the
+beginning:
+
+.
+_ _ _ _ a
+
+a------
+.
+<p>_ _ _ _ a</p>
+<p>a------</p>
+.
+
+It is required that all of the non-space characters be the same.
+So, this is not a horizontal rule:
+
+.
+ *-*
+.
+<p><em>-</em></p>
+.
+
+Horizontal rules do not need blank lines before or after:
+
+.
+- foo
+***
+- bar
+.
+<ul>
+<li>foo</li>
+</ul>
+<hr />
+<ul>
+<li>bar</li>
+</ul>
+.
+
+Horizontal rules can interrupt a paragraph:
+
+.
+Foo
+***
+bar
+.
+<p>Foo</p>
+<hr />
+<p>bar</p>
+.
+
+Note, however, that this is a setext header, not a paragraph followed
+by a horizontal rule:
+
+.
+Foo
+---
+bar
+.
+<h2>Foo</h2>
+<p>bar</p>
+.
+
+When both a horizontal rule and a list item are possible
+interpretations of a line, the horizontal rule is preferred:
+
+.
+* Foo
+* * *
+* Bar
+.
+<ul>
+<li>Foo</li>
+</ul>
+<hr />
+<ul>
+<li>Bar</li>
+</ul>
+.
+
+If you want a horizontal rule in a list item, use a different bullet:
+
+.
+- Foo
+- * * *
+.
+<ul>
+<li>Foo</li>
+<li><hr /></li>
+</ul>
+.
+
+## ATX headers
+
+An [ATX header](#atx-header) <a id="atx-header"/>
+consists of a string of characters, parsed as inline content, between an
+opening sequence of 1--6 unescaped `#` characters and an optional
+closing sequence of any number of `#` characters. The opening sequence
+of `#` characters cannot be followed directly by a nonspace character.
+The closing `#` characters may be followed by spaces only. The opening
+`#` character may be indented 0-3 spaces. The raw contents of the
+header are stripped of leading and trailing spaces before being parsed
+as inline content. The header level is equal to the number of `#`
+characters in the opening sequence.
+
+Simple headers:
+
+.
+# foo
+## foo
+### foo
+#### foo
+##### foo
+###### foo
+.
+<h1>foo</h1>
+<h2>foo</h2>
+<h3>foo</h3>
+<h4>foo</h4>
+<h5>foo</h5>
+<h6>foo</h6>
+.
+
+More than six `#` characters is not a header:
+
+.
+####### foo
+.
+<p>####### foo</p>
+.
+
+A space is required between the `#` characters and the header's
+contents. Note that many implementations currently do not require
+the space. However, the space was required by the [original ATX
+implementation](http://www.aaronsw.com/2002/atx/atx.py), and it helps
+prevent things like the following from being parsed as headers:
+
+.
+#5 bolt
+.
+<p>#5 bolt</p>
+.
+
+This is not a header, because the first `#` is escaped:
+
+.
+\## foo
+.
+<p>## foo</p>
+.
+
+Contents are parsed as inlines:
+
+.
+# foo *bar* \*baz\*
+.
+<h1>foo <em>bar</em> *baz*</h1>
+.
+
+Leading and trailing blanks are ignored in parsing inline content:
+
+.
+# foo
+.
+<h1>foo</h1>
+.
+
+One to three spaces indentation are allowed:
+
+.
+ ### foo
+ ## foo
+ # foo
+.
+<h3>foo</h3>
+<h2>foo</h2>
+<h1>foo</h1>
+.
+
+Four spaces are too much:
+
+.
+    # foo
+.
+<pre><code># foo
+</code></pre>
+.
+
+.
+foo
+ # bar
+.
+<p>foo
+# bar</p>
+.
+
+A closing sequence of `#` characters is optional:
+
+.
+## foo ##
+ ### bar ###
+.
+<h2>foo</h2>
+<h3>bar</h3>
+.
+
+It need not be the same length as the opening sequence:
+
+.
+# foo ##################################
+##### foo ##
+.
+<h1>foo</h1>
+<h5>foo</h5>
+.
+
+Spaces are allowed after the closing sequence:
+
+.
+### foo ###
+.
+<h3>foo</h3>
+.
+
+A sequence of `#` characters with a nonspace character following it
+is not a closing sequence, but counts as part of the contents of the
+header:
+
+.
+### foo ### b
+.
+<h3>foo ### b</h3>
+.
+
+Backslash-escaped `#` characters do not count as part
+of the closing sequence:
+
+.
+### foo \###
+## foo \#\##
+.
+<h3>foo #</h3>
+<h2>foo ##</h2>
+.
+
+ATX headers need not be separated from surrounding content by blank
+lines, and they can interrupt paragraphs:
+
+.
+****
+## foo
+****
+.
+<hr />
+<h2>foo</h2>
+<hr />
+.
+
+.
+Foo bar
+# baz
+Bar foo
+.
+<p>Foo bar</p>
+<h1>baz</h1>
+<p>Bar foo</p>
+.
+
+ATX headers can be empty:
+
+.
+##
+#
+### ###
+.
+<h2></h2>
+<h1></h1>
+<h3></h3>
+.
+
+## Setext headers
+
+A [setext header](#setext-header) <a id="setext-header"/>
+consists of a line of text, containing at least one nonspace character,
+with no more than 3 spaces indentation, followed by a [setext header
+underline](#setext-header-underline). A [setext header
+underline](#setext-header-underline) <a id="setext-header-underline"/>
+is a sequence of `=` characters or a sequence of `-` characters, with no
+more than 3 spaces indentation and any number of leading or trailing
+spaces. The header is a level 1 header if `=` characters are used, and
+a level 2 header if `-` characters are used. The contents of the header
+are the result of parsing the first line as markdown inline content.
+
+In general, a setext header need not be preceded or followed by a
+blank line. However, it cannot interrupt a paragraph, so when a
+setext header comes after a paragraph, a blank line is needed between
+them.
+
+Simple examples:
+
+.
+Foo *bar*
+=========
+
+Foo *bar*
+---------
+.
+<h1>Foo <em>bar</em></h1>
+<h2>Foo <em>bar</em></h2>
+.
+
+The underlining can be any length:
+
+.
+Foo
+-------------------------
+
+Foo
+=
+.
+<h2>Foo</h2>
+<h1>Foo</h1>
+.
+
+The header content can be indented up to three spaces, and need
+not line up with the underlining:
+
+.
+ Foo
+---
+
+ Foo
+-----
+
+ Foo
+ ===
+.
+<h2>Foo</h2>
+<h2>Foo</h2>
+<h1>Foo</h1>
+.
+
+Four spaces indent is too much:
+
+.
+ Foo
+ ---
+
+ Foo
+---
+.
+<pre><code>Foo
+---
+
+Foo
+</code></pre>
+<hr />
+.
+
+The setext header underline can be indented up to three spaces, and
+may have trailing spaces:
+
+.
+Foo
+ ----
+.
+<h2>Foo</h2>
+.
+
+Four spaces is too much:
+
+.
+Foo
+ ---
+.
+<p>Foo
+---</p>
+.
+
+The setext header underline cannot contain internal spaces:
+
+.
+Foo
+= =
+
+Foo
+--- -
+.
+<p>Foo
+= =</p>
+<p>Foo</p>
+<hr />
+.
+
+Trailing spaces in the content line do not cause a line break:
+
+.
+Foo
+-----
+.
+<h2>Foo</h2>
+.
+
+Nor does a backslash at the end:
+
+.
+Foo\
+----
+.
+<h2>Foo\</h2>
+.
+
+Since indicators of block structure take precedence over
+indicators of inline structure, the following are setext headers:
+
+.
+`Foo
+----
+`
+
+<a title="a lot
+---
+of dashes"/>
+.
+<h2>`Foo</h2>
+<p>`</p>
+<h2>&lt;a title=&quot;a lot</h2>
+<p>of dashes&quot;/&gt;</p>
+.
+
+The setext header underline cannot be a lazy line:
+
+.
+> Foo
+---
+.
+<blockquote>
+<p>Foo</p>
+</blockquote>
+<hr />
+.
+
+A setext header cannot interrupt a paragraph:
+
+.
+Foo
+Bar
+---
+
+Foo
+Bar
+===
+.
+<p>Foo
+Bar</p>
+<hr />
+<p>Foo
+Bar
+===</p>
+.
+
+But in general a blank line is not required before or after:
+
+.
+---
+Foo
+---
+Bar
+---
+Baz
+.
+<hr />
+<h2>Foo</h2>
+<h2>Bar</h2>
+<p>Baz</p>
+.
+
+Setext headers cannot be empty:
+
+.
+
+====
+.
+<p>====</p>
+.
+
+
+## Indented code blocks
+
+An [indented code block](#indented-code-block)
+<a id="indented-code-block"/> is composed of one or more
+[indented chunks](#indented-chunk) separated by blank lines.
+An [indented chunk](#indented-chunk) <a id="indented-chunk"/>
+is a sequence of non-blank lines, each indented four or more
+spaces. An indented code block cannot interrupt a paragraph, so
+if it occurs before or after a paragraph, there must be an
+intervening blank line. The contents of the code block are
+the literal contents of the lines, including trailing newlines,
+minus four spaces of indentation. An indented code block has no
+attributes.
+
+.
+ a simple
+ indented code block
+.
+<pre><code>a simple
+ indented code block
+</code></pre>
+.
+
+The contents are literal text, and do not get parsed as markdown:
+
+.
+ <a/>
+ *hi*
+
+ - one
+.
+<pre><code>&lt;a/&gt;
+*hi*
+
+- one
+</code></pre>
+.
+
+Here we have three chunks separated by blank lines:
+
+.
+ chunk1
+
+ chunk2
+
+
+
+ chunk3
+.
+<pre><code>chunk1
+
+chunk2
+
+
+
+chunk3
+</code></pre>
+.
+
+Any initial spaces beyond four will be included in the content, even
+in interior blank lines:
+
+.
+ chunk1
+
+ chunk2
+.
+<pre><code>chunk1
+
+ chunk2
+</code></pre>
+.
+
+An indented code block cannot interrupt a paragraph. (This
+allows hanging indents and the like.)
+
+.
+Foo
+ bar
+
+.
+<p>Foo
+bar</p>
+.
+
+However, any non-blank line with fewer than four leading spaces ends
+the code block immediately. So a paragraph may occur immediately
+after indented code:
+
+.
+ foo
+bar
+.
+<pre><code>foo
+</code></pre>
+<p>bar</p>
+.
+
+And indented code can occur immediately before and after other kinds of
+blocks:
+
+.
+# Header
+ foo
+Header
+------
+ foo
+----
+.
+<h1>Header</h1>
+<pre><code>foo
+</code></pre>
+<h2>Header</h2>
+<pre><code>foo
+</code></pre>
+<hr />
+.
+
+The first line can be indented more than four spaces:
+
+.
+ foo
+ bar
+.
+<pre><code> foo
+bar
+</code></pre>
+.
+
+Blank lines preceding or following an indented code block
+are not included in it:
+
+.
+
+
+ foo
+
+
+.
+<pre><code>foo
+</code></pre>
+.
+
+Trailing spaces are included in the code block's content:
+
+.
+ foo
+.
+<pre><code>foo
+</code></pre>
+.
+
+
+## Fenced code blocks
+
+A [code fence](#code-fence) <a id="code-fence"/> is a sequence
+of at least three consecutive backtick characters (`` ` ``) or
+tildes (`~`). (Tildes and backticks cannot be mixed.)
+A [fenced code block](#fenced-code-block) <a id="fenced-code-block"/>
+begins with a code fence, indented no more than three spaces.
+
+The line with the opening code fence may optionally contain some text
+following the code fence; this is trimmed of leading and trailing
+spaces and called the [info string](#info-string). <a
+id="info-string"/> The [info string] may not contain any backtick
+characters. (The reason for this restriction is that otherwise
+some inline code would be incorrectly interpreted as the
+beginning of a fenced code block.)
+
+The content of the code block consists of all subsequent lines, until
+a closing [code fence](#code-fence) of the same type as the code block
+began with (backticks or tildes), and with at least as many backticks
+or tildes as the opening code fence. If the leading code fence is
+indented N spaces, then up to N spaces of indentation are removed from
+each line of the content (if present). (If a content line is not
+indented, it is preserved unchanged. If it is indented less than N
+spaces, all of the indentation is removed.)
+
+The closing code fence may be indented up to three spaces, and may be
+followed only by spaces, which are ignored. If the end of the
+document is reached and no closing code fence has been found, the code
+block contains all of the lines after the opening code fence.
+(An alternative spec would require backtracking in the event
+that a closing code fence is not found. But this makes parsing much
+less efficient, and there seems to be no real down side to the
+behavior described here.)
+
+A fenced code block may interrupt a paragraph, and does not require
+a blank line either before or after.
+
+The content of a code fence is treated as literal text, not parsed
+as inlines. The first word of the info string is typically used to
+specify the language of the code sample, and rendered in the `class`
+attribute of the `pre` tag. However, this spec does not mandate any
+particular treatment of the info string.
+
+Here is a simple example with backticks:
+
+.
+```
+<
+ >
+```
+.
+<pre><code>&lt;
+ &gt;
+</code></pre>
+.
+
+With tildes:
+
+.
+~~~
+<
+ >
+~~~
+.
+<pre><code>&lt;
+ &gt;
+</code></pre>
+.
+
+The closing code fence must use the same character as the opening
+fence:
+
+.
+```
+aaa
+~~~
+```
+.
+<pre><code>aaa
+~~~
+</code></pre>
+.
+
+.
+~~~
+aaa
+```
+~~~
+.
+<pre><code>aaa
+```
+</code></pre>
+.
+
+The closing code fence must be at least as long as the opening fence:
+
+.
+````
+aaa
+```
+``````
+.
+<pre><code>aaa
+```
+</code></pre>
+.
+
+.
+~~~~
+aaa
+~~~
+~~~~
+.
+<pre><code>aaa
+~~~
+</code></pre>
+.
+
+Unclosed code blocks are closed by the end of the document:
+
+.
+```
+.
+<pre><code></code></pre>
+.
+
+.
+`````
+
+```
+aaa
+.
+<pre><code>
+```
+aaa
+</code></pre>
+.
+
+A code block can have all empty lines as its content:
+
+.
+```
+
+
+```
+.
+<pre><code>
+
+</code></pre>
+.
+
+A code block can be empty:
+
+.
+```
+```
+.
+<pre><code></code></pre>
+.
+
+Fences can be indented. If the opening fence is indented,
+content lines will have equivalent opening indentation removed,
+if present:
+
+.
+ ```
+ aaa
+aaa
+```
+.
+<pre><code>aaa
+aaa
+</code></pre>
+.
+
+.
+ ```
+aaa
+ aaa
+aaa
+ ```
+.
+<pre><code>aaa
+aaa
+aaa
+</code></pre>
+.
+
+.
+ ```
+ aaa
+ aaa
+ aaa
+ ```
+.
+<pre><code>aaa
+ aaa
+aaa
+</code></pre>
+.
+
+Four spaces indentation produces an indented code block:
+
+.
+ ```
+ aaa
+ ```
+.
+<pre><code>```
+aaa
+```
+</code></pre>
+.
+
+Code fences (opening and closing) cannot contain internal spaces:
+
+.
+``` ```
+aaa
+.
+<p><code></code>
+aaa</p>
+.
+
+.
+~~~~~~
+aaa
+~~~ ~~
+.
+<pre><code>aaa
+~~~ ~~
+</code></pre>
+.
+
+Fenced code blocks can interrupt paragraphs, and can be followed
+directly by paragraphs, without a blank line between:
+
+.
+foo
+```
+bar
+```
+baz
+.
+<p>foo</p>
+<pre><code>bar
+</code></pre>
+<p>baz</p>
+.
+
+Other blocks can also occur before and after fenced code blocks
+without an intervening blank line:
+
+.
+foo
+---
+~~~
+bar
+~~~
+# baz
+.
+<h2>foo</h2>
+<pre><code>bar
+</code></pre>
+<h1>baz</h1>
+.
+
+An [info string](#info-string) can be provided after the opening code fence.
+Opening and closing spaces will be stripped, and the first word
+is used here to populate the `class` attribute of the enclosing
+`pre` tag.
+
+.
+```ruby
+def foo(x)
+ return 3
+end
+```
+.
+<pre class="ruby"><code>def foo(x)
+ return 3
+end
+</code></pre>
+.
+
+.
+~~~~ ruby startline=3 $%@#$
+def foo(x)
+ return 3
+end
+~~~~~~~
+.
+<pre class="ruby"><code>def foo(x)
+ return 3
+end
+</code></pre>
+.
+
+.
+````;
+````
+.
+<pre class=";"><code></code></pre>
+.
+
+Info strings for backtick code blocks cannot contain backticks:
+
+.
+``` aa ```
+foo
+.
+<p><code>aa</code>
+foo</p>
+.
+
+Closing code fences cannot have info strings:
+
+.
+```
+``` aaa
+```
+.
+<pre><code>``` aaa
+</code></pre>
+.
+
+
+## HTML blocks
+
+An [HTML block tag](#html-block-tag) <a id="html-block-tag"/> is
+an [open tag](#open-tag) or [closing tag](#closing-tag) whose tag
+name is one of the following (case-insensitive):
+`article`, `header`, `aside`, `hgroup`, `blockquote`, `hr`, `body`,
+`li`, `br`, `map`, `button`, `object`, `canvas`, `ol`, `caption`,
+`output`, `col`, `p`, `colgroup`, `pre`, `dd`, `progress`, `div`,
+`section`, `dl`, `table`, `td`, `dt`, `tbody`, `embed`, `textarea`,
+`fieldset`, `tfoot`, `figcaption`, `th`, `figure`, `thead`, `footer`,
+`tr`, `form`, `ul`, `h1`, `h2`, `h3`, `h4`, `h5`, `h6`,
+`video`, `script`, `style`.
+
+An [HTML block](#html-block) <a id="html-block"/> begins with an
+[HTML block tag](#html-block-tag), [HTML comment](#html-comment),
+[processing instruction](#processing-instruction),
+[declaration](#declaration), or [CDATA section](#cdata-section).
+It ends when a [blank line](#blank-line) or the end of the
+input is encountered. The initial line may be indented up to three
+spaces, and subsequent lines may have any indentation. The contents
+of the HTML block are interpreted as raw HTML, and will not be escaped
+in HTML output.
+
+Some simple examples:
+
+.
+<table>
+ <tr>
+ <td>
+ hi
+ </td>
+ </tr>
+</table>
+
+okay.
+.
+<table>
+ <tr>
+ <td>
+ hi
+ </td>
+ </tr>
+</table>
+<p>okay.</p>
+.
+
+.
+ <div>
+ *hello*
+ <foo><a>
+.
+ <div>
+ *hello*
+ <foo><a>
+.
+
+Here we have two HTML blocks with a markdown paragraph between them:
+
+.
+<DIV CLASS="foo">
+
+*Markdown*
+
+</DIV>
+.
+<DIV CLASS="foo">
+<p><em>Markdown</em></p>
+</DIV>
+.
+
+In the following example, what looks like a markdown code block
+is actually part of the HTML block, which continues until a blank
+line or the end of the document is reached:
+
+.
+<div></div>
+``` c
+int x = 33;
+```
+.
+<div></div>
+``` c
+int x = 33;
+```
+.
+
+A comment:
+
+.
+<!-- Foo
+bar
+ baz -->
+.
+<!-- Foo
+bar
+ baz -->
+.
+
+A processing instruction:
+
+.
+<?php
+ echo 'foo'
+?>
+.
+<?php
+ echo 'foo'
+?>
+.
+
+CDATA:
+
+.
+<![CDATA[
+function matchwo(a,b)
+{
+if (a < b && a < 0) then
+ {
+ return 1;
+ }
+else
+ {
+ return 0;
+ }
+}
+]]>
+.
+<![CDATA[
+function matchwo(a,b)
+{
+if (a < b && a < 0) then
+ {
+ return 1;
+ }
+else
+ {
+ return 0;
+ }
+}
+]]>
+.
+
+The opening tag can be indented 1-3 spaces, but not 4:
+
+.
+ <!-- foo -->
+
+ <!-- foo -->
+.
+ <!-- foo -->
+<pre><code>&lt;!-- foo --&gt;
+</code></pre>
+.
+
+An HTML block can interrupt a paragraph, and need not be preceded
+by a blank line.
+
+.
+Foo
+<div>
+bar
+</div>
+.
+<p>Foo</p>
+<div>
+bar
+</div>
+.
+
+However, a following blank line is always needed, except at the end of
+a document:
+
+.
+<div>
+bar
+</div>
+*foo*
+.
+<div>
+bar
+</div>
+*foo*
+.
+
+An incomplete HTML block tag may also start an HTML block:
+
+.
+<div class
+foo
+.
+<div class
+foo
+.
+
+This rule differs from John Gruber's original markdown syntax
+specification, which says:
+
+> The only restrictions are that block-level HTML elements —
+> e.g. `<div>`, `<table>`, `<pre>`, `<p>`, etc. — must be separated from
+> surrounding content by blank lines, and the start and end tags of the
+> block should not be indented with tabs or spaces.
+
+In some ways Gruber's rule is more restrictive than the one given
+here:
+
+- It requires that an HTML block be preceded by a blank line.
+- It does not allow the start tag to be indented.
+- It requires a matching end tag, which it also does not allow to
+ be indented.
+
+Indeed, most markdown implementations, including some of Gruber's
+own perl implementations, do not impose these restrictions.
+
+There is one respect, however, in which Gruber's rule is more liberal
+than the one given here, since it allows blank lines to occur inside
+an HTML block. There are two reasons for disallowing them here.
+First, it removes the need to parse balanced tags, which is
+expensive and can require backtracking from the end of the document
+if no matching end tag is found. Second, it provides a very simple
+and flexible way of including markdown content inside HTML tags:
+simply separate the markdown from the HTML using blank lines:
+
+.
+<div>
+
+*Emphasized* text.
+
+</div>
+.
+<div>
+<p><em>Emphasized</em> text.</p>
+</div>
+.
+
+Compare:
+
+.
+<div>
+*Emphasized* text.
+</div>
+.
+<div>
+*Emphasized* text.
+</div>
+.
+
+Some markdown implementations have adopted a convention of
+interpreting content inside tags as text if the open tag has
+the attribute `markdown=1`. The rule given above seems a simpler and
+more elegant way of achieving the same expressive power, which is also
+much simpler to parse.
+
+The main potential drawback is that one can no longer paste HTML
+blocks into markdown documents with 100% reliability. However,
+*in most cases* this will work fine, because the blank lines in
+HTML are usually followed by HTML block tags. For example:
+
+.
+<table>
+
+<tr>
+
+<td>
+Hi
+</td>
+
+</tr>
+
+</table>
+.
+<table>
+<tr>
+<td>
+Hi
+</td>
+</tr>
+</table>
+.
+
+Moreover, blank lines are usually not necessary and can be
+deleted. The exception is inside `<pre>` tags; here, one can
+replace the blank lines with `&#10;` entities.
+
+So there is no important loss of expressive power with the new rule.
+
+## Link reference definitions
+
+A [link reference definition](#link-reference-definition)
+<a id="link-reference-definition"/> consists of a [link
+label](#link-label), indented up to three spaces, followed
+by a colon (`:`), optional blank space (including up to one
+newline), a [link destination](#link-destination), optional
+blank space (including up to one newline), and an optional [link
+title](#link-title), which if it is present must be separated
+from the [link destination](#link-destination) by whitespace.
+No further non-space characters may occur on the line.
+
+A [link reference definition](#link-reference-definition)
+does not correspond to a structural element of a document. Instead, it
+defines a label which can be used in [reference links](#reference-link)
+and reference-style [images](#image) elsewhere in the document. [Link
+reference definitions] can be defined either before or after the links
+that use them.
+
+.
+[foo]: /url "title"
+
+[foo]
+.
+<p><a href="/url" title="title">foo</a></p>
+.
+
+.
+ [foo]:
+ /url
+ 'the title'
+
+[foo]
+.
+<p><a href="/url" title="the title">foo</a></p>
+.
+
+.
+[Foo*bar\]]:my_(url) 'title (with parens)'
+
+[Foo*bar\]]
+.
+<p><a href="my_(url)" title="title (with parens)">Foo*bar]</a></p>
+.
+
+.
+[Foo bar]:
+<my url>
+'title'
+
+[Foo bar]
+.
+<p><a href="my url" title="title">Foo bar</a></p>
+.
+
+The title may be omitted:
+
+.
+[foo]:
+/url
+
+[foo]
+.
+<p><a href="/url">foo</a></p>
+.
+
+The link destination may not be omitted:
+
+.
+[foo]:
+
+[foo]
+.
+<p>[foo]:</p>
+<p>[foo]</p>
+.
+
+A link can come before its corresponding definition:
+
+.
+[foo]
+
+[foo]: url
+.
+<p><a href="url">foo</a></p>
+.
+
+If there are several matching definitions, the first one takes
+precedence:
+
+.
+[foo]
+
+[foo]: first
+[foo]: second
+.
+<p><a href="first">foo</a></p>
+.
+
+As noted in the section on [Links], matching of labels is
+case-insensitive (see [matches](#matches)).
+
+.
+[FOO]: /url
+
+[Foo]
+.
+<p><a href="/url">Foo</a></p>
+.
+
+.
+[ΑΓΩ]: /φου
+
+[αγω]
+.
+<p><a href="/φου">αγω</a></p>
+.
+
+Here is a link reference definition with no corresponding link.
+It contributes nothing to the document.
+
+.
+[foo]: /url
+.
+.
+
+This is not a link reference definition, because there are
+non-space characters after the title:
+
+.
+[foo]: /url "title" ok
+.
+<p>[foo]: /url &quot;title&quot; ok</p>
+.
+
+This is not a link reference definition, because it is indented
+four spaces:
+
+.
+ [foo]: /url "title"
+
+[foo]
+.
+<pre><code>[foo]: /url &quot;title&quot;
+</code></pre>
+<p>[foo]</p>
+.
+
+This is not a link reference definition, because it occurs inside
+a code block:
+
+.
+```
+[foo]: /url
+```
+
+[foo]
+.
+<pre><code>[foo]: /url
+</code></pre>
+<p>[foo]</p>
+.
+
+A [link reference definition](#link-reference-definition) cannot
+interrupt a paragraph.
+
+.
+Foo
+[bar]: /baz
+
+[bar]
+.
+<p>Foo
+[bar]: /baz</p>
+<p>[bar]</p>
+.
+
+However, it can directly follow other block elements, such as headers
+and horizontal rules, and it need not be followed by a blank line.
+
+.
+# [Foo]
+[foo]: /url
+> bar
+.
+<h1><a href="/url">Foo</a></h1>
+<blockquote>
+<p>bar</p>
+</blockquote>
+.
+
+Several [link reference definitions](#link-reference-definition) can occur one after another,
+without intervening blank lines.
+
+.
+[foo]: /foo-url "foo"
+[bar]: /bar-url
+ "bar"
+[baz]: /baz-url
+
+[foo],
+[bar],
+[baz]
+.
+<p><a href="/foo-url" title="foo">foo</a>,
+<a href="/bar-url" title="bar">bar</a>,
+<a href="/baz-url">baz</a></p>
+.
+
+[Link reference definitions](#link-reference-definition) can occur
+inside block containers, like lists and block quotations. They
+affect the entire document, not just the container in which they
+are defined:
+
+.
+[foo]
+
+> [foo]: /url
+.
+<p><a href="/url">foo</a></p>
+<blockquote>
+</blockquote>
+.
+
+
+## Paragraphs
+
+A sequence of non-blank lines that cannot be interpreted as other
+kinds of blocks forms a [paragraph](#paragraph) <a id="paragraph"/>.
+The contents of the paragraph are the result of parsing the
+paragraph's raw content as inlines. The paragraph's raw content
+is formed by concatenating the lines and removing initial and final
+spaces.
+
+A simple example with two paragraphs:
+
+.
+aaa
+
+bbb
+.
+<p>aaa</p>
+<p>bbb</p>
+.
+
+Paragraphs can contain multiple lines, but no blank lines:
+
+.
+aaa
+bbb
+
+ccc
+ddd
+.
+<p>aaa
+bbb</p>
+<p>ccc
+ddd</p>
+.
+
+Multiple blank lines between paragraphs have no effect:
+
+.
+aaa
+
+
+bbb
+.
+<p>aaa</p>
+<p>bbb</p>
+.
+
+Leading spaces are skipped:
+
+.
+ aaa
+ bbb
+.
+<p>aaa
+bbb</p>
+.
+
+Lines after the first may be indented any amount, since indented
+code blocks cannot interrupt paragraphs.
+
+.
+aaa
+ bbb
+ ccc
+.
+<p>aaa
+bbb
+ccc</p>
+.
+
+However, the first line may be indented at most three spaces,
+or an indented code block will be triggered:
+
+.
+ aaa
+bbb
+.
+<p>aaa
+bbb</p>
+.
+
+.
+ aaa
+bbb
+.
+<pre><code>aaa
+</code></pre>
+<p>bbb</p>
+.
+
+Final spaces are stripped before inline parsing, so a paragraph
+that ends with two or more spaces will not end with a hard line
+break:
+
+.
+aaa
+bbb
+.
+<p>aaa<br />
+bbb</p>
+.
+
+## Blank lines
+
+[Blank lines](#blank-line) between block-level elements are ignored,
+except for the role they play in determining whether a [list](#list)
+is [tight](#tight) or [loose](#loose).
+
+Blank lines at the beginning and end of the document are also ignored.
+
+.
+
+
+aaa
+
+
+# aaa
+
+
+.
+<p>aaa</p>
+<h1>aaa</h1>
+.
+
+
+# Container blocks
+
+A [container block](#container-block) is a block that has other
+blocks as its contents. There are two basic kinds of container blocks:
+[block quotes](#block-quote) and [list items](#list-item).
+[Lists](#list) are meta-containers for [list items](#list-item).
+
+We define the syntax for container blocks recursively. The general
+form of the definition is:
+
+> If X is a sequence of blocks, then the result of
+> transforming X in such-and-such a way is a container of type Y
+> with these blocks as its content.
+
+So, we explain what counts as a block quote or list item by
+explaining how these can be *generated* from their contents.
+This should suffice to define the syntax, although it does not
+give a recipe for *parsing* these constructions. (A recipe is
+provided below in the section entitled [A parsing strategy].)
+
+## Block quotes
+
+A [block quote marker](#block-quote-marker) <a id="block-quote-marker"/>
+consists of 0-3 spaces of initial indent, plus (a) the character `>` together
+with a following space, or (b) a single character `>` not followed by a space.
+
+The following rules define [block quotes](#block-quote):
+<a id="block-quote"/>
+
+1. **Basic case.** If a string of lines *Ls* constitute a sequence
+ of blocks *Bs*, then the result of prepending a [block quote marker]
+ to the beginning of each line in *Ls* is a [block quote](#block-quote)
+ containing *Bs*.
+
+2. **Laziness.** If a string of lines *Ls* constitute a [block
+ quote](#block-quote) with contents *Bs*, then the result of deleting
+ the initial [block quote marker](#block-quote-marker) from one or
+ more lines in which the next non-space character after the [block
+ quote marker](#block-quote-marker) is [paragraph continuation
+ text](#paragraph-continuation-text) is a block quote with *Bs* as
+ its content. [Paragraph continuation
+ text](#paragraph-continuation-text) is text that will be parsed as
+ part of the content of a paragraph, but does not occur at the
+ beginning of the paragraph.
+
+3. **Consecutiveness.** A document cannot contain two [block
+ quotes](#block-quote) in a row unless there is a [blank
+ line](#blank-line) between them.
+
+Nothing else counts as a [block quote](#block-quote).
+
+Here is a simple example:
+
+.
+> # Foo
+> bar
+> baz
+.
+<blockquote>
+<h1>Foo</h1>
+<p>bar
+baz</p>
+</blockquote>
+.
+
+The spaces after the `>` characters can be omitted:
+
+.
+># Foo
+>bar
+> baz
+.
+<blockquote>
+<h1>Foo</h1>
+<p>bar
+baz</p>
+</blockquote>
+.
+
+The `>` characters can be indented 1-3 spaces:
+
+.
+ > # Foo
+ > bar
+ > baz
+.
+<blockquote>
+<h1>Foo</h1>
+<p>bar
+baz</p>
+</blockquote>
+.
+
+Four spaces gives us a code block:
+
+.
+ > # Foo
+ > bar
+ > baz
+.
+<pre><code>&gt; # Foo
+&gt; bar
+&gt; baz
+</code></pre>
+.
+
+The Laziness clause allows us to omit the `>` before a
+paragraph continuation line:
+
+.
+> # Foo
+> bar
+baz
+.
+<blockquote>
+<h1>Foo</h1>
+<p>bar
+baz</p>
+</blockquote>
+.
+
+A block quote can contain some lazy and some non-lazy
+continuation lines:
+
+.
+> bar
+baz
+> foo
+.
+<blockquote>
+<p>bar
+baz
+foo</p>
+</blockquote>
+.
+
+Laziness only applies to lines that are continuations of
+paragraphs. Lines containing characters or indentation that indicate
+block structure cannot be lazy.
+
+.
+> foo
+---
+.
+<blockquote>
+<p>foo</p>
+</blockquote>
+<hr />
+.
+
+.
+> - foo
+- bar
+.
+<blockquote>
+<ul>
+<li>foo</li>
+</ul>
+</blockquote>
+<ul>
+<li>bar</li>
+</ul>
+.
+
+.
+> foo
+ bar
+.
+<blockquote>
+<pre><code>foo
+</code></pre>
+</blockquote>
+<pre><code>bar
+</code></pre>
+.
+
+.
+> ```
+foo
+```
+.
+<blockquote>
+<pre><code></code></pre>
+</blockquote>
+<p>foo</p>
+<pre><code></code></pre>
+.
+
+A block quote can be empty:
+
+.
+>
+.
+<blockquote>
+</blockquote>
+.
+
+.
+>
+>
+>
+.
+<blockquote>
+</blockquote>
+.
+
+A block quote can have initial or final blank lines:
+
+.
+>
+> foo
+>
+.
+<blockquote>
+<p>foo</p>
+</blockquote>
+.
+
+A blank line always separates block quotes:
+
+.
+> foo
+
+> bar
+.
+<blockquote>
+<p>foo</p>
+</blockquote>
+<blockquote>
+<p>bar</p>
+</blockquote>
+.
+
+(Most current markdown implementations, including John Gruber's
+original `Markdown.pl`, will parse this example as a single block quote
+with two paragraphs. But it seems better to allow the author to decide
+whether two block quotes or one are wanted.)
+
+Consecutiveness means that if we put these block quotes together,
+we get a single block quote:
+
+.
+> foo
+> bar
+.
+<blockquote>
+<p>foo
+bar</p>
+</blockquote>
+.
+
+To get a block quote with two paragraphs, use:
+
+.
+> foo
+>
+> bar
+.
+<blockquote>
+<p>foo</p>
+<p>bar</p>
+</blockquote>
+.
+
+Block quotes can interrupt paragraphs:
+
+.
+foo
+> bar
+.
+<p>foo</p>
+<blockquote>
+<p>bar</p>
+</blockquote>
+.
+
+In general, blank lines are not needed before or after block
+quotes:
+
+.
+> aaa
+***
+> bbb
+.
+<blockquote>
+<p>aaa</p>
+</blockquote>
+<hr />
+<blockquote>
+<p>bbb</p>
+</blockquote>
+.
+
+However, because of laziness, a blank line is needed between
+a block quote and a following paragraph:
+
+.
+> bar
+baz
+.
+<blockquote>
+<p>bar
+baz</p>
+</blockquote>
+.
+
+.
+> bar
+
+baz
+.
+<blockquote>
+<p>bar</p>
+</blockquote>
+<p>baz</p>
+.
+
+.
+> bar
+>
+baz
+.
+<blockquote>
+<p>bar</p>
+</blockquote>
+<p>baz</p>
+.
+
+It is a consequence of the Laziness rule that any number
+of initial `>`s may be omitted on a continuation line of a
+nested block quote:
+
+.
+> > > foo
+bar
+.
+<blockquote>
+<blockquote>
+<blockquote>
+<p>foo
+bar</p>
+</blockquote>
+</blockquote>
+</blockquote>
+.
+
+.
+>>> foo
+> bar
+>>baz
+.
+<blockquote>
+<blockquote>
+<blockquote>
+<p>foo
+bar
+baz</p>
+</blockquote>
+</blockquote>
+</blockquote>
+.
+
+When including an indented code block in a block quote,
+remember that the [block quote marker](#block-quote-marker) includes
+both the `>` and a following space. So *five spaces* are needed after
+the `>`:
+
+.
+> code
+
+> not code
+.
+<blockquote>
+<pre><code>code
+</code></pre>
+</blockquote>
+<blockquote>
+<p>not code</p>
+</blockquote>
+.
+
+
+## List items
+
+A [list marker](#list-marker) <a id="list-marker"/> is a
+[bullet list marker](#bullet-list-marker) or an [ordered list
+marker](#ordered-list-marker).
+
+A [bullet list marker](#bullet-list-marker) <a id="bullet-list-marker"/>
+is a `-`, `+`, or `*` character.
+
+An [ordered list marker](#ordered-list-marker) <a id="ordered-list-marker"/>
+is a sequence of one or more digits (`0-9`), followed by either a
+`.` character or a `)` character.
+
+The following rules define [list items](#list-item):
+
+1. **Basic case.** If a sequence of lines *Ls* constitute a sequence of
+ blocks *Bs* starting with a non-space character and not separated
+ from each other by more than one blank line, and *M* is a list
+ marker of width *W* followed by 0 < *N* < 5 spaces, then the result
+ of prepending *M* and the following spaces to the first line of
+ *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a
+ list item with *Bs* as its contents. The type of the list item
+ (bullet or ordered) is determined by the type of its list marker.
+ If the list item is ordered, then it is also assigned a start
+ number, based on the ordered list marker.
+
+For example, let *Ls* be the lines
+
+.
+A paragraph
+with two lines.
+
+ indented code
+
+> A block quote.
+.
+<p>A paragraph
+with two lines.</p>
+<pre><code>indented code
+</code></pre>
+<blockquote>
+<p>A block quote.</p>
+</blockquote>
+.
+
+And let *M* be the marker `1.`, and *N* = 2. Then rule #1 says
+that the following is an ordered list item with start number 1,
+and the same contents as *Ls*:
+
+.
+1. A paragraph
+ with two lines.
+
+ indented code
+
+ > A block quote.
+.
+<ol>
+<li><p>A paragraph
+with two lines.</p>
+<pre><code>indented code
+</code></pre>
+<blockquote>
+<p>A block quote.</p>
+</blockquote></li>
+</ol>
+.
+
+The most important thing to notice is that the position of
+the text after the list marker determines how much indentation
+is needed in subsequent blocks in the list item. If the list
+marker takes up two spaces, and there are three spaces between
+the list marker and the next nonspace character, then blocks
+must be indented five spaces in order to fall under the list
+item.
+
+Here are some examples showing how far content must be indented to be
+put under the list item:
+
+.
+- one
+
+ two
+.
+<ul>
+<li>one</li>
+</ul>
+<p>two</p>
+.
+
+.
+- one
+
+ two
+.
+<ul>
+<li><p>one</p>
+<p>two</p></li>
+</ul>
+.
+
+.
+ - one
+
+ two
+.
+<ul>
+<li>one</li>
+</ul>
+<pre><code> two
+</code></pre>
+.
+
+.
+ - one
+
+ two
+.
+<ul>
+<li><p>one</p>
+<p>two</p></li>
+</ul>
+.
+
+It is tempting to think of this in terms of columns: the continuation
+blocks must be indented at least to the column of the first nonspace
+character after the list marker. However, that is not quite right.
+The spaces after the list marker determine how much relative indentation
+is needed. Which column this indentation reaches will depend on
+how the list item is embedded in other constructions, as shown by
+this example:
+
+.
+ > > 1. one
+>>
+>> two
+.
+<blockquote>
+<blockquote>
+<ol>
+<li><p>one</p>
+<p>two</p></li>
+</ol>
+</blockquote>
+</blockquote>
+.
+
+Here `two` occurs in the same column as the list marker `1.`,
+but is actually contained in the list item, because there is
+sufficient indentation after the last containing blockquote marker.
+
+The converse is also possible. In the following example, the word `two`
+occurs far to the right of the initial text of the list item, `one`, but
+it is not considered part of the list item, because it is not indented
+far enough past the blockquote marker:
+
+.
+>>- one
+>>
+ > > two
+.
+<blockquote>
+<blockquote>
+<ul>
+<li>one</li>
+</ul>
+<p>two</p>
+</blockquote>
+</blockquote>
+.
+
+A list item may not contain blocks that are separated by more than
+one blank line. Thus, two blank lines will end a list:
+
+.
+- foo
+
+ bar
+
+- foo
+
+
+ bar
+.
+<ul>
+<li><p>foo</p>
+<p>bar</p></li>
+<li><p>foo</p></li>
+</ul>
+<p>bar</p>
+.
+
+A list item may contain any kind of block:
+
+.
+1. foo
+
+ ```
+ bar
+ ```
+
+ baz
+
+ > bam
+.
+<ol>
+<li><p>foo</p>
+<pre><code>bar
+</code></pre>
+<p>baz</p>
+<blockquote>
+<p>bam</p>
+</blockquote></li>
+</ol>
+.
+
+2. **Item starting with indented code.** If a sequence of lines *Ls*
+ constitute a sequence of blocks *Bs* starting with an indented code
+ block and not separated from each other by more than one blank line,
+ and *M* is a list marker of width *W* followed by
+ one space, then the result of prepending *M* and the following
+ space to the first line of *Ls*, and indenting subsequent lines of
+ *Ls* by *W + 1* spaces, is a list item with *Bs* as its contents.
+ If a line is empty, then it need not be indented. The type of the
+ list item (bullet or ordered) is determined by the type of its list
+ marker. If the list item is ordered, then it is also assigned a
+ start number, based on the ordered list marker.
+
+An indented code block will have to be indented four spaces beyond
+the edge of the region where text will be included in the list item.
+In the following case that is 6 spaces:
+
+.
+- foo
+
+ bar
+.
+<ul>
+<li><p>foo</p>
+<pre><code>bar
+</code></pre></li>
+</ul>
+.
+
+And in this case it is 11 spaces:
+
+.
+ 10. foo
+
+ bar
+.
+<ol start="10">
+<li><p>foo</p>
+<pre><code>bar
+</code></pre></li>
+</ol>
+.
+
+If the *first* block in the list item is an indented code block,
+then by rule #2, the contents must be indented *one* space after the
+list marker:
+
+.
+ indented code
+
+paragraph
+
+ more code
+.
+<pre><code>indented code
+</code></pre>
+<p>paragraph</p>
+<pre><code>more code
+</code></pre>
+.
+
+.
+1. indented code
+
+ paragraph
+
+ more code
+.
+<ol>
+<li><pre><code>indented code
+</code></pre>
+<p>paragraph</p>
+<pre><code>more code
+</code></pre></li>
+</ol>
+.
+
+Note that an additional space indent is interpreted as space
+inside the code block:
+
+.
+1. indented code
+
+ paragraph
+
+ more code
+.
+<ol>
+<li><pre><code> indented code
+</code></pre>
+<p>paragraph</p>
+<pre><code>more code
+</code></pre></li>
+</ol>
+.
+
+Note that rules #1 and #2 only apply to two cases: (a) cases
+in which the lines to be included in a list item begin with a nonspace
+character, and (b) cases in which they begin with an indented code
+block. In a case like the following, where the first block begins with
+a three-space indent, the rules do not allow us to form a list item by
+indenting the whole thing and prepending a list marker:
+
+.
+ foo
+
+bar
+.
+<p>foo</p>
+<p>bar</p>
+.
+
+.
+- foo
+
+ bar
+.
+<ul>
+<li>foo</li>
+</ul>
+<p>bar</p>
+.
+
+This is not a significant restriction, because when a block begins
+with 1-3 spaces indent, the indentation can always be removed without
+a change in interpretation, allowing rule #1 to be applied. So, in
+the above case:
+
+.
+- foo
+
+ bar
+.
+<ul>
+<li><p>foo</p>
+<p>bar</p></li>
+</ul>
+.
+
+
+3. **Indentation.** If a sequence of lines *Ls* constitutes a list item
+ according to rule #1 or #2, then the result of indenting each line
+ of *Ls* by 1-3 spaces (the same for each line) also constitutes a
+ list item with the same contents and attributes. If a line is
+ empty, then it need not be indented.
+
+Indented one space:
+
+.
+ 1. A paragraph
+ with two lines.
+
+ indented code
+
+ > A block quote.
+.
+<ol>
+<li><p>A paragraph
+with two lines.</p>
+<pre><code>indented code
+</code></pre>
+<blockquote>
+<p>A block quote.</p>
+</blockquote></li>
+</ol>
+.
+
+Indented two spaces:
+
+.
+ 1. A paragraph
+ with two lines.
+
+ indented code
+
+ > A block quote.
+.
+<ol>
+<li><p>A paragraph
+with two lines.</p>
+<pre><code>indented code
+</code></pre>
+<blockquote>
+<p>A block quote.</p>
+</blockquote></li>
+</ol>
+.
+
+Indented three spaces:
+
+.
+ 1. A paragraph
+ with two lines.
+
+ indented code
+
+ > A block quote.
+.
+<ol>
+<li><p>A paragraph
+with two lines.</p>
+<pre><code>indented code
+</code></pre>
+<blockquote>
+<p>A block quote.</p>
+</blockquote></li>
+</ol>
+.
+
+Four spaces indent gives a code block:
+
+.
+ 1. A paragraph
+ with two lines.
+
+ indented code
+
+ > A block quote.
+.
+<pre><code>1. A paragraph
+ with two lines.
+
+ indented code
+
+ &gt; A block quote.
+</code></pre>
+.
+
+
+4. **Laziness.** If a string of lines *Ls* constitute a [list
+ item](#list-item) with contents *Bs*, then the result of deleting
+ some or all of the indentation from one or more lines in which the
+ next non-space character after the [list marker](#list-marker) is
+ [paragraph continuation text](#paragraph-continuation-text) is a
+ list item with the same contents and attributes.
+
+Here is an example with lazy continuation lines:
+
+.
+ 1. A paragraph
+with two lines.
+
+ indented code
+
+ > A block quote.
+.
+<ol>
+<li><p>A paragraph
+with two lines.</p>
+<pre><code>indented code
+</code></pre>
+<blockquote>
+<p>A block quote.</p>
+</blockquote></li>
+</ol>
+.
+
+Indentation can be partially deleted:
+
+.
+ 1. A paragraph
+ with two lines.
+.
+<ol>
+<li>A paragraph
+with two lines.</li>
+</ol>
+.
+
+These examples show how laziness can work in nested structures:
+
+.
+> 1. > Blockquote
+continued here.
+.
+<blockquote>
+<ol>
+<li><blockquote>
+<p>Blockquote
+continued here.</p>
+</blockquote></li>
+</ol>
+</blockquote>
+.
+
+.
+> 1. > Blockquote
+> continued here.
+.
+<blockquote>
+<ol>
+<li><blockquote>
+<p>Blockquote
+continued here.</p>
+</blockquote></li>
+</ol>
+</blockquote>
+.
+
+
+5. **That's all.** Nothing that is not counted as a list item by rules
+ #1--4 counts as a [list item](#list-item).
+
+The rules for sublists follow from the general rules above. A sublist
+must be indented the same number of spaces a paragraph would need to be
+in order to be included in the list item.
+
+So, in this case we need two spaces indent:
+
+.
+- foo
+ - bar
+ - baz
+.
+<ul>
+<li>foo
+<ul>
+<li>bar
+<ul>
+<li>baz</li>
+</ul></li>
+</ul></li>
+</ul>
+.
+
+One is not enough:
+
+.
+- foo
+ - bar
+ - baz
+.
+<ul>
+<li>foo</li>
+<li>bar</li>
+<li>baz</li>
+</ul>
+.
+
+Here we need four, because the list marker is wider:
+
+.
+10) foo
+ - bar
+.
+<ol start="10">
+<li>foo
+<ul>
+<li>bar</li>
+</ul></li>
+</ol>
+.
+
+Three is not enough:
+
+.
+10) foo
+ - bar
+.
+<ol start="10">
+<li>foo</li>
+</ol>
+<ul>
+<li>bar</li>
+</ul>
+.
+
+A list may be the first block in a list item:
+
+.
+- - foo
+.
+<ul>
+<li><ul>
+<li>foo</li>
+</ul></li>
+</ul>
+.
+
+.
+1. - 2. foo
+.
+<ol>
+<li><ul>
+<li><ol start="2">
+<li>foo</li>
+</ol></li>
+</ul></li>
+</ol>
+.
+
+A list item may be empty:
+
+.
+- foo
+-
+- bar
+.
+<ul>
+<li>foo</li>
+<li></li>
+<li>bar</li>
+</ul>
+.
+
+.
+-
+.
+<ul>
+<li></li>
+</ul>
+.
+
+### Motivation
+
+John Gruber's markdown spec says the following about list items:
+
+1. "List markers typically start at the left margin, but may be indented
+ by up to three spaces. List markers must be followed by one or more
+ spaces or a tab."
+
+2. "To make lists look nice, you can wrap items with hanging indents....
+ But if you don't want to, you don't have to."
+
+3. "List items may consist of multiple paragraphs. Each subsequent
+ paragraph in a list item must be indented by either 4 spaces or one
+ tab."
+
+4. "It looks nice if you indent every line of the subsequent paragraphs,
+ but here again, Markdown will allow you to be lazy."
+
+5. "To put a blockquote within a list item, the blockquote's `>`
+ delimiters need to be indented."
+
+6. "To put a code block within a list item, the code block needs to be
+ indented twice — 8 spaces or two tabs."
+
+These rules specify that a paragraph under a list item must be indented
+four spaces (presumably, from the left margin, rather than the start of
+the list marker, but this is not said), and that code under a list item
+must be indented eight spaces instead of the usual four. They also say
+that a block quote must be indented, but not by how much; however, the
+example given has four spaces indentation. Although nothing is said
+about other kinds of block-level content, it is certainly reasonable to
+infer that *all* block elements under a list item, including other
+lists, must be indented four spaces. This principle has been called the
+*four-space rule*.
+
+The four-space rule is clear and principled, and if the reference
+implementation `Markdown.pl` had followed it, it probably would have
+become the standard. However, `Markdown.pl` allowed paragraphs and
+sublists to start with only two spaces indentation, at least on the
+outer level. Worse, its behavior was inconsistent: a sublist of an
+outer-level list needed two spaces indentation, but a sublist of this
+sublist needed three spaces. It is not surprising, then, that different
+implementations of markdown have developed very different rules for
+determining what comes under a list item. (Pandoc and python-markdown,
+for example, stuck with Gruber's syntax description and the four-space
+rule, while discount, redcarpet, marked, PHP markdown, and others
+followed `Markdown.pl`'s behavior more closely.)
+
+Unfortunately, given the divergences between implementations, there
+is no way to give a spec for list items that will be guaranteed not
+to break any existing documents. However, the spec given here should
+correctly handle lists formatted with either the four-space rule or
+the more forgiving `Markdown.pl` behavior, provided they are laid out
+in a way that is natural for a human to read.
+
+The strategy here is to let the width and indentation of the list marker
+determine the indentation necessary for blocks to fall under the list
+item, rather than having a fixed and arbitrary number. The writer can
+think of the body of the list item as a unit which gets indented to the
+right enough to fit the list marker (and any indentation on the list
+marker). (The laziness rule, #4, then allows continuation lines to be
+unindented if needed.)
+
+This rule is superior, we claim, to any rule requiring a fixed level of
+indentation from the margin. The four-space rule is clear but
+unnatural. It is quite unintuitive that
+
+``` markdown
+- foo
+
+ bar
+
+ - baz
+```
+
+should be parsed as two lists with an intervening paragraph,
+
+``` html
+<ul>
+<li>foo</li>
+</ul>
+<p>bar</p>
+<ul>
+<li>baz</li>
+</ul>
+```
+
+as the four-space rule demands, rather than a single list,
+
+``` html
+<ul>
+<li><p>foo</p>
+<p>bar</p></li>
+<li><p>baz</p></li>
+</ul>
+```
+
+The choice of four spaces is arbitrary. It can be learned, but it is
+not likely to be guessed, and it trips up beginners regularly.
+
+Would it help to adopt a two-space rule? The problem is that such
+a rule, together with the rule allowing 1--3 spaces indentation of the
+initial list marker, allows text that is indented *less than* the
+original list marker to be included in the list item. For example,
+`Markdown.pl` parses
+
+``` markdown
+ - one
+
+ two
+```
+
+as a single list item, with `two` a continuation paragraph:
+
+``` html
+<ul>
+<li><p>one</p>
+<p>two</p></li>
+</ul>
+```
+
+and similarly
+
+``` markdown
+> - one
+>
+> two
+```
+
+as
+
+``` html
+<blockquote>
+<ul>
+<li><p>one</p>
+<p>two</p></li>
+</ul>
+</blockquote>
+```
+
+This is extremely unintuitive.
+
+Rather than requiring a fixed indent from the margin, we could require
+a fixed indent (say, two spaces, or even one space) from the list marker (which
+may itself be indented). This proposal would remove the last anomaly
+discussed. Unlike the spec presented above, it would count the following
+as a list item with a subparagraph, even though the paragraph `bar`
+is not indented as far as the first paragraph `foo`:
+
+``` markdown
+ 10. foo
+
+ bar
+```
+
+Arguably this text does read like a list item with `bar` as a subparagraph,
+which may count in favor of the proposal. However, on this proposal indented
+code would have to be indented six spaces after the list marker. And this
+would break a lot of existing markdown, which has the pattern:
+
+``` markdown
+1. foo
+
+ indented code
+```
+
+where the code is indented eight spaces. The spec above, by contrast, will
+parse this text as expected, since the code block's indentation is measured
+from the beginning of `foo`.
+
+The one case that needs special treatment is a list item that *starts*
+with indented code. How much indentation is required in that case, since
+we don't have a "first paragraph" to measure from? Rule #2 simply stipulates
+that in such cases, we require one space indentation from the list marker
+(and then the normal four spaces for the indented code). This will match the
+four-space rule in cases where the list marker plus its initial indentation
+takes four spaces (a common case), but diverge in other cases.
+
+## Lists
+
+A [list](#list) <a id="list"/> is a sequence of one or more
+list items [of the same type](#of-the-same-type). The list items
+may be separated by single [blank lines](#blank-line), but two
+blank lines end all containing lists.
+
+Two list items are [of the same type](#of-the-same-type)
+<a id="of-the-same-type"/> if they begin with a [list
+marker](#list-marker) of the same type. Two list markers are of the
+same type if (a) they are bullet list markers using the same character
+(`-`, `+`, or `*`) or (b) they are ordered list numbers with the same
+delimiter (either `.` or `)`).
+
+A list is an [ordered list](#ordered-list) <a id="ordered-list"/>
+if its constituent list items begin with
+[ordered list markers](#ordered-list-marker), and a [bullet
+list](#bullet-list) <a id="bullet-list"/> if its constituent list
+items begin with [bullet list markers](#bullet-list-marker).
+
+The [start number](#start-number) <a id="start-number"/>
+of an [ordered list](#ordered-list) is determined by the list number of
+its initial list item. The numbers of subsequent list items are
+disregarded.
+
+A list is [loose](#loose) if any of its constituent list items are
+separated by blank lines, or if any of its constituent list items
+directly contain two block-level elements with a blank line between
+them. Otherwise a list is [tight](#tight). (The difference in HTML output
+is that paragraphs in a loose list are wrapped in `<p>` tags, while
+paragraphs in a tight list are not.)
+
+Changing the bullet or ordered list delimiter starts a new list:
+
+.
+- foo
+- bar
++ baz
+.
+<ul>
+<li>foo</li>
+<li>bar</li>
+</ul>
+<ul>
+<li>baz</li>
+</ul>
+.
+
+.
+1. foo
+2. bar
+3) baz
+.
+<ol>
+<li>foo</li>
+<li>bar</li>
+</ol>
+<ol start="3">
+<li>baz</li>
+</ol>
+.
+
+There can be blank lines between items, but two blank lines end
+a list:
+
+.
+- foo
+
+- bar
+
+
+- baz
+.
+<ul>
+<li><p>foo</p></li>
+<li><p>bar</p></li>
+</ul>
+<ul>
+<li>baz</li>
+</ul>
+.
+
+As illustrated above in the section on [list items](#list-item),
+two blank lines between blocks *within* a list item will also end a
+list:
+
+.
+- foo
+
+
+ bar
+- baz
+.
+<ul>
+<li>foo</li>
+</ul>
+<p>bar</p>
+<ul>
+<li>baz</li>
+</ul>
+.
+
+Indeed, two blank lines will end *all* containing lists:
+
+.
+- foo
+ - bar
+ - baz
+
+
+ bim
+.
+<ul>
+<li>foo
+<ul>
+<li>bar
+<ul>
+<li>baz</li>
+</ul></li>
+</ul></li>
+</ul>
+<pre><code> bim
+</code></pre>
+.
+
+Thus, two blank lines can be used to separate consecutive lists of
+the same type, or to separate a list from an indented code block
+that would otherwise be parsed as a subparagraph of the final list
+item:
+
+.
+- foo
+- bar
+
+
+- baz
+- bim
+.
+<ul>
+<li>foo</li>
+<li>bar</li>
+</ul>
+<ul>
+<li>baz</li>
+<li>bim</li>
+</ul>
+.
+
+.
+- foo
+
+ notcode
+
+- foo
+
+
+ code
+.
+<ul>
+<li><p>foo</p>
+<p>notcode</p></li>
+<li><p>foo</p></li>
+</ul>
+<pre><code>code
+</code></pre>
+.
+
+List items need not be indented to the same level. The following
+list items will be treated as items at the same list level,
+since none is indented enough to belong to the previous list
+item:
+
+.
+- a
+ - b
+ - c
+ - d
+ - e
+ - f
+- g
+.
+<ul>
+<li>a</li>
+<li>b</li>
+<li>c</li>
+<li>d</li>
+<li>e</li>
+<li>f</li>
+<li>g</li>
+</ul>
+.
+
+This is a loose list, because there is a blank line between
+two of the list items:
+
+.
+- a
+- b
+
+- c
+.
+<ul>
+<li><p>a</p></li>
+<li><p>b</p></li>
+<li><p>c</p></li>
+</ul>
+.
+
+So is this, with an empty second item:
+
+.
+* a
+*
+
+* c
+.
+<ul>
+<li><p>a</p></li>
+<li></li>
+<li><p>c</p></li>
+</ul>
+.
+
+These are loose lists, even though there is no space between the items,
+because one of the items directly contains two block-level elements
+with a blank line between them:
+
+.
+- a
+- b
+
+ c
+- d
+.
+<ul>
+<li><p>a</p></li>
+<li><p>b</p>
+<p>c</p></li>
+<li><p>d</p></li>
+</ul>
+.
+
+.
+- a
+- b
+
+ [ref]: /url
+- d
+.
+<ul>
+<li><p>a</p></li>
+<li><p>b</p></li>
+<li><p>d</p></li>
+</ul>
+.
+
+This is a tight list, because the blank lines are in a code block:
+
+.
+- a
+- ```
+ b
+
+
+ ```
+- c
+.
+<ul>
+<li>a</li>
+<li><pre><code>b
+
+
+</code></pre></li>
+<li>c</li>
+</ul>
+.
+
+This is a tight list, because the blank line is between two
+paragraphs of a sublist. So the inner list is loose while
+the outer list is tight:
+
+.
+- a
+ - b
+
+ c
+- d
+.
+<ul>
+<li>a
+<ul>
+<li><p>b</p>
+<p>c</p></li>
+</ul></li>
+<li>d</li>
+</ul>
+.
+
+This is a tight list, because the blank line is inside the
+block quote:
+
+.
+* a
+ > b
+ >
+* c
+.
+<ul>
+<li>a
+<blockquote>
+<p>b</p>
+</blockquote></li>
+<li>c</li>
+</ul>
+.
+
+This list is tight, because the consecutive block elements
+are not separated by blank lines:
+
+.
+- a
+ > b
+ ```
+ c
+ ```
+- d
+.
+<ul>
+<li>a
+<blockquote>
+<p>b</p>
+</blockquote>
+<pre><code>c
+</code></pre></li>
+<li>d</li>
+</ul>
+.
+
+A single-paragraph list is tight:
+
+.
+- a
+.
+<ul>
+<li>a</li>
+</ul>
+.
+
+.
+- a
+ - b
+.
+<ul>
+<li>a
+<ul>
+<li>b</li>
+</ul></li>
+</ul>
+.
+
+Here the outer list is loose, the inner list tight:
+
+.
+* foo
+ * bar
+
+ baz
+.
+<ul>
+<li><p>foo</p>
+<ul>
+<li>bar</li>
+</ul>
+<p>baz</p></li>
+</ul>
+.
+
+.
+- a
+ - b
+ - c
+
+- d
+ - e
+ - f
+.
+<ul>
+<li><p>a</p>
+<ul>
+<li>b</li>
+<li>c</li>
+</ul></li>
+<li><p>d</p>
+<ul>
+<li>e</li>
+<li>f</li>
+</ul></li>
+</ul>
+.
+
+# Inlines
+
+Inlines are parsed sequentially from the beginning of the character
+stream to the end (left to right, in left-to-right languages).
+Thus, for example, in
+
+.
+`hi`lo`
+.
+<p><code>hi</code>lo`</p>
+.
+
+`hi` is parsed as code, leaving the backtick at the end as a literal
+backtick.
+
+## Backslash escapes
+
+Any ASCII punctuation character may be backslash-escaped:
+
+.
+\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~
+.
+<p>!&quot;#$%&amp;'()*+,-./:;&lt;=&gt;?@[\]^_`{|}~</p>
+.
+
+Backslashes before other characters are treated as literal
+backslashes:
+
+.
+\→\A\a\ \3\φ\«
+.
+<p>\ \A\a\ \3\φ\«</p>
+.
+
+Escaped characters are treated as regular characters and do
+not have their usual markdown meanings:
+
+.
+\*not emphasized*
+\<br/> not a tag
+\[not a link](/foo)
+\`not code`
+1\. not a list
+\* not a list
+\# not a header
+\[foo]: /url "not a reference"
+.
+<p>*not emphasized*
+&lt;br/&gt; not a tag
+[not a link](/foo)
+`not code`
+1. not a list
+* not a list
+# not a header
+[foo]: /url &quot;not a reference&quot;</p>
+.
+
+If a backslash is itself escaped, the following character is not:
+
+.
+\\*emphasis*
+.
+<p>\<em>emphasis</em></p>
+.
+
+A backslash at the end of the line is a hard line break:
+
+.
+foo\
+bar
+.
+<p>foo<br />
+bar</p>
+.
+
+Backslash escapes do not work in code blocks, code spans, autolinks, or
+raw HTML:
+
+.
+`` \[\` ``
+.
+<p><code>\[\`</code></p>
+.
+
+.
+ \[\]
+.
+<pre><code>\[\]
+</code></pre>
+.
+
+.
+~~~
+\[\]
+~~~
+.
+<pre><code>\[\]
+</code></pre>
+.
+
+.
+<http://google.com?find=\*>
+.
+<p><a href="http://google.com?find=\*">http://google.com?find=\*</a></p>
+.
+
+.
+<a href="/bar\/)">
+.
+<p><a href="/bar\/)"></p>
+.
+
+But they work in all other contexts, including URLs and link titles,
+link references, and info strings in [fenced code
+blocks](#fenced-code-block):
+
+.
+[foo](/bar\* "ti\*tle")
+.
+<p><a href="/bar*" title="ti*tle">foo</a></p>
+.
+
+.
+[foo]
+
+[foo]: /bar\* "ti\*tle"
+.
+<p><a href="/bar*" title="ti*tle">foo</a></p>
+.
+
+.
+``` foo\+bar
+foo
+```
+.
+<pre class="foo+bar"><code>foo
+</code></pre>
+.
+
+
+## Entities
+
+Entities are parsed as entities, not as literal text, in all contexts
+except code spans and code blocks. Three kinds of entities are recognized.
+
+[Named entities](#named-entities) <a id="named-entities"/> consist of `&`
++ a string of 2-32 alphanumerics beginning with a letter + `;`.
+
+.
+&nbsp; &amp; &copy; &AElig; &Dcaron; &frac34; &HilbertSpace; &DifferentialD; &ClockwiseContourIntegral;
+.
+<p>&nbsp; &amp; &copy; &AElig; &Dcaron; &frac34; &HilbertSpace; &DifferentialD; &ClockwiseContourIntegral;</p>
+.
+
+[Decimal entities](#decimal-entities) <a id="decimal-entities"/>
+consist of `&#` + a string of 1--8 arabic digits + `;`.
+
+.
+&#1; &#35; &#1234; &#992; &#98765432;
+.
+<p>&#1; &#35; &#1234; &#992; &#98765432;</p>
+.
+
+[Hexadecimal entities](#hexadecimal-entities) <a id="hexadecimal-entities"/>
+consist of `&#` + either `X` or `x` + a string of 1-8 hexadecimal digits
++ `;`.
+
+.
+&#x1; &#X22; &#XD06; &#xcab;
+.
+<p>&#x1; &#X22; &#XD06; &#xcab;</p>
+.
+
+Here are some nonentities:
+
+.
+&nbsp &x; &#; &#x; &#123456789; &ThisIsWayTooLongToBeAnEntityIsntIt; &hi?;
+.
+<p>&amp;nbsp &amp;x; &amp;#; &amp;#x; &amp;#123456789; &amp;ThisIsWayTooLongToBeAnEntityIsntIt; &amp;hi?;</p>
+.
+
+Although HTML5 does accept some entities without a trailing semicolon
+(such as `&copy`), these are not recognized as entities here:
+
+.
+&copy
+.
+<p>&amp;copy</p>
+.
+
+On the other hand, many strings that are not on the list of HTML5
+named entities are recognized as entities here:
+
+.
+&MadeUpEntity;
+.
+<p>&MadeUpEntity;</p>
+.
+
+Entities are recognized in any context besides code spans or
+code blocks, including raw HTML, URLs, [link titles](#link-title), and
+[fenced code block](#fenced-code-block) info strings:
+
+.
+<a href="&ouml;&ouml;.html">
+.
+<p><a href="&ouml;&ouml;.html"></p>
+.
+
+.
+[foo](/f&ouml;&ouml; "f&ouml;&ouml;")
+.
+<p><a href="/f&ouml;&ouml;" title="f&ouml;&ouml;">foo</a></p>
+.
+
+.
+[foo]
+
+[foo]: /f&ouml;&ouml; "f&ouml;&ouml;"
+.
+<p><a href="/f&ouml;&ouml;" title="f&ouml;&ouml;">foo</a></p>
+.
+
+.
+``` f&ouml;&ouml;
+foo
+```
+.
+<pre class="f&ouml;&ouml;"><code>foo
+</code></pre>
+.
+
+Entities are treated as literal text in code spans and code blocks:
+
+.
+`f&ouml;&ouml;`
+.
+<p><code>f&amp;ouml;&amp;ouml;</code></p>
+.
+
+.
+ f&ouml;f&ouml;
+.
+<pre><code>f&amp;ouml;f&amp;ouml;
+</code></pre>
+.
+
+## Code span
+
+A [backtick string](#backtick-string) <a id="backtick-string"/>
+is a string of one or more backtick characters (`` ` ``) that is neither
+preceded nor followed by a backtick.
+
+A code span begins with a backtick string and ends with a backtick
+string of equal length. The contents of the code span are the
+characters between the two backtick strings, with leading and trailing
+spaces and newlines removed, and consecutive spaces and newlines
+collapsed to single spaces.
+
+This is a simple code span:
+
+.
+`foo`
+.
+<p><code>foo</code></p>
+.
+
+Here two backticks are used, because the code contains a backtick.
+This example also illustrates stripping of leading and trailing spaces:
+
+.
+`` foo ` bar ``
+.
+<p><code>foo ` bar</code></p>
+.
+
+This example shows the motivation for stripping leading and trailing
+spaces:
+
+.
+` `` `
+.
+<p><code>``</code></p>
+.
+
+Newlines are treated like spaces:
+
+.
+``
+foo
+``
+.
+<p><code>foo</code></p>
+.
+
+Interior spaces and newlines are collapsed into single spaces, just
+as they would be by a browser:
+
+.
+`foo bar
+ baz`
+.
+<p><code>foo bar baz</code></p>
+.
+
+Q: Why not just leave the spaces, since browsers will collapse them
+anyway? A: Because we might be targeting a non-HTML format, and we
+shouldn't rely on HTML-specific rendering assumptions.
+
+(Existing implementations differ in their treatment of internal
+spaces and newlines. Some, including `Markdown.pl` and
+`showdown`, convert an internal newline into a `<br />` tag.
+But this makes things difficult for those who like to hard-wrap
+their paragraphs, since a line break in the midst of a code
+span will cause an unintended line break in the output. Others
+just leave internal spaces as they are, which is fine if only
+HTML is being targeted.)
+
+.
+`foo `` bar`
+.
+<p><code>foo `` bar</code></p>
+.
+
+Note that backslash escapes do not work in code spans. All backslashes
+are treated literally:
+
+.
+`foo\`bar`
+.
+<p><code>foo\</code>bar`</p>
+.
+
+Backslash escapes are never needed, because one can always choose a
+string of *n* backtick characters as delimiters, where the code does
+not contain any strings of exactly *n* backtick characters.
+
+Code span backticks have higher precedence than any other inline
+constructs except HTML tags and autolinks. Thus, for example, this is
+not parsed as emphasized text, since the second `*` is part of a code
+span:
+
+.
+*foo`*`
+.
+<p>*foo<code>*</code></p>
+.
+
+And this is not parsed as a link:
+
+.
+[not a `link](/foo`)
+.
+<p>[not a <code>link](/foo</code>)</p>
+.
+
+But this is a link:
+
+.
+<http://foo.bar.`baz>`
+.
+<p><a href="http://foo.bar.`baz">http://foo.bar.`baz</a>`</p>
+.
+
+And this is an HTML tag:
+
+.
+<a href="`">`
+.
+<p><a href="`">`</p>
+.
+
+When a backtick string is not closed by a matching backtick string,
+we just have literal backticks:
+
+.
+```foo``
+.
+<p>```foo``</p>
+.
+
+.
+`foo
+.
+<p>`foo</p>
+.
+
+## Emphasis and strong emphasis
+
+John Gruber's original [markdown syntax
+description](http://daringfireball.net/projects/markdown/syntax#em) says:
+
+> Markdown treats asterisks (`*`) and underscores (`_`) as indicators of
+> emphasis. Text wrapped with one `*` or `_` will be wrapped with an HTML
+> `<em>` tag; double `*`'s or `_`'s will be wrapped with an HTML `<strong>`
+> tag.
+
+This is enough for most users, but these rules leave much undecided,
+especially when it comes to nested emphasis. The original
+`Markdown.pl` test suite makes it clear that triple `***` and
+`___` delimiters can be used for strong emphasis, and most
+implementations have also allowed the following patterns:
+
+``` markdown
+***strong emph***
+***strong** in emph*
+***emph* in strong**
+**in strong *emph***
+*in emph **strong***
+```
+
+The following patterns are less widely supported, but the intent
+is clear and they are useful (especially in contexts like bibliography
+entries):
+
+``` markdown
+*emph *with emph* in it*
+**strong **with strong** in it**
+```
+
+Many implementations have also restricted intraword emphasis to
+the `*` forms, to avoid unwanted emphasis in words containing
+internal underscores. (It is best practice to put these in code
+spans, but users often do not.)
+
+``` markdown
+internal emphasis: foo*bar*baz
+no emphasis: foo_bar_baz
+```
+
+The following rules capture all of these patterns, while allowing
+for efficient parsing strategies that do not backtrack:
+
+1. A single `*` character [can open emphasis](#can-open-emphasis)
+ <a id="can-open-emphasis"/> iff
+
+ (a) it is not part of a sequence of four or more unescaped `*`s,
+ (b) it is not followed by whitespace, and
+ (c) either it is not followed by a `*` character or it is
+ followed immediately by strong emphasis.
+
+2. A single `_` character [can open emphasis](#can-open-emphasis) iff
+
+ (a) it is not part of a sequence of four or more unescaped `_`s,
+ (b) it is not followed by whitespace,
+ (c) it is not preceded by an ASCII alphanumeric character, and
+ (d) either it is not followed by a `_` character or it is
+ followed immediately by strong emphasis.
+
+3. A single `*` character [can close emphasis](#can-close-emphasis)
+ <a id="can-close-emphasis"/> iff
+
+ (a) it is not part of a sequence of four or more unescaped `*`s, and
+ (b) it is not preceded by whitespace.
+
+4. A single `_` character [can close emphasis](#can-close-emphasis) iff
+
+ (a) it is not part of a sequence of four or more unescaped `_`s,
+ (b) it is not preceded by whitespace, and
+ (c) it is not followed by an ASCII alphanumeric character.
+
+5. A double `**` [can open strong emphasis](#can-open-strong-emphasis)
+ <a id="can-open-strong-emphasis" /> iff
+
+ (a) it is not part of a sequence of four or more unescaped `*`s,
+ (b) it is not followed by whitespace, and
+ (c) either it is not followed by a `*` character or it is
+ followed immediately by emphasis.
+
+6. A double `__` [can open strong emphasis](#can-open-strong-emphasis)
+ iff
+
+ (a) it is not part of a sequence of four or more unescaped `_`s,
+ (b) it is not followed by whitespace,
+ (c) it is not preceded by an ASCII alphanumeric character, and
+ (d) either it is not followed by a `_` character or it is
+ followed immediately by emphasis.
+
+7. A double `**` [can close strong emphasis](#can-close-strong-emphasis)
+ <a id="can-close-strong-emphasis" /> iff
+
+ (a) it is not part of a sequence of four or more unescaped `*`s, and
+ (b) it is not preceded by whitespace.
+
+8. A double `__` [can close strong emphasis](#can-close-strong-emphasis)
+ iff
+
+ (a) it is not part of a sequence of four or more unescaped `_`s,
+ (b) it is not preceded by whitespace, and
+ (c) it is not followed by an ASCII alphanumeric character.
+
+9. Emphasis begins with a delimiter that [can open
+ emphasis](#can-open-emphasis) and includes inlines parsed
+ sequentially until a delimiter that [can close
+ emphasis](#can-close-emphasis), and that uses the same
+ character (`_` or `*`) as the opening delimiter, is reached.
+
+10. Strong emphasis begins with a delimiter that [can open strong
+ emphasis](#can-open-strong-emphasis) and includes inlines parsed
+ sequentially until a delimiter that [can close strong
+ emphasis](#can-close-strong-emphasis), and that uses the
+ same character (`_` or `*`) as the opening delimiter, is reached.
+
+These rules can be illustrated through a series of examples.
+
+Simple emphasis:
+
+.
+*foo bar*
+.
+<p><em>foo bar</em></p>
+.
+
+.
+_foo bar_
+.
+<p><em>foo bar</em></p>
+.
+
+Simple strong emphasis:
+
+.
+**foo bar**
+.
+<p><strong>foo bar</strong></p>
+.
+
+.
+__foo bar__
+.
+<p><strong>foo bar</strong></p>
+.
+
+Emphasis can continue over line breaks:
+
+.
+*foo
+bar*
+.
+<p><em>foo
+bar</em></p>
+.
+
+.
+_foo
+bar_
+.
+<p><em>foo
+bar</em></p>
+.
+
+.
+**foo
+bar**
+.
+<p><strong>foo
+bar</strong></p>
+.
+
+.
+__foo
+bar__
+.
+<p><strong>foo
+bar</strong></p>
+.
+
+Emphasis can contain other inline constructs:
+
+.
+*foo [bar](/url)*
+.
+<p><em>foo <a href="/url">bar</a></em></p>
+.
+
+.
+_foo [bar](/url)_
+.
+<p><em>foo <a href="/url">bar</a></em></p>
+.
+
+.
+**foo [bar](/url)**
+.
+<p><strong>foo <a href="/url">bar</a></strong></p>
+.
+
+.
+__foo [bar](/url)__
+.
+<p><strong>foo <a href="/url">bar</a></strong></p>
+.
+
+Symbols contained in other inline constructs will not
+close emphasis:
+
+.
+*foo [bar*](/url)
+.
+<p>*foo <a href="/url">bar*</a></p>
+.
+
+.
+_foo [bar_](/url)
+.
+<p>_foo <a href="/url">bar_</a></p>
+.
+
+.
+**<a href="**">
+.
+<p>**<a href="**"></p>
+.
+
+.
+__<a href="__">
+.
+<p>__<a href="__"></p>
+.
+
+.
+*a `*`*
+.
+<p><em>a <code>*</code></em></p>
+.
+
+.
+_a `_`_
+.
+<p><em>a <code>_</code></em></p>
+.
+
+.
+**a<http://foo.bar?q=**>
+.
+<p>**a<a href="http://foo.bar?q=**">http://foo.bar?q=**</a></p>
+.
+
+.
+__a<http://foo.bar?q=__>
+.
+<p>__a<a href="http://foo.bar?q=__">http://foo.bar?q=__</a></p>
+.
+
+This is not emphasis, because the opening delimiter is
+followed by white space:
+
+.
+and * foo bar*
+.
+<p>and * foo bar*</p>
+.
+
+.
+_ foo bar_
+.
+<p>_ foo bar_</p>
+.
+
+.
+and ** foo bar**
+.
+<p>and ** foo bar**</p>
+.
+
+.
+__ foo bar__
+.
+<p>__ foo bar__</p>
+.
+
+This is not emphasis, because the closing delimiter is
+preceded by white space:
+
+.
+and *foo bar *
+.
+<p>and *foo bar *</p>
+.
+
+.
+and _foo bar _
+.
+<p>and _foo bar _</p>
+.
+
+.
+and **foo bar **
+.
+<p>and **foo bar **</p>
+.
+
+.
+and __foo bar __
+.
+<p>and __foo bar __</p>
+.
+
+The rules imply that a sequence of four or more unescaped `*` or
+`_` characters will always be parsed as a literal string:
+
+.
+****hi****
+.
+<p>****hi****</p>
+.
+
+.
+_____hi_____
+.
+<p>_____hi_____</p>
+.
+
+.
+Sign here: _________
+.
+<p>Sign here: _________</p>
+.
+
+The rules also imply that there can be no empty emphasis or strong
+emphasis:
+
+.
+** is not an empty emphasis
+.
+<p>** is not an empty emphasis</p>
+.
+
+.
+**** is not an empty strong emphasis
+.
+<p>**** is not an empty strong emphasis</p>
+.
+
+To include `*` or `_` in emphasized sections, use backslash escapes
+or code spans:
+
+.
+*here is a \**
+.
+<p><em>here is a *</em></p>
+.
+
+.
+__this is a double underscore (`__`)__
+.
+<p><strong>this is a double underscore (<code>__</code>)</strong></p>
+.
+
+`*` delimiters allow intra-word emphasis; `_` delimiters do not:
+
+.
+foo*bar*baz
+.
+<p>foo<em>bar</em>baz</p>
+.
+
+.
+foo_bar_baz
+.
+<p>foo_bar_baz</p>
+.
+
+.
+foo__bar__baz
+.
+<p>foo__bar__baz</p>
+.
+
+.
+_foo_bar_baz_
+.
+<p><em>foo_bar_baz</em></p>
+.
+
+.
+11*15*32
+.
+<p>11<em>15</em>32</p>
+.
+
+.
+11_15_32
+.
+<p>11_15_32</p>
+.
+
+Internal underscores will be ignored in underscore-delimited
+emphasis:
+
+.
+_foo_bar_baz_
+.
+<p><em>foo_bar_baz</em></p>
+.
+
+.
+__foo__bar__baz__
+.
+<p><strong>foo__bar__baz</strong></p>
+.
+
+The rules are sufficient for the following nesting patterns:
+
+.
+***foo bar***
+.
+<p><strong><em>foo bar</em></strong></p>
+.
+
+.
+___foo bar___
+.
+<p><strong><em>foo bar</em></strong></p>
+.
+
+.
+***foo** bar*
+.
+<p><em><strong>foo</strong> bar</em></p>
+.
+
+.
+___foo__ bar_
+.
+<p><em><strong>foo</strong> bar</em></p>
+.
+
+.
+***foo* bar**
+.
+<p><strong><em>foo</em> bar</strong></p>
+.
+
+.
+___foo_ bar__
+.
+<p><strong><em>foo</em> bar</strong></p>
+.
+
+.
+*foo **bar***
+.
+<p><em>foo <strong>bar</strong></em></p>
+.
+
+.
+_foo __bar___
+.
+<p><em>foo <strong>bar</strong></em></p>
+.
+
+.
+**foo *bar***
+.
+<p><strong>foo <em>bar</em></strong></p>
+.
+
+.
+__foo _bar___
+.
+<p><strong>foo <em>bar</em></strong></p>
+.
+
+.
+*foo **bar***
+.
+<p><em>foo <strong>bar</strong></em></p>
+.
+
+.
+_foo __bar___
+.
+<p><em>foo <strong>bar</strong></em></p>
+.
+
+.
+*foo *bar* baz*
+.
+<p><em>foo <em>bar</em> baz</em></p>
+.
+
+.
+_foo _bar_ baz_
+.
+<p><em>foo <em>bar</em> baz</em></p>
+.
+
+.
+**foo **bar** baz**
+.
+<p><strong>foo <strong>bar</strong> baz</strong></p>
+.
+
+.
+__foo __bar__ baz__
+.
+<p><strong>foo <strong>bar</strong> baz</strong></p>
+.
+
+.
+*foo **bar** baz*
+.
+<p><em>foo <strong>bar</strong> baz</em></p>
+.
+
+.
+_foo __bar__ baz_
+.
+<p><em>foo <strong>bar</strong> baz</em></p>
+.
+
+.
+**foo *bar* baz**
+.
+<p><strong>foo <em>bar</em> baz</strong></p>
+.
+
+.
+__foo _bar_ baz__
+.
+<p><strong>foo <em>bar</em> baz</strong></p>
+.
+
+Note that you cannot nest emphasis directly inside emphasis
+using the same delimiter, or strong emphasis directly inside
+strong emphasis:
+
+.
+**foo**
+.
+<p><strong>foo</strong></p>
+.
+
+.
+****foo****
+.
+<p>****foo****</p>
+.
+
+For these nestings, you need to switch delimiters:
+
+.
+*_foo_*
+.
+<p><em><em>foo</em></em></p>
+.
+
+.
+**__foo__**
+.
+<p><strong><strong>foo</strong></strong></p>
+.
+
+Note that a `*` followed by a `*` can close emphasis, and
+a `**` followed by a `*` can close strong emphasis (and
+similarly for `_` and `__`):
+
+.
+*foo**
+.
+<p><em>foo</em>*</p>
+.
+
+.
+*foo *bar**
+.
+<p><em>foo <em>bar</em></em></p>
+.
+
+.
+**foo***
+.
+<p><strong>foo</strong>*</p>
+.
+
+The following contains no strong emphasis, because the opening
+delimiter is closed by the first `*` before `bar`:
+
+.
+*foo**bar***
+.
+<p><em>foo</em><em>bar</em>**</p>
+.
+
+However, a string of four or more `****` can never close emphasis:
+
+.
+*foo****
+.
+<p>*foo****</p>
+.
+
+Note that there are some asymmetries here:
+
+.
+*foo**
+
+**foo*
+.
+<p><em>foo</em>*</p>
+<p>**foo*</p>
+.
+
+.
+*foo *bar**
+
+**foo* bar*
+.
+<p><em>foo <em>bar</em></em></p>
+<p>**foo* bar*</p>
+.
+
+More cases with mismatched delimiters:
+
+.
+**foo* bar*
+.
+<p>**foo* bar*</p>
+.
+
+.
+*bar***
+.
+<p><em>bar</em>**</p>
+.
+
+.
+***foo*
+.
+<p>***foo*</p>
+.
+
+.
+**bar***
+.
+<p><strong>bar</strong>*</p>
+.
+
+.
+***foo**
+.
+<p>***foo**</p>
+.
+
+.
+***foo *bar*
+.
+<p>***foo <em>bar</em></p>
+.
+
+## Links
+
+A link contains a [link label](#link-label) (the visible text),
+a [destination](#link-destination) (the URI that is the link destination),
+and optionally a [link title](#link-title). There are two basic kinds
+of links in markdown. In [inline links](#inline-link) the destination
+and title are given immediately after the label. In [reference
+links](#reference-links) the destination and title are defined elsewhere
+in the document.
+
+A [link label](#link-label) <a id="link-label"/> consists of
+
+- an opening `[`, followed by
+- zero or more backtick code spans, autolinks, HTML tags, link labels,
+ backslash-escaped ASCII punctuation characters, or non-`]` characters,
+ followed by
+- a closing `]`.
+
+These rules are motivated by the following intuitive ideas:
+
+- A link label is a container for inline elements.
+- The square brackets bind more tightly than emphasis markers,
+ but less tightly than `<>` or `` ` ``.
+- Link labels may contain material in matching square brackets.
+
+A [link destination](#link-destination) <a id="link-destination"/>
+consists of either
+
+- a sequence of zero or more characters between an opening `<` and a
+ closing `>` that contains no line breaks or unescaped `<` or `>`
+ characters, or
+
+- a nonempty sequence of characters that does not include
+ ASCII space or control characters, and includes parentheses
+ only if (a) they are backslash-escaped or (b) they are part of
+ a balanced pair of unescaped parentheses that is not itself
+ inside a balanced pair of unescaped parentheses.
+
+A [link title](#link-title) <a id="link-title"/> consists of either
+
+- a sequence of zero or more characters between straight double-quote
+ characters (`"`), including a `"` character only if it is
+ backslash-escaped, or
+
+- a sequence of zero or more characters between straight single-quote
+ characters (`'`), including a `'` character only if it is
+ backslash-escaped, or
+
+- a sequence of zero or more characters between matching parentheses
+ (`(...)`), including a `)` character only if it is backslash-escaped.
+
+An [inline link](#inline-link) <a id="inline-link"/>
+consists of a [link label](#link-label) followed immediately
+by a left parenthesis `(`, optional whitespace,
+an optional [link destination](#link-destination),
+an optional [link title](#link-title) separated from the link
+destination by whitespace, optional whitespace, and a right
+parenthesis `)`. The link's text consists of the label (excluding
+the enclosing square brackets) parsed as inlines. The link's
+URI consists of the link destination, excluding enclosing `<...>` if
+present, with backslash-escapes in effect as described above. The
+link's title consists of the link title, excluding its enclosing
+delimiters, with backslash-escapes in effect as described above.
+
+Here is a simple inline link:
+
+.
+[link](/uri "title")
+.
+<p><a href="/uri" title="title">link</a></p>
+.
+
+The title may be omitted:
+
+.
+[link](/uri)
+.
+<p><a href="/uri">link</a></p>
+.
+
+Both the title and the destination may be omitted:
+
+.
+[link]()
+.
+<p><a href="">link</a></p>
+.
+
+.
+[link](<>)
+.
+<p><a href="">link</a></p>
+.
+
+
+If the destination contains spaces, it must be enclosed in pointy
+braces:
+
+.
+[link](/my uri)
+.
+<p>[link](/my uri)</p>
+.
+
+.
+[link](</my uri>)
+.
+<p><a href="/my uri">link</a></p>
+.
+
+The destination cannot contain line breaks, even with pointy braces:
+
+.
+[link](foo
+bar)
+.
+<p>[link](foo
+bar)</p>
+.
+
+One level of balanced parentheses is allowed without escaping:
+
+.
+[link]((foo)and(bar))
+.
+<p><a href="(foo)and(bar)">link</a></p>
+.
+
+However, if you have parentheses within parentheses, you need to escape
+or use the `<...>` form:
+
+.
+[link](foo(and(bar)))
+.
+<p>[link](foo(and(bar)))</p>
+.
+
+.
+[link](foo(and\(bar\)))
+.
+<p><a href="foo(and(bar))">link</a></p>
+.
+
+.
+[link](<foo(and(bar))>)
+.
+<p><a href="foo(and(bar))">link</a></p>
+.
+
+Parentheses and other symbols can also be escaped, as usual
+in markdown:
+
+.
+[link](foo\)\:)
+.
+<p><a href="foo):">link</a></p>
+.
+
+URL-escaping and entities should be left alone inside the destination:
+
+.
+[link](foo%20b&auml;)
+.
+<p><a href="foo%20b&auml;">link</a></p>
+.
+
+Note that, because titles can often be parsed as destinations,
+if you try to omit the destination and keep the title, you'll
+get unexpected results:
+
+.
+[link]("title")
+.
+<p><a href="&quot;title&quot;">link</a></p>
+.
+
+Titles may be in single quotes, double quotes, or parentheses:
+
+.
+[link](/url "title")
+[link](/url 'title')
+[link](/url (title))
+.
+<p><a href="/url" title="title">link</a>
+<a href="/url" title="title">link</a>
+<a href="/url" title="title">link</a></p>
+.
+
+Backslash escapes and entities may be used in titles:
+
+.
+[link](/url "title \"&quot;")
+.
+<p><a href="/url" title="title &quot;&quot;">link</a></p>
+.
+
+Nested balanced quotes are not allowed without escaping:
+
+.
+[link](/url "title "and" title")
+.
+<p>[link](/url &quot;title &quot;and&quot; title&quot;)</p>
+.
+
+But it is easy to work around this by using a different quote type:
+
+.
+[link](/url 'title "and" title')
+.
+<p><a href="/url" title="title &quot;and&quot; title">link</a></p>
+.
+
+(Note: `Markdown.pl` did allow double quotes inside a double-quoted
+title, and its test suite included a test demonstrating this.
+But it is hard to see a good rationale for the extra complexity this
+brings, since there are already many ways---backslash escaping,
+entities, or using a different quote type for the enclosing title---to
+write titles containing double quotes. `Markdown.pl`'s handling of
+titles has a number of other strange features. For example, it allows
+single-quoted titles in inline links, but not reference links. And, in
+reference links but not inline links, it allows a title to begin with
+`"` and end with `)`. `Markdown.pl` 1.0.1 even allows titles with no closing
+quotation mark, though 1.0.2b8 does not. It seems preferable to adopt
+a simple, rational rule that works the same way in inline links and
+link reference definitions.)
+
+Whitespace is allowed around the destination and title:
+
+.
+[link]( /uri
+ "title" )
+.
+<p><a href="/uri" title="title">link</a></p>
+.
+
+But it is not allowed between the link label and the
+following parenthesis:
+
+.
+[link] (/uri)
+.
+<p>[link] (/uri)</p>
+.
+
+Note that this is not a link, because the closing `]` occurs in
+an HTML tag:
+
+.
+[foo <bar attr="](baz)">
+.
+<p>[foo <bar attr="](baz)"></p>
+.
+
+
+There are three kinds of [reference links](#reference-link):
+<a id="reference-link"/>
+
+A [full reference link](#full-reference-link) <a id="full-reference-link"/>
+consists of a [link label](#link-label), optional whitespace, and
+another [link label](#link-label) that [matches](#matches) a
+[reference link definition](#reference-link-definition) elsewhere in the
+document.
+
+One label [matches](#matches) <a id="matches"/>
+another just in case their normalized forms are equal. To normalize a
+label, perform the *unicode case fold* and collapse consecutive internal
+whitespace to a single space. If there are multiple matching reference
+link definitions, the one that comes first in the document is used. (It
+is desirable in such cases to emit a warning.)
+
+The contents of the first link label are parsed as inlines, which are
+used as the link's text. The link's URI and title are provided by the
+matching reference link definition.
+
+Here is a simple example:
+
+.
+[foo][bar]
+
+[bar]: /url "title"
+.
+<p><a href="/url" title="title">foo</a></p>
+.
+
+The first label can contain inline content:
+
+.
+[*foo\!*][bar]
+
+[bar]: /url "title"
+.
+<p><a href="/url" title="title"><em>foo!</em></a></p>
+.
+
+Matching is case-insensitive:
+
+.
+[foo][BaR]
+
+[bar]: /url "title"
+.
+<p><a href="/url" title="title">foo</a></p>
+.
+
+Unicode case fold is used:
+
+.
+[Толпой][Толпой] is a Russian word.
+
+[ТОЛПОЙ]: /url
+.
+<p><a href="/url">Толпой</a> is a Russian word.</p>
+.
+
+Consecutive internal whitespace is treated as one space for
+purposes of determining matching:
+
+.
+[Foo
+ bar]: /url
+
+[Baz][Foo bar]
+.
+<p><a href="/url">Baz</a></p>
+.
+
+There can be whitespace between the two labels:
+
+.
+[foo] [bar]
+
+[bar]: /url "title"
+.
+<p><a href="/url" title="title">foo</a></p>
+.
+
+.
+[foo]
+[bar]
+
+[bar]: /url "title"
+.
+<p><a href="/url" title="title">foo</a></p>
+.
+
+When there are multiple matching reference link definitions,
+the first is used:
+
+.
+[foo]: /url1
+
+[foo]: /url2
+
+[bar][foo]
+.
+<p><a href="/url1">bar</a></p>
+.
+
+Note that matching is performed on normalized strings, not parsed
+inline content. So the following does not match, even though the
+labels define equivalent inline content:
+
+.
+[bar][foo\!]
+
+[foo!]: /url
+.
+<p>[bar][foo!]</p>
+.
+
+A [collapsed reference link](#collapsed-reference-link)
+<a id="collapsed-reference-link"/> consists of a [link
+label](#link-label) that [matches](#matches) a [reference link
+definition](#reference-link-definition) elsewhere in the
+document, optional whitespace, and the string `[]`. The contents of the
+first link label are parsed as inlines, which are used as the link's
+text. The link's URI and title are provided by the matching reference
+link definition. Thus, `[foo][]` is equivalent to `[foo][foo]`.
+
+.
+[foo][]
+
+[foo]: /url "title"
+.
+<p><a href="/url" title="title">foo</a></p>
+.
+
+.
+[*foo* bar][]
+
+[*foo* bar]: /url "title"
+.
+<p><a href="/url" title="title"><em>foo</em> bar</a></p>
+.
+
+The link labels are case-insensitive:
+
+.
+[Foo][]
+
+[foo]: /url "title"
+.
+<p><a href="/url" title="title">Foo</a></p>
+.
+
+
+As with full reference links, whitespace is allowed
+between the two sets of brackets:
+
+.
+[foo]
+[]
+
+[foo]: /url "title"
+.
+<p><a href="/url" title="title">foo</a></p>
+.
+
+A [shortcut reference link](#shortcut-reference-link)
+<a id="shortcut-reference-link"/> consists of a [link
+label](#link-label) that [matches](#matches) a [reference link
+definition](#reference-link-definition) elsewhere in the
+document and is not followed by `[]` or a link label.
+The contents of the first link label are parsed as inlines,
+which are used as the link's text. The link's URI and title
+are provided by the matching reference link definition.
+Thus, `[foo]` is equivalent to `[foo][]`.
+
+.
+[foo]
+
+[foo]: /url "title"
+.
+<p><a href="/url" title="title">foo</a></p>
+.
+
+.
+[*foo* bar]
+
+[*foo* bar]: /url "title"
+.
+<p><a href="/url" title="title"><em>foo</em> bar</a></p>
+.
+
+.
+[[*foo* bar]]
+
+[*foo* bar]: /url "title"
+.
+<p>[<a href="/url" title="title"><em>foo</em> bar</a>]</p>
+.
+
+The link labels are case-insensitive:
+
+.
+[Foo]
+
+[foo]: /url "title"
+.
+<p><a href="/url" title="title">Foo</a></p>
+.
+
+If you just want bracketed text, you can backslash-escape the
+opening bracket to avoid links:
+
+.
+\[foo]
+
+[foo]: /url "title"
+.
+<p>[foo]</p>
+.
+
+Note that this is a link, because link labels bind more tightly
+than emphasis:
+
+.
+[foo*]: /url
+
+*[foo*]
+.
+<p>*<a href="/url">foo*</a></p>
+.
+
+However, this is not, because link labels bind less
+tightly than code backticks:
+
+.
+[foo`]: /url
+
+[foo`]`
+.
+<p>[foo<code>]</code></p>
+.
+
+Link labels can contain matched square brackets:
+
+.
+[[[foo]]]
+
+[[[foo]]]: /url
+.
+<p><a href="/url">[[foo]]</a></p>
+.
+
+.
+[[[foo]]]
+
+[[[foo]]]: /url1
+[foo]: /url2
+.
+<p><a href="/url1">[[foo]]</a></p>
+.
+
+For non-matching brackets, use backslash escapes:
+
+.
+[\[foo]
+
+[\[foo]: /url
+.
+<p><a href="/url">[foo</a></p>
+.
+
+Full references take precedence over shortcut references:
+
+.
+[foo][bar]
+
+[foo]: /url1
+[bar]: /url2
+.
+<p><a href="/url2">foo</a></p>
+.
+
+In the following case `[bar][baz]` is parsed as a reference,
+`[foo]` as normal text:
+
+.
+[foo][bar][baz]
+
+[baz]: /url
+.
+<p>[foo]<a href="/url">bar</a></p>
+.
+
+Here, though, `[foo][bar]` is parsed as a reference, since
+`[bar]` is defined:
+
+.
+[foo][bar][baz]
+
+[baz]: /url1
+[bar]: /url2
+.
+<p><a href="/url2">foo</a><a href="/url1">baz</a></p>
+.
+
+Here `[foo]` is not parsed as a shortcut reference, because it
+is followed by a link label (even though `[bar]` is not defined):
+
+.
+[foo][bar][baz]
+
+[baz]: /url1
+[foo]: /url2
+.
+<p>[foo]<a href="/url1">bar</a></p>
+.
+
+
+## Images
+
+An (unescaped) exclamation mark (`!`) followed by a reference or
+inline link will be parsed as an image. The link label will be
+used as the image's alt text, and the link title, if any, will
+be used as the image's title.
+
+.
+![foo](/url "title")
+.
+<p><img src="/url" alt="foo" title="title" /></p>
+.
+
+.
+![foo *bar*]
+
+[foo *bar*]: train.jpg "train & tracks"
+.
+<p><img src="train.jpg" alt="foo &lt;em&gt;bar&lt;/em&gt;" title="train &amp; tracks" /></p>
+.
+
+.
+![foo *bar*][]
+
+[foo *bar*]: train.jpg "train & tracks"
+.
+<p><img src="train.jpg" alt="foo &lt;em&gt;bar&lt;/em&gt;" title="train &amp; tracks" /></p>
+.
+
+.
+![foo *bar*][foobar]
+
+[FOOBAR]: train.jpg "train & tracks"
+.
+<p><img src="train.jpg" alt="foo &lt;em&gt;bar&lt;/em&gt;" title="train &amp; tracks" /></p>
+.
+
+.
+![foo](train.jpg)
+.
+<p><img src="train.jpg" alt="foo" /></p>
+.
+
+.
+My ![foo bar](/path/to/train.jpg "title" )
+.
+<p>My <img src="/path/to/train.jpg" alt="foo bar" title="title" /></p>
+.
+
+.
+![foo](<url>)
+.
+<p><img src="url" alt="foo" /></p>
+.
+
+.
+![](/url)
+.
+<p><img src="/url" alt="" /></p>
+.
+
+Reference-style:
+
+.
+![foo] [bar]
+
+[bar]: /url
+.
+<p><img src="/url" alt="foo" /></p>
+.
+
+.
+![foo] [bar]
+
+[BAR]: /url
+.
+<p><img src="/url" alt="foo" /></p>
+.
+
+Collapsed:
+
+.
+![foo][]
+
+[foo]: /url "title"
+.
+<p><img src="/url" alt="foo" title="title" /></p>
+.
+
+.
+![*foo* bar][]
+
+[*foo* bar]: /url "title"
+.
+<p><img src="/url" alt="&lt;em&gt;foo&lt;/em&gt; bar" title="title" /></p>
+.
+
+The labels are case-insensitive:
+
+.
+![Foo][]
+
+[foo]: /url "title"
+.
+<p><img src="/url" alt="Foo" title="title" /></p>
+.
+
+As with full reference links, whitespace is allowed
+between the two sets of brackets:
+
+.
+![foo]
+[]
+
+[foo]: /url "title"
+.
+<p><img src="/url" alt="foo" title="title" /></p>
+.
+
+Shortcut:
+
+.
+![foo]
+
+[foo]: /url "title"
+.
+<p><img src="/url" alt="foo" title="title" /></p>
+.
+
+.
+![*foo* bar]
+
+[*foo* bar]: /url "title"
+.
+<p><img src="/url" alt="&lt;em&gt;foo&lt;/em&gt; bar" title="title" /></p>
+.
+
+.
+![[foo]]
+
+[[foo]]: /url "title"
+.
+<p><img src="/url" alt="[foo]" title="title" /></p>
+.
+
+The link labels are case-insensitive:
+
+.
+![Foo]
+
+[foo]: /url "title"
+.
+<p><img src="/url" alt="Foo" title="title" /></p>
+.
+
+If you just want bracketed text, you can backslash-escape the
+opening `!` and `[`:
+
+.
+\!\[foo]
+
+[foo]: /url "title"
+.
+<p>![foo]</p>
+.
+
+If you want a link after a literal `!`, backslash-escape the
+`!`:
+
+.
+\![foo]
+
+[foo]: /url "title"
+.
+<p>!<a href="/url" title="title">foo</a></p>
+.
+
+## Autolinks
+
+Autolinks are absolute URIs and email addresses inside `<` and `>`.
+They are parsed as links, with the URL or email address as the link
+label.
+
+A [URI autolink](#uri-autolink) <a id="uri-autolink"/>
+consists of `<`, followed by an [absolute
+URI](#absolute-uri) not containing `<`, followed by `>`. It is parsed
+as a link to the URI, with the URI as the link's label.
+
+An [absolute URI](#absolute-uri), <a id="absolute-uri"/>
+for these purposes, consists of a [scheme](#scheme) followed by a colon (`:`)
+followed by zero or more characters other than ASCII whitespace and
+control characters, `<`, and `>`. If the URI includes these characters,
+you must use percent-encoding (e.g. `%20` for a space).
+
+The following [schemes](#scheme) <a id="scheme"/>
+are recognized (case-insensitive):
+`coap`, `doi`, `javascript`, `aaa`, `aaas`, `about`, `acap`, `cap`,
+`cid`, `crid`, `data`, `dav`, `dict`, `dns`, `file`, `ftp`, `geo`, `go`,
+`gopher`, `h323`, `http`, `https`, `iax`, `icap`, `im`, `imap`, `info`,
+`ipp`, `iris`, `iris.beep`, `iris.xpc`, `iris.xpcs`, `iris.lwz`, `ldap`,
+`mailto`, `mid`, `msrp`, `msrps`, `mtqp`, `mupdate`, `news`, `nfs`,
+`ni`, `nih`, `nntp`, `opaquelocktoken`, `pop`, `pres`, `rtsp`,
+`service`, `session`, `shttp`, `sieve`, `sip`, `sips`, `sms`, `snmp`,
+`soap.beep`, `soap.beeps`, `tag`, `tel`, `telnet`, `tftp`, `thismessage`,
+`tn3270`, `tip`, `tv`, `urn`, `vemmi`, `ws`, `wss`, `xcon`,
+`xcon-userid`, `xmlrpc.beep`, `xmlrpc.beeps`, `xmpp`, `z39.50r`,
+`z39.50s`, `adiumxtra`, `afp`, `afs`, `aim`, `apt`, `attachment`, `aw`,
+`beshare`, `bitcoin`, `bolo`, `callto`, `chrome`, `chrome-extension`,
+`com-eventbrite-attendee`, `content`, `cvs`, `dlna-playsingle`,
+`dlna-playcontainer`, `dtn`, `dvb`, `ed2k`, `facetime`, `feed`,
+`finger`, `fish`, `gg`, `git`, `gizmoproject`, `gtalk`, `hcp`, `icon`,
+`ipn`, `irc`, `irc6`, `ircs`, `itms`, `jar`, `jms`, `keyparc`, `lastfm`,
+`ldaps`, `magnet`, `maps`, `market`, `message`, `mms`, `ms-help`,
+`msnim`, `mumble`, `mvn`, `notes`, `oid`, `palm`, `paparazzi`,
+`platform`, `proxy`, `psyc`, `query`, `res`, `resource`, `rmi`, `rsync`,
+`rtmp`, `secondlife`, `sftp`, `sgn`, `skype`, `smb`, `soldat`,
+`spotify`, `ssh`, `steam`, `svn`, `teamspeak`, `things`, `udp`,
+`unreal`, `ut2004`, `ventrilo`, `view-source`, `webcal`, `wtai`,
+`wyciwyg`, `xfire`, `xri`, `ymsgr`.
+
+Here are some valid autolinks:
+
+.
+<http://foo.bar.baz>
+.
+<p><a href="http://foo.bar.baz">http://foo.bar.baz</a></p>
+.
+
+.
+<http://foo.bar.baz?q=hello&id=22&boolean>
+.
+<p><a href="http://foo.bar.baz?q=hello&amp;id=22&amp;boolean">http://foo.bar.baz?q=hello&amp;id=22&amp;boolean</a></p>
+.
+
+.
+<irc://foo.bar:2233/baz>
+.
+<p><a href="irc://foo.bar:2233/baz">irc://foo.bar:2233/baz</a></p>
+.
+
+Uppercase is also fine:
+
+.
+<MAILTO:FOO@BAR.BAZ>
+.
+<p><a href="MAILTO:FOO@BAR.BAZ">MAILTO:FOO@BAR.BAZ</a></p>
+.
+
+Spaces are not allowed in autolinks:
+
+.
+<http://foo.bar/baz bim>
+.
+<p>&lt;http://foo.bar/baz bim&gt;</p>
+.
+
+An [email autolink](#email-autolink) <a id="email-autolink"/>
+consists of `<`, followed by an [email address](#email-address),
+followed by `>`. The link's label is the email address,
+and the URL is `mailto:` followed by the email address.
+
+An [email address](#email-address), <a id="email-address"/>
+for these purposes, is anything that matches
+the [non-normative regex from the HTML5
+spec](http://www.whatwg.org/specs/web-apps/current-work/multipage/states-of-the-type-attribute.html#e-mail-state-%28type=email%29):
+
+ /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
+ (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/
+
+Examples of email autolinks:
+
+.
+<foo@bar.baz.com>
+.
+<p><a href="mailto:foo@bar.baz.com">foo@bar.baz.com</a></p>
+.
+
+.
+<foo+special@Bar.baz-bar0.com>
+.
+<p><a href="mailto:foo+special@Bar.baz-bar0.com">foo+special@Bar.baz-bar0.com</a></p>
+.
+
+These are not autolinks:
+
+.
+<>
+.
+<p>&lt;&gt;</p>
+.
+
+.
+<heck://bing.bong>
+.
+<p>&lt;heck://bing.bong&gt;</p>
+.
+
+.
+< http://foo.bar >
+.
+<p>&lt; http://foo.bar &gt;</p>
+.
+
+.
+<foo.bar.baz>
+.
+<p>&lt;foo.bar.baz&gt;</p>
+.
+
+.
+<localhost:5001/foo>
+.
+<p>&lt;localhost:5001/foo&gt;</p>
+.
+
+## Raw HTML
+
+Text between `<` and `>` that looks like an HTML tag is parsed as a
+raw HTML tag and will be rendered in HTML without escaping.
+Tag and attribute names are not limited to current HTML tags,
+so custom tags (and even, say, DocBook tags) may be used.
+
+Here is the grammar for tags:
+
+A [tag name](#tag-name) <a id="tag-name"/> consists of an ASCII letter
+followed by zero or more ASCII letters or digits.
+
+An [attribute](#attribute) <a id="attribute"/> consists of whitespace,
+an **attribute name**, and an optional **attribute value
+specification**.
+
+An [attribute name](#attribute-name) <a id="attribute-name"/>
+consists of an ASCII letter, `_`, or `:`, followed by zero or more ASCII
+letters, digits, `_`, `.`, `:`, or `-`. (Note: This is the XML
+specification restricted to ASCII. HTML5 is laxer.)
+
+An [attribute value specification](#attribute-value-specification)
+<a id="attribute-value-specification"/> consists of optional whitespace,
+a `=` character, optional whitespace, and an [attribute
+value](#attribute-value).
+
+An [attribute value](#attribute-value) <a id="attribute-value"/>
+consists of an [unquoted attribute value](#unquoted-attribute-value),
+a [single-quoted attribute value](#single-quoted-attribute-value),
+or a [double-quoted attribute value](#double-quoted-attribute-value).
+
+An [unquoted attribute value](#unquoted-attribute-value)
+<a id="unquoted-attribute-value"/> is a nonempty string of characters not
+including spaces, `"`, `'`, `=`, `<`, `>`, or `` ` ``.
+
+A [single-quoted attribute value](#single-quoted-attribute-value)
+<a id="single-quoted-attribute-value"/> consists of `'`, zero or more
+characters not including `'`, and a final `'`.
+
+A [double-quoted attribute value](#double-quoted-attribute-value)
+<a id="double-quoted-attribute-value"/> consists of `"`, zero or more
+characters not including `"`, and a final `"`.
+
+An [open tag](#open-tag) <a id="open-tag"/> consists of a `<` character,
+a [tag name](#tag-name), zero or more [attributes](#attribute),
+optional whitespace, an optional `/` character, and a `>` character.
+
+A [closing tag](#closing-tag) <a id="closing-tag"/> consists of the
+string `</`, a [tag name](#tag-name), optional whitespace, and the
+character `>`.
+
+An [HTML comment](#html-comment) <a id="html-comment"/> consists of the
+string `<!--`, a string of characters not including the string `--`, and
+the string `-->`.
+
+A [processing instruction](#processing-instruction)
+<a id="processing-instruction"/> consists of the string `<?`, a string
+of characters not including the string `?>`, and the string
+`?>`.
+
+A [declaration](#declaration) <a id="declaration"/> consists of the
+string `<!`, a name consisting of one or more uppercase ASCII letters,
+whitespace, a string of characters not including the character `>`, and
+the character `>`.
+
+A [CDATA section](#cdata-section) <a id="cdata-section"/> consists of
+the string `<![CDATA[`, a string of characters not including the string
+`]]>`, and the string `]]>`.
+
+An [HTML tag](#html-tag) <a id="html-tag"/> consists of an [open
+tag](#open-tag), a [closing tag](#closing-tag), an [HTML
+comment](#html-comment), a [processing
+instruction](#processing-instruction), a
+[declaration](#declaration), or a [CDATA
+section](#cdata-section).
+
+Here are some simple open tags:
+
+.
+<a><bab><c2c>
+.
+<p><a><bab><c2c></p>
+.
+
+Empty elements:
+
+.
+<a/><b2/>
+.
+<p><a/><b2/></p>
+.
+
+Whitespace is allowed:
+
+.
+<a /><b2
+data="foo" >
+.
+<p><a /><b2
+data="foo" ></p>
+.
+
+With attributes:
+
+.
+<a foo="bar" bam = 'baz <em>"</em>'
+_boolean zoop:33=zoop:33 />
+.
+<p><a foo="bar" bam = 'baz <em>"</em>'
+_boolean zoop:33=zoop:33 /></p>
+.
+
+Illegal tag names, not parsed as HTML:
+
+.
+<33> <__>
+.
+<p>&lt;33&gt; &lt;__&gt;</p>
+.
+
+Illegal attribute names:
+
+.
+<a h*#ref="hi">
+.
+<p>&lt;a h*#ref=&quot;hi&quot;&gt;</p>
+.
+
+Illegal attribute values:
+
+.
+<a href="hi'> <a href=hi'>
+.
+<p>&lt;a href=&quot;hi'&gt; &lt;a href=hi'&gt;</p>
+.
+
+Illegal whitespace:
+
+.
+< a><
+foo><bar/ >
+.
+<p>&lt; a&gt;&lt;
+foo&gt;&lt;bar/ &gt;</p>
+.
+
+Missing whitespace:
+
+.
+<a href='bar'title=title>
+.
+<p>&lt;a href='bar'title=title&gt;</p>
+.
+
+Closing tags:
+
+.
+</a>
+</foo >
+.
+<p></a>
+</foo ></p>
+.
+
+Illegal attributes in closing tag:
+
+.
+</a href="foo">
+.
+<p>&lt;/a href=&quot;foo&quot;&gt;</p>
+.
+
+Comments:
+
+.
+foo <!-- this is a
+comment - with hyphen -->
+.
+<p>foo <!-- this is a
+comment - with hyphen --></p>
+.
+
+.
+foo <!-- not a comment -- two hyphens -->
+.
+<p>foo &lt;!-- not a comment -- two hyphens --&gt;</p>
+.
+
+Processing instructions:
+
+.
+foo <?php echo $a; ?>
+.
+<p>foo <?php echo $a; ?></p>
+.
+
+Declarations:
+
+.
+foo <!ELEMENT br EMPTY>
+.
+<p>foo <!ELEMENT br EMPTY></p>
+.
+
+CDATA sections:
+
+.
+foo <![CDATA[>&<]]>
+.
+<p>foo <![CDATA[>&<]]></p>
+.
+
+Entities are preserved in HTML attributes:
+
+.
+<a href="&ouml;">
+.
+<p><a href="&ouml;"></p>
+.
+
+Backslash escapes do not work in HTML attributes:
+
+.
+<a href="\*">
+.
+<p><a href="\*"></p>
+.
+
+.
+<a href="\"">
+.
+<p>&lt;a href=&quot;&quot;&quot;&gt;</p>
+.
+
+## Hard line breaks
+
+A line break (not in a code span or HTML tag) that is preceded
+by two or more spaces is parsed as a linebreak (rendered
+in HTML as a `<br />` tag):
+
+.
+foo
+baz
+.
+<p>foo<br />
+baz</p>
+.
+
+For a more visible alternative, a backslash before the newline may be
+used instead of two spaces:
+
+.
+foo\
+baz
+.
+<p>foo<br />
+baz</p>
+.
+
+More than two spaces can be used:
+
+.
+foo
+baz
+.
+<p>foo<br />
+baz</p>
+.
+
+Leading spaces at the beginning of the next line are ignored:
+
+.
+foo
+ bar
+.
+<p>foo<br />
+bar</p>
+.
+
+.
+foo\
+ bar
+.
+<p>foo<br />
+bar</p>
+.
+
+Line breaks can occur inside emphasis, links, and other constructs
+that allow inline content:
+
+.
+*foo
+bar*
+.
+<p><em>foo<br />
+bar</em></p>
+.
+
+.
+*foo\
+bar*
+.
+<p><em>foo<br />
+bar</em></p>
+.
+
+Line breaks do not occur inside code spans
+
+.
+`code
+span`
+.
+<p><code>code span</code></p>
+.
+
+.
+`code\
+span`
+.
+<p><code>code\ span</code></p>
+.
+
+or HTML tags:
+
+.
+<a href="foo
+bar">
+.
+<p><a href="foo
+bar"></p>
+.
+
+.
+<a href="foo\
+bar">
+.
+<p><a href="foo\
+bar"></p>
+.
+
+## Soft line breaks
+
+A regular line break (not in a code span or HTML tag) that is not
+preceded by two or more spaces is parsed as a softbreak. (A
+softbreak may be rendered in HTML either as a newline or as a space.
+The result will be the same in browsers. In the examples here, a
+newline will be used.)
+
+.
+foo
+baz
+.
+<p>foo
+baz</p>
+.
+
+Spaces at the end of the line and beginning of the next line are
+removed:
+
+.
+foo
+ baz
+.
+<p>foo
+baz</p>
+.
+
+A conforming parser may render a soft line break in HTML either as a
+line break or as a space.
+
+A renderer may also provide an option to render soft line breaks
+as hard line breaks.
+
+## Strings
+
+Any characters not given an interpretation by the above rules will
+be parsed as string content.
+
+.
+hello $.;'there
+.
+<p>hello $.;'there</p>
+.
+
+.
+Foo χρῆν
+.
+<p>Foo χρῆν</p>
+.
+
+Internal spaces are preserved verbatim:
+
+.
+Multiple spaces
+.
+<p>Multiple spaces</p>
+.
+
+<!-- END TESTS -->
+
+# Appendix A: A parsing strategy {-}
+
+## Overview {-}
+
+Parsing has two phases:
+
+1. In the first phase, lines of input are consumed and the block
+structure of the document---its division into paragraphs, block quotes,
+list items, and so on---is constructed. Text is assigned to these
+blocks but not parsed. Reference link definitions are parsed and a
+map of links is constructed.
+
+2. In the second phase, the raw text contents of paragraphs and headers
+are parsed into sequences of markdown inline elements (strings,
+code spans, links, emphasis, and so on), using the map of link
+references constructed in phase 1.
+
+## The document tree {-}
+
+At each point in processing, the document is represented as a tree of
+**blocks**. The root of the tree is a `document` block. The `document`
+may have any number of other blocks as **children**. These children
+may, in turn, have other blocks as children. The last child of a block
+is normally considered **open**, meaning that subsequent lines of input
+can alter its contents. (Blocks that are not open are **closed**.)
+Here, for example, is a possible document tree, with the open blocks
+marked by arrows:
+
+``` tree
+-> document
+ -> block_quote
+ paragraph
+ "Lorem ipsum dolor\nsit amet."
+ -> list (type=bullet tight=true bullet_char=-)
+ list_item
+ paragraph
+ "Qui *quodsi iracundia*"
+ -> list_item
+ -> paragraph
+ "aliquando id"
+```
+
+## How source lines alter the document tree {-}
+
+Each line that is processed has an effect on this tree. The line is
+analyzed and, depending on its contents, the document may be altered
+in one or more of the following ways:
+
+1. One or more open blocks may be closed.
+2. One or more new blocks may be created as children of the
+ last open block.
+3. Text may be added to the last (deepest) open block remaining
+ on the tree.
+
+Once a line has been incorporated into the tree in this way,
+it can be discarded, so input can be read in a stream.
+
+We can see how this works by considering how the tree above is
+generated by four lines of markdown:
+
+``` markdown
+> Lorem ipsum dolor
+sit amet.
+> - Qui *quodsi iracundia*
+> - aliquando id
+```
+
+At the outset, our document model is just
+
+``` tree
+-> document
+```
+
+The first line of our text,
+
+``` markdown
+> Lorem ipsum dolor
+```
+
+causes a `block_quote` block to be created as a child of our
+open `document` block, and a `paragraph` block as a child of
+the `block_quote`. Then the text is added to the last open
+block, the `paragraph`:
+
+``` tree
+-> document
+ -> block_quote
+ -> paragraph
+ "Lorem ipsum dolor"
+```
+
+The next line,
+
+``` markdown
+sit amet.
+```
+
+is a "lazy continuation" of the open `paragraph`, so it gets added
+to the paragraph's text:
+
+``` tree
+-> document
+ -> block_quote
+ -> paragraph
+ "Lorem ipsum dolor\nsit amet."
+```
+
+The third line,
+
+``` markdown
+> - Qui *quodsi iracundia*
+```
+
+causes the `paragraph` block to be closed, and a new `list` block
+opened as a child of the `block_quote`. A `list_item` is also
+added as a child of the `list`, and a `paragraph` as a child of
+the `list_item`. The text is then added to the `paragraph`:
+
+``` tree
+-> document
+ -> block_quote
+ paragraph
+ "Lorem ipsum dolor\nsit amet."
+ -> list (type=bullet tight=true bullet_char=-)
+ -> list_item
+ -> paragraph
+ "Qui *quodsi iracundia*"
+```
+
+The fourth line,
+
+``` markdown
+> - aliquando id
+```
+
+causes the `list_item` (and its child the `paragraph`) to be closed,
+and a new `list_item` opened up as child of the `list`. A `paragraph`
+is added as a child of the new `list_item`, to contain the text.
+We thus obtain the final tree:
+
+``` tree
+-> document
+ -> block_quote
+ paragraph
+ "Lorem ipsum dolor\nsit amet."
+ -> list (type=bullet tight=true bullet_char=-)
+ list_item
+ paragraph
+ "Qui *quodsi iracundia*"
+ -> list_item
+ -> paragraph
+ "aliquando id"
+```
+
+## From block structure to the final document {-}
+
+Once all of the input has been parsed, all open blocks are closed.
+
+We then "walk the tree," visiting every node, and parse raw
+string contents of paragraphs and headers as inlines. At this
+point we have seen all the link reference definitions, so we can
+resolve reference links as we go.
+
+``` tree
+document
+ block_quote
+ paragraph
+ str "Lorem ipsum dolor"
+ softbreak
+ str "sit amet."
+ list (type=bullet tight=true bullet_char=-)
+ list_item
+ paragraph
+ str "Qui "
+ emph
+ str "quodsi iracundia"
+ list_item
+ paragraph
+ str "aliquando id"
+```
+
+Notice how the newline in the first paragraph has been parsed as
+a `softbreak`, and the asterisks in the first list item have become
+an `emph`.
+
+The document can be rendered as HTML, or in any other format, given
+an appropriate renderer.
+
+
diff --git a/spec2js.js b/spec2js.js
new file mode 100755
index 0000000..6bf366f
--- /dev/null
+++ b/spec2js.js
@@ -0,0 +1,17 @@
+#!/usr/bin/env node
+// Extract every "."-delimited example from spec.txt and print them as an array of {markdown, html} objects.
+var fs = require('fs');
+var util = require('util');
+// Read the whole spec as UTF-8 text, then scan it for examples in the callback.
+fs.readFile('spec.txt', 'utf8', function(err, data) {
+ if (err) {
+ return console.log(err); // read failed: print the error and stop
+ }
+ var examples = [];
+ data.replace(/^\.\n([\s\S]*?)^\.\n([\s\S]*?)^\.$/gm,
+ function(_,x,y){ // replace() is used only to visit each match; its return value is discarded
+ examples.push({markdown: x, html: y}); // x: text between the 1st and 2nd "." lines; y: between the 2nd and 3rd
+ });
+ console.log(util.inspect(examples, { depth: null })); // depth: null prints nested values in full
+ console.warn(examples.length + ' examples'); // summary count; console.warn writes to stderr in Node
+});
diff --git a/spec2md.pl b/spec2md.pl
new file mode 100644
index 0000000..1b4f26e
--- /dev/null
+++ b/spec2md.pl
@@ -0,0 +1,36 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+# Copy the spec from STDIN to STDOUT, wrapping each "."-delimited example in an HTML div with fenced code blocks.
+my $stage = 0; # 0 = outside an example, 1 = inside its markdown part, 2 = inside its html part
+my $example = 0; # running example number, used in the div id and the visible label
+my @match;
+my $section = ""; # text of the most recent header line seen outside an example
+# A line consisting of a single "." advances the stage; every other line is printed unchanged.
+while (<STDIN>) {
+ if (/^\.$/) {
+ if ($stage == 0) { # first ".": open the example div and the markdown fence
+ $example++;
+ print "\n<div class=\"example\" id=\"example-$example\" data-section=\"$section\">\n";
+ print "<div class=\"examplenum\">Example $example</div>\n\n";
+ print "````````````````````````````````````````````````````````` markdown\n";
+ } elsif ($stage == 1) { # second ".": close the markdown fence, open the html fence
+ print "`````````````````````````````````````````````````````````\n\n";
+ print "````````````````````````````````````````````````````````` html\n";
+ } elsif ($stage == 2) { # third ".": close the html fence and the example div
+ print "`````````````````````````````````````````````````````````\n\n";
+ print "</div>\n\n";
+ } else {
+ die "Encountered unknown stage $stage"; # defensive: the modulo below keeps $stage within 0..2
+ }
+ $stage = ($stage + 1) % 3; # cycle 0 -> 1 -> 2 -> 0
+ } else {
+ if ($stage == 0 && (@match = ($_ =~ /^#{1,6} *(.*)/))) {
+ $section = $match[0]; # remember the header text for the data-section attribute of later examples
+ }
+ if ($stage != 0) {
+ # $_ =~ s/ /␣/g;
+ }
+ print $_; # echo the line as-is, whether prose or example content
+ }
+}
diff --git a/specfilter.hs b/specfilter.hs
new file mode 100755
index 0000000..67c8fa5
--- /dev/null
+++ b/specfilter.hs
@@ -0,0 +1,37 @@
+#!/usr/bin/env runhaskell
+
+import Text.Pandoc.JSON
+import Text.Pandoc.Walk
+
+main = toJSONFilter go
+ where go :: Pandoc -> Pandoc
+ go = walk exampleDivs . walk anchors
+
+exampleDivs :: Block -> Block
+exampleDivs (Div (ident, ["example"], kvs)
+ [ d@(Div (_,["examplenum"],_) _),
+ c1@(CodeBlock (_,["markdown"],_) _),
+ c2@(CodeBlock (_,["html"],_) _)
+ ]) = Div (ident, ["example"], kvs)
+ [ rawtex "\\begin{minipage}[t]{\\textwidth}\n{\\scriptsize "
+ , d
+ , rawtex "\\vspace{-1em}}"
+ , rawtex "\\begin{minipage}[t]{0.49\\textwidth}\n\\definecolor{shadecolor}{gray}{0.85}\n"
+ , addBreaks c1
+ , rawtex "\\end{minipage}\n\\hfill\n\\begin{minipage}[t]{0.49\\textwidth}\n\\definecolor{shadecolor}{gray}{0.95}\n"
+ , addBreaks c2
+ , rawtex "\\end{minipage}\n\\end{minipage}"
+ ]
+ where rawtex = RawBlock (Format "latex")
+ addBreaks (CodeBlock attrs code) = CodeBlock attrs $ addBreaks' code
+ addBreaks' code =
+ if length code > 49
+ then take 49 code ++ ('\n':addBreaks' (drop 49 code))
+ else code
+exampleDivs x = x
+
+anchors :: Inline -> Inline
+anchors (RawInline (Format "html") ('<':'a':' ':'i':'d':'=':'"':xs)) =
+ RawInline (Format "latex") ("\\hyperdef{}{" ++ lab ++ "}{\\label{" ++ lab ++ "}}")
+ where lab = takeWhile (/='"') xs
+anchors x = x
diff --git a/src/blocks.c b/src/blocks.c
new file mode 100644
index 0000000..2776231
--- /dev/null
+++ b/src/blocks.c
@@ -0,0 +1,747 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <ctype.h>
+#include "bstrlib.h"
+#include "stmd.h"
+#include "uthash.h"
+#include "debug.h"
+#include "scanners.h"
+
+static block* make_block(int tag, int start_line, int start_column)
+{
+ block* e;
+ e = (block*) malloc(sizeof(block));
+ e->tag = tag;
+ e->open = true;
+ e->last_line_blank = false;
+ e->start_line = start_line;
+ e->start_column = start_column;
+ e->end_line = start_line;
+ e->children = NULL;
+ e->last_child = NULL;
+ e->parent = NULL;
+ e->top = NULL;
+ e->attributes.refmap = NULL;
+ e->string_content = bfromcstr("");
+ e->inline_content = NULL;
+ e->next = NULL;
+ e->prev = NULL;
+ return e;
+}
+
+// Create a root document block.
+extern block* make_document()
+{
+ block * e = make_block(document, 1, 1);
+ reference * map = NULL;
+ reference ** refmap;
+ refmap = (reference**) malloc(sizeof(reference*));
+ *refmap = map;
+ e->attributes.refmap = refmap;
+ e->top = e;
+ return e;
+}
+
+// Returns true if line has only space characters, else false.
+bool is_blank(bstring s, int offset)
+{
+ char c;
+ while ((c = bchar(s, offset))) {
+ if (c == '\n') {
+ return true;
+ } else if (c == ' ') {
+ offset++;
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
+
+static inline bool can_contain(int parent_type, int child_type)
+{
+ return ( parent_type == document ||
+ parent_type == block_quote ||
+ parent_type == list_item ||
+ (parent_type == list && child_type == list_item) );
+}
+
+static inline bool accepts_lines(int block_type)
+{
+ return (block_type == paragraph ||
+ block_type == atx_header ||
+ block_type == indented_code ||
+ block_type == fenced_code);
+}
+
+static int add_line(block* block, bstring ln, int offset) // append ln[offset..] to block->string_content; 0 on success, -1 on error
+{
+ bstring s = bmidstr(ln, offset, blength(ln) - offset); // temporary copy of the tail of ln; released on every exit path
+ check(block->open, "attempted to add line (%s) to closed container (%d)",
+ ln->data, block->tag);
+ check(s != NULL && bformata(block->string_content, "%s", s->data) == 0, // s is NULL if bmidstr failed
+ "could not append line to string_content");
+ bdestroy(s);
+ return 0;
+ error:
+ bdestroy(s); return -1; // the failure path previously leaked s
+}
+
+static int remove_trailing_blank_lines(bstring ln) // truncate ln in place after its last non-blank line; 0 on success, -1 on error
+{
+ bstring tofind = bfromcstr(" \t\r\n"); // whitespace set; released on every exit path
+ int pos;
+ // find last nonspace:
+ pos = bninchrr(ln, blength(ln) - 1, tofind);
+ if (pos == BSTR_ERR) { // all spaces
+ bassigncstr(ln, "");
+ } else {
+ // find next newline after it
+ pos = bstrchrp(ln, '\n', pos);
+ if (pos != BSTR_ERR) {
+ check(bdelete(ln, pos, blength(ln) - pos) != BSTR_ERR,
+ "failed to delete trailing blank lines");
+ }
+ }
+ bdestroy(tofind);
+ return 0;
+ error:
+ bdestroy(tofind); return -1; // the failure path previously leaked tofind
+}
+
+// Check to see if a block ends with a blank line, descending
+// if needed into lists and sublists.
+static bool ends_with_blank_line(block* block)
+{
+ if (block->last_line_blank) {
+ return true;
+ }
+ if ((block->tag == list || block->tag == list_item) && block->last_child) {
+ return ends_with_blank_line(block->last_child);
+ } else {
+ return false;
+ }
+}
+
+// Break out of all containing lists
+static int break_out_of_lists(block ** bptr, int line_number)
+{
+ block * container = *bptr;
+ block * b = container->top;
+ // find first containing list:
+ while (b && b->tag != list) {
+ b = b->last_child;
+ }
+ if (b) {
+ while (container && container != b) {
+ finalize(container, line_number);
+ container = container->parent;
+ }
+ finalize(b, line_number);
+ *bptr = b->parent;
+ }
+ return 0;
+}
+
+
+extern int finalize(block* b, int line_number)
+{
+ int firstlinelen;
+ int pos;
+ block* item;
+ block* subitem;
+
+ check(b != NULL, "finalize called on null block");
+ if (!b->open) {
+ return 0; // don't do anything if the block is already closed
+ }
+ b->open = false;
+ if (line_number > b->start_line) {
+ b->end_line = line_number - 1;
+ } else {
+ b->end_line = line_number;
+ }
+
+ switch (b->tag) {
+
+ case paragraph:
+ pos = 0;
+ while (bchar(b->string_content, 0) == '[' &&
+ (pos = parse_reference(b->string_content,
+ b->top->attributes.refmap))) {
+ bdelete(b->string_content, 0, pos);
+ }
+ if (is_blank(b->string_content, 0)) {
+ b->tag = reference_def;
+ }
+ break;
+
+ case indented_code:
+ remove_trailing_blank_lines(b->string_content);
+ bformata(b->string_content, "\n");
+ break;
+
+ case fenced_code:
+ // first line of contents becomes info
+ firstlinelen = bstrchr(b->string_content, '\n');
+ b->attributes.fenced_code_data.info =
+ bmidstr(b->string_content, 0, firstlinelen);
+ bdelete(b->string_content, 0, firstlinelen + 1); // +1 for \n
+ btrimws(b->attributes.fenced_code_data.info);
+ unescape(b->attributes.fenced_code_data.info);
+ break;
+
+ case list: // determine tight/loose status
+ b->attributes.list_data.tight = true; // tight by default
+ item = b->children;
+
+ while (item) {
+ // check for non-final non-empty list item ending with blank line:
+ if (item->last_line_blank && item->next) {
+ b->attributes.list_data.tight = false;
+ break;
+ }
+ // recurse into children of list item, to see if there are
+ // spaces between them:
+ subitem = item->children;
+ while (subitem) {
+ if (ends_with_blank_line(subitem) &&
+ (item->next || subitem->next)) {
+ b->attributes.list_data.tight = false;
+ break;
+ }
+ subitem = subitem->next;
+ }
+ if (!(b->attributes.list_data.tight)) {
+ break;
+ }
+ item = item->next;
+ }
+
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+ error:
+ return -1;
+}
+
+// Add a block as child of another. Return pointer to child.
+extern block* add_child(block* parent, // returns the new child, or NULL if no ancestor can contain it
+ int block_type, int start_line, int start_column)
+{
+ // if 'parent' isn't the kind of block that can accept this child,
+ // then back up til we hit a block that can.
+ while (parent != NULL && !can_contain(parent->tag, block_type)) { // stop at the root instead of dereferencing NULL
+ finalize(parent, start_line);
+ parent = parent->parent;
+ }
+
+ check(parent != NULL, "parent container cannot accept children"); // reachable now that the loop no longer walks past the root
+
+ block* child = make_block(block_type, start_line, start_column);
+ child->parent = parent;
+ child->top = parent->top;
+
+ if (parent->last_child) { // append after the current last sibling
+ parent->last_child->next = child;
+ child->prev = parent->last_child;
+ } else { // first child of this parent
+ parent->children = child;
+ child->prev = NULL;
+ }
+ parent->last_child = child;
+ return child;
+ error:
+ return NULL;
+}
+
+// Free a block list and any children.
+extern void free_blocks(block* e)
+{
+ block * next;
+ while (e != NULL) {
+ next = e->next;
+ free_inlines(e->inline_content);
+ bdestroy(e->string_content);
+ if (e->tag == fenced_code) {
+ bdestroy(e->attributes.fenced_code_data.info);
+ } else if (e->tag == document) {
+ free_reference_map(e->attributes.refmap);
+ }
+ free_blocks(e->children);
+ free(e);
+ e = next;
+ }
+}
+
+// Walk through block and all children, recursively, parsing
+// string content into inline content where appropriate.
+int process_inlines(block* cur, reference** refmap)
+{
+ switch (cur->tag) {
+
+ case paragraph:
+ case atx_header:
+ case setext_header:
+ check(cur->string_content != NULL, "string_content is NULL");
+ cur->inline_content = parse_inlines(cur->string_content, refmap);
+ bdestroy(cur->string_content);
+ cur->string_content = NULL;
+ break;
+
+ default:
+ break;
+ }
+
+ block * child = cur->children;
+ while (child != NULL) {
+ process_inlines(child, refmap);
+ child = child->next;
+ }
+
+ return 0;
+ error:
+ return -1;
+}
+
+// Attempts to parse a list item marker (bullet or enumerated).
+// On success, returns length of the marker, and populates
+// data with the details. On failure, returns 0.
+static int parse_list_marker(bstring ln, int pos, // on success *dataptr receives a malloc'd ListData that the caller must free
+ struct ListData ** dataptr)
+{
+ char c;
+ int startpos;
+ int start = 1;
+ struct ListData * data;
+
+ startpos = pos;
+ c = bchar(ln, pos);
+
+ if ((c == '*' || c == '-' || c == '+') && !scan_hrule(ln, pos)) { // bullet marker that is not the start of an hrule
+ pos++;
+ if (!isspace(bchar(ln, pos))) {
+ return 0;
+ }
+ data = malloc(sizeof(struct ListData));
+ data->marker_offset = 0; // will be adjusted later
+ data->list_type = bullet;
+ data->bullet_char = c;
+ data->start = 1;
+ data->delimiter = period;
+ data->tight = false;
+
+ } else if (isdigit((unsigned char) c)) { // c is a plain char: passing a negative value to isdigit is undefined
+
+ pos++;
+ while (isdigit(bchar(ln, pos))) {
+ pos++;
+ }
+
+ if (sscanf((char *) ln->data + startpos, "%d", &start) != 1) { // also rejects EOF (-1), which "!sscanf" accepted
+ log_err("sscanf failed");
+ return 0;
+ }
+
+ c = bchar(ln, pos);
+ if (c == '.' || c == ')') { // ordered marker needs a delimiter followed by whitespace
+ pos++;
+ if (!isspace(bchar(ln, pos))) {
+ return 0;
+ }
+ data = malloc(sizeof(struct ListData));
+ data->marker_offset = 0; // will be adjusted later
+ data->list_type = ordered;
+ data->bullet_char = 0;
+ data->start = start;
+ data->delimiter = (c == '.' ? period : parens);
+ data->tight = false;
+ } else {
+ return 0;
+ }
+
+ } else {
+ return 0;
+ }
+
+ *dataptr = data;
+ return (pos - startpos); // marker length, excluding the whitespace after it
+}
+
+// Return 1 if list item belongs in list, else 0.
+static int lists_match(struct ListData list_data,
+ struct ListData item_data)
+{
+ return (list_data.list_type == item_data.list_type &&
+ list_data.delimiter == item_data.delimiter &&
+ // list_data.marker_offset == item_data.marker_offset &&
+ list_data.bullet_char == item_data.bullet_char);
+}
+
+// Process one line at a time, modifying a block.
+// Returns 0 if successful. curptr is changed to point to
+// the currently open block.
+extern int incorporate_line(bstring ln, int line_number, block** curptr)
+{
+ block* last_matched_container;
+ int offset = 0;
+ int matched = 0;
+ int lev = 0;
+ int i;
+ struct ListData * data = NULL;
+ bool all_matched = true;
+ block* container;
+ block* cur = *curptr;
+ bool blank = false;
+ int first_nonspace;
+ int indent;
+
+ // detab input line
+ check(bdetab(ln, 1) != BSTR_ERR,
+ "invalid UTF-8 sequence in line %d\n", line_number);
+
+ // container starts at the document root.
+ container = cur->top;
+
+ // for each containing block, try to parse the associated line start.
+ // bail out on failure: container will point to the last matching block.
+
+ while (container->last_child && container->last_child->open) {
+ container = container->last_child;
+
+ first_nonspace = offset;
+ while (bchar(ln, first_nonspace) == ' ') {
+ first_nonspace++;
+ }
+
+ indent = first_nonspace - offset;
+ blank = bchar(ln, first_nonspace) == '\n';
+
+ if (container->tag == block_quote) {
+
+ matched = indent <= 3 && bchar(ln, first_nonspace) == '>';
+ if (matched) {
+ offset = first_nonspace + 1;
+ if (bchar(ln, offset) == ' ') {
+ offset++;
+ }
+ } else {
+ all_matched = false;
+ }
+
+ } else if (container->tag == list_item) {
+
+ if (indent >= container->attributes.list_data.marker_offset +
+ container->attributes.list_data.padding) {
+ offset += container->attributes.list_data.marker_offset +
+ container->attributes.list_data.padding;
+ } else if (blank) {
+ offset = first_nonspace;
+ } else {
+ all_matched = false;
+ }
+
+ } else if (container->tag == indented_code) {
+
+ if (indent >= CODE_INDENT) {
+ offset += CODE_INDENT;
+ } else if (blank) {
+ offset = first_nonspace;
+ } else {
+ all_matched = false;
+ }
+
+ } else if (container->tag == atx_header ||
+ container->tag == setext_header) {
+
+ // a header can never contain more than one line
+ all_matched = false;
+
+ } else if (container->tag == fenced_code) {
+
+ // skip optional spaces of fence offset
+ i = container->attributes.fenced_code_data.fence_offset;
+ while (i > 0 && bchar(ln, offset) == ' ') {
+ offset++;
+ i--;
+ }
+
+ } else if (container->tag == html_block) {
+
+ if (blank) {
+ all_matched = false;
+ }
+
+ } else if (container->tag == paragraph) {
+
+ if (blank) {
+ container->last_line_blank =true;
+ all_matched = false;
+ }
+
+ }
+
+ if (!all_matched) {
+ container = container->parent; // back up to last matching block
+ break;
+ }
+ }
+
+ last_matched_container = container;
+
+ // check to see if we've hit 2nd blank line, break out of list:
+ if (blank && container->last_line_blank) {
+ break_out_of_lists(&container, line_number);
+ }
+
+ // unless last matched container is code block, try new container starts:
+ while (container->tag != fenced_code && container->tag != indented_code &&
+ container->tag != html_block) {
+
+ first_nonspace = offset;
+ while (bchar(ln, first_nonspace) == ' ') {
+ first_nonspace++;
+ }
+
+ indent = first_nonspace - offset;
+ blank = bchar(ln, first_nonspace) == '\n';
+
+ if (indent >= CODE_INDENT) {
+
+ if (cur->tag != paragraph && !blank) {
+ offset += CODE_INDENT;
+ container = add_child(container, indented_code, line_number, offset + 1);
+ } else { // indent > 4 in lazy line
+ break;
+ }
+
+ } else if (bchar(ln, first_nonspace) == '>') {
+
+ offset = first_nonspace + 1;
+ // optional following character
+ if (bchar(ln, offset) == ' ') {
+ offset++;
+ }
+ container = add_child(container, block_quote, line_number, offset + 1);
+
+ } else if ((matched = scan_atx_header_start(ln, first_nonspace))) {
+
+ offset = first_nonspace + matched;
+ container = add_child(container, atx_header, line_number, offset + 1);
+ int hashpos = bstrchrp(ln, '#', first_nonspace);
+ check(hashpos != BSTR_ERR, "no # found in atx header start");
+ int level = 0;
+ while (bchar(ln, hashpos) == '#') {
+ level++;
+ hashpos++;
+ }
+ container->attributes.header_level = level;
+
+ } else if ((matched = scan_open_code_fence(ln, first_nonspace))) {
+
+ container = add_child(container, fenced_code, line_number,
+ first_nonspace + 1);
+ container->attributes.fenced_code_data.fence_char = bchar(ln,
+ first_nonspace);
+ container->attributes.fenced_code_data.fence_length = matched;
+ container->attributes.fenced_code_data.fence_offset =
+ first_nonspace - offset;
+ offset = first_nonspace + matched;
+
+ } else if ((matched = scan_html_block_tag(ln, first_nonspace))) {
+
+ container = add_child(container, html_block, line_number,
+ first_nonspace + 1);
+ // note, we don't adjust offset because the tag is part of the text
+
+ } else if (container->tag == paragraph &&
+ (lev = scan_setext_header_line(ln, first_nonspace)) &&
+ // check that there is only one line in the paragraph:
+ bstrrchrp(container->string_content, '\n',
+ blength(container->string_content) - 2) == BSTR_ERR) {
+
+ container->tag = setext_header;
+ container->attributes.header_level = lev;
+ offset = blength(ln) - 1;
+
+ } else if (!(container->tag == paragraph && !all_matched) &&
+ (matched = scan_hrule(ln, first_nonspace))) {
+
+ // it's only now that we know the line is not part of a setext header:
+ container = add_child(container, hrule, line_number, first_nonspace + 1);
+ finalize(container, line_number);
+ container = container->parent;
+ offset = blength(ln) - 1;
+
+ } else if ((matched = parse_list_marker(ln, first_nonspace, &data))) {
+
+ // compute padding:
+ offset = first_nonspace + matched;
+ i = 0;
+ while (i <= 5 && bchar(ln, offset + i) == ' ') {
+ i++;
+ }
+ // i = number of spaces after marker, up to 5
+ if (i >= 5 || i < 1 || bchar(ln, offset) == '\n') {
+ data->padding = matched + 1;
+ if (i > 0) {
+ offset += 1;
+ }
+ } else {
+ data->padding = matched + i;
+ offset += i;
+ }
+
+ // check container; if it's a list, see if this list item
+ // can continue the list; otherwise, create a list container.
+
+ data->marker_offset = indent;
+
+ if (container->tag != list ||
+ !lists_match(container->attributes.list_data, *data)) {
+ container = add_child(container, list, line_number,
+ first_nonspace + 1);
+ container->attributes.list_data = *data;
+ }
+
+ // add the list item
+ container = add_child(container, list_item, line_number,
+ first_nonspace + 1);
+ container->attributes.list_data = *data;
+ free(data);
+
+ } else {
+ break;
+ }
+
+ if (accepts_lines(container->tag)) {
+ // if it's a line container, it can't contain other containers
+ break;
+ }
+ }
+
+ // what remains at offset is a text line. add the text to the
+ // appropriate container.
+
+ first_nonspace = offset;
+ while (bchar(ln, first_nonspace) == ' ') {
+ first_nonspace++;
+ }
+
+ indent = first_nonspace - offset;
+ blank = bchar(ln, first_nonspace) == '\n';
+
+ // block quote lines are never blank as they start with >
+ // and we don't count blanks in fenced code for purposes of tight/loose
+ // lists or breaking out of lists. we also don't set last_line_blank
+ // on an empty list item.
+ container->last_line_blank = (blank &&
+ container->tag != block_quote &&
+ container->tag != fenced_code &&
+ !(container->tag == list_item &&
+ container->children == NULL &&
+ container->start_line == line_number));
+
+ block *cont = container;
+ while (cont->parent) {
+ cont->parent->last_line_blank = false;
+ cont = cont->parent;
+ }
+
+ if (cur != last_matched_container &&
+ container == last_matched_container &&
+ !blank &&
+ cur->tag == paragraph &&
+ blength(cur->string_content) > 0) {
+
+ check(add_line(cur, ln, offset) == 0, "could not add line");
+
+ } else { // not a lazy continuation
+
+ // finalize any blocks that were not matched and set cur to container:
+ while (cur != last_matched_container) {
+
+ finalize(cur, line_number);
+ cur = cur->parent;
+ check(cur != NULL, "cur is NULL, last_matched_container->tag = %d",
+ last_matched_container->tag);
+
+ }
+
+ if (container->tag == indented_code) {
+
+ check(add_line(container, ln, offset) == 0, "could not add line");
+
+ } else if (container->tag == fenced_code) {
+
+ matched = (indent <= 3
+ && bchar(ln, first_nonspace) == container->attributes.fenced_code_data.fence_char)
+ && scan_close_code_fence(ln, first_nonspace,
+ container->attributes.fenced_code_data.fence_length);
+ if (matched) {
+ // if closing fence, don't add line to container; instead, close it:
+ finalize(container, line_number);
+ container = container->parent; // back up to parent
+ } else {
+ check(add_line(container, ln, offset) == 0, "could not add line");
+ }
+
+ } else if (container->tag == html_block) {
+
+ check(add_line(container, ln, offset) == 0, "could not add line");
+
+ } else if (blank) {
+
+ // ??? do nothing
+
+ } else if (container->tag == atx_header) {
+
+ // chop off trailing ###s...use a scanner?
+ brtrimws(ln);
+ int p = blength(ln) - 1;
+ int numhashes = 0;
+ // if string ends in #s, remove these:
+ while (bchar(ln, p) == '#') {
+ p--;
+ numhashes++;
+ }
+ if (bchar(ln, p) == '\\') {
+ // the last # was escaped, so we include it.
+ p++;
+ numhashes--;
+ }
+ check(bdelete(ln, p + 1, numhashes) != BSTR_ERR,
+ "could not delete final hashes");
+ check(add_line(container, ln, first_nonspace) == 0, "could not add line");
+ finalize(container, line_number);
+ container = container->parent;
+
+ } else if (accepts_lines(container->tag)) {
+
+ check(add_line(container, ln, first_nonspace) == 0, "could not add line");
+
+ } else if (container->tag != hrule && container->tag != setext_header) {
+
+ // create paragraph container for line
+ container = add_child(container, paragraph, line_number, first_nonspace + 1);
+ check(add_line(container, ln, first_nonspace) == 0, "could not add line");
+
+ } else {
+
+ log_warn("Line %d with container type %d did not match any condition:\n\"%s\"",
+ line_number, container->tag, ln->data);
+
+ }
+ *curptr = container;
+ }
+
+ return 0;
+ error:
+ return -1;
+}
+
diff --git a/src/bstrlib.c b/src/bstrlib.c
new file mode 100644
index 0000000..1b19dbe
--- /dev/null
+++ b/src/bstrlib.c
@@ -0,0 +1,2979 @@
+/*
+ * This source file is part of the bstring string library. This code was
+ * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
+ * BSD open source license or GPL v2.0. Refer to the accompanying documentation
+ * for details on usage and license.
+ */
+
+/*
+ * bstrlib.c
+ *
+ * This file is the core module for implementing the bstring functions.
+ */
+
+#if defined (_MSC_VER)
+/* These warnings from MSVC++ are totally pointless. */
+# define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#include <stdio.h>
+#include <stddef.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "bstrlib.h"
+
+/* Optionally include a mechanism for debugging memory */
+
+#if defined(MEMORY_DEBUG) || defined(BSTRLIB_MEMORY_DEBUG)
+#include "memdbg.h"
+#endif
+
+#ifndef bstr__alloc
+#define bstr__alloc(x) malloc (x)
+#endif
+
+#ifndef bstr__free
+#define bstr__free(p) free (p)
+#endif
+
+#ifndef bstr__realloc
+#define bstr__realloc(p,x) realloc ((p), (x))
+#endif
+
+#ifndef bstr__memcpy
+#define bstr__memcpy(d,s,l) memcpy ((d), (s), (l))
+#endif
+
+#ifndef bstr__memmove
+#define bstr__memmove(d,s,l) memmove ((d), (s), (l))
+#endif
+
+#ifndef bstr__memset
+#define bstr__memset(d,c,l) memset ((d), (c), (l))
+#endif
+
+#ifndef bstr__memcmp
+#define bstr__memcmp(d,c,l) memcmp ((d), (c), (l))
+#endif
+
+#ifndef bstr__memchr
+#define bstr__memchr(s,c,l) memchr ((s), (c), (l))
+#endif
+
+/* Just a length safe wrapper for memmove. */
+
+#define bBlockCopy(D,S,L) { if ((L) > 0) bstr__memmove ((D),(S),(L)); }
+
+/* Compute the snapped size for a given requested size. By snapping to powers
+ of 2 like this, repeated reallocations are avoided. */
+static int snapUpSize (int i) {
+ if (i < 8) {
+ i = 8;
+ } else {
+ unsigned int j;
+ j = (unsigned int) i;
+
+ j |= (j >> 1);
+ j |= (j >> 2);
+ j |= (j >> 4);
+ j |= (j >> 8); /* Ok, since int >= 16 bits */
+#if (UINT_MAX != 0xffff)
+ j |= (j >> 16); /* For 32 bit int systems */
+#if (UINT_MAX > 0xffffffffUL)
+ j |= (j >> 32); /* For 64 bit int systems */
+#endif
+#endif
+ /* Least power of two greater than i */
+ j++;
+ if ((int) j >= i) i = (int) j;
+ }
+ return i;
+}
+
+/* int balloc (bstring b, int len)
+ *
+ * Increase the size of the memory backing the bstring b to at least len.
+ */
+int balloc (bstring b, int olen) {
+ int len;
+ if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen <= 0 ||
+ b->mlen < b->slen || olen <= 0) {
+ return BSTR_ERR;
+ }
+
+ if (olen >= b->mlen) {
+ unsigned char * x;
+
+ if ((len = snapUpSize (olen)) <= b->mlen) return BSTR_OK;
+
+ /* Assume probability of a non-moving realloc is 0.125 */
+ if (7 * b->mlen < 8 * b->slen) {
+
+ /* If slen is close to mlen in size then use realloc to reduce
+ the memory fragmentation */
+
+ reallocStrategy:;
+
+ x = (unsigned char *) bstr__realloc (b->data, (size_t) len);
+ if (x == NULL) {
+
+ /* Since we failed, try allocating the tightest possible
+ allocation */
+
+ if (NULL == (x = (unsigned char *) bstr__realloc (b->data, (size_t) (len = olen)))) {
+ return BSTR_ERR;
+ }
+ }
+ } else {
+
+ /* If slen is not close to mlen then avoid the penalty of copying
+ the extra bytes that are allocated, but not considered part of
+ the string */
+
+ if (NULL == (x = (unsigned char *) bstr__alloc ((size_t) len))) {
+
+ /* Perhaps there is no available memory for the two
+ allocations to be in memory at once */
+
+ goto reallocStrategy;
+
+ } else {
+ if (b->slen) bstr__memcpy ((char *) x, (char *) b->data, (size_t) b->slen);
+ bstr__free (b->data);
+ }
+ }
+ b->data = x;
+ b->mlen = len;
+ b->data[b->slen] = (unsigned char) '\0';
+ }
+
+ return BSTR_OK;
+}
+
+/* int ballocmin (bstring b, int len)
+ *
+ * Set the size of the memory backing the bstring b to len or b->slen+1,
+ * whichever is larger. Note that repeated use of this function can degrade
+ * performance.
+ */
+int ballocmin (bstring b, int len) {
+ unsigned char * s;
+
+ if (b == NULL || b->data == NULL || (b->slen+1) < 0 || b->mlen <= 0 ||
+ b->mlen < b->slen || len <= 0) {
+ return BSTR_ERR;
+ }
+
+ if (len < b->slen + 1) len = b->slen + 1;
+
+ if (len != b->mlen) {
+ s = (unsigned char *) bstr__realloc (b->data, (size_t) len);
+ if (NULL == s) return BSTR_ERR;
+ s[b->slen] = (unsigned char) '\0';
+ b->data = s;
+ b->mlen = len;
+ }
+
+ return BSTR_OK;
+}
+
+/* bstring bfromcstr (const char * str)
+ *
+ * Create a bstring which contains the contents of the '\0' terminated char *
+ * buffer str.
+ */
+bstring bfromcstr (const char * str) {
+bstring b;
+int i;
+size_t j;
+
+ if (str == NULL) return NULL;
+ j = (strlen) (str);
+ i = snapUpSize ((int) (j + (2 - (j != 0))));
+ if (i <= (int) j) return NULL;
+
+ b = (bstring) bstr__alloc (sizeof (struct tagbstring));
+ if (NULL == b) return NULL;
+ b->slen = (int) j;
+ if (NULL == (b->data = (unsigned char *) bstr__alloc (b->mlen = i))) {
+ bstr__free (b);
+ return NULL;
+ }
+
+ bstr__memcpy (b->data, str, j+1);
+ return b;
+}
+
+/* bstring bfromcstralloc (int mlen, const char * str)
+ *
+ * Create a bstring which contains the contents of the '\0' terminated char *
+ * buffer str. The memory buffer backing the string is at least mlen
+ * characters in length.
+ */
+bstring bfromcstralloc (int mlen, const char * str) {
+bstring b;
+int i;
+size_t j;
+
+ if (str == NULL) return NULL;
+ j = (strlen) (str);
+ i = snapUpSize ((int) (j + (2 - (j != 0))));
+ if (i <= (int) j) return NULL;
+
+ b = (bstring) bstr__alloc (sizeof (struct tagbstring));
+ if (b == NULL) return NULL;
+ b->slen = (int) j;
+ if (i < mlen) i = mlen;
+
+ if (NULL == (b->data = (unsigned char *) bstr__alloc (b->mlen = i))) {
+ bstr__free (b);
+ return NULL;
+ }
+
+ bstr__memcpy (b->data, str, j+1);
+ return b;
+}
+
+/* bstring blk2bstr (const void * blk, int len)
+ *
+ * Create a bstring which contains the content of the block blk of length
+ * len.
+ */
+bstring blk2bstr (const void * blk, int len) {
+bstring b;
+int i;
+
+ if (blk == NULL || len < 0) return NULL;
+ b = (bstring) bstr__alloc (sizeof (struct tagbstring));
+ if (b == NULL) return NULL;
+ b->slen = len;
+
+ i = len + (2 - (len != 0));
+ i = snapUpSize (i);
+
+ b->mlen = i;
+
+ b->data = (unsigned char *) bstr__alloc ((size_t) b->mlen);
+ if (b->data == NULL) {
+ bstr__free (b);
+ return NULL;
+ }
+
+ if (len > 0) bstr__memcpy (b->data, blk, (size_t) len);
+ b->data[len] = (unsigned char) '\0';
+
+ return b;
+}
+
+/* char * bstr2cstr (const_bstring s, char z)
+ *
+ * Create a '\0' terminated char * buffer which is equal to the contents of
+ * the bstring s, except that any contained '\0' characters are converted
+ * to the character in z. This returned value should be freed with a
+ * bcstrfree () call, by the calling application.
+ */
+char * bstr2cstr (const_bstring b, char z) {
+int i, l;
+char * r;
+
+ if (b == NULL || b->slen < 0 || b->data == NULL) return NULL;
+ l = b->slen;
+ r = (char *) bstr__alloc ((size_t) (l + 1));
+ if (r == NULL) return r;
+
+ for (i=0; i < l; i ++) {
+ r[i] = (char) ((b->data[i] == '\0') ? z : (char) (b->data[i]));
+ }
+
+ r[l] = (unsigned char) '\0';
+
+ return r;
+}
+
+/* int bcstrfree (char * s)
+ *
+ * Frees a C-string generated by bstr2cstr (). This is normally unnecessary
+ * since it just wraps a call to bstr__free (), however, if bstr__alloc ()
+ * and bstr__free () have been redefined as a macros within the bstrlib
+ * module (via defining them in memdbg.h after defining
+ * BSTRLIB_MEMORY_DEBUG) with some difference in behaviour from the std
+ * library functions, then this allows a correct way of freeing the memory
+ * that allows higher level code to be independent from these macro
+ * redefinitions.
+ */
+int bcstrfree (char * s) {
+ if (s) {
+ bstr__free (s);
+ return BSTR_OK;
+ }
+ return BSTR_ERR;
+}
+
+/* int bconcat (bstring b0, const_bstring b1)
+ *
+ * Concatenate the bstring b1 to the bstring b0.
+ */
+int bconcat (bstring b0, const_bstring b1) {
+int len, d;
+bstring aux = (bstring) b1;
+
+ if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL) return BSTR_ERR;
+
+ d = b0->slen;
+ len = b1->slen;
+ if ((d | (b0->mlen - d) | len | (d + len)) < 0) return BSTR_ERR;
+
+ if (b0->mlen <= d + len + 1) {
+ ptrdiff_t pd = b1->data - b0->data;
+ if (0 <= pd && pd < b0->mlen) {
+ if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR;
+ }
+ if (balloc (b0, d + len + 1) != BSTR_OK) {
+ if (aux != b1) bdestroy (aux);
+ return BSTR_ERR;
+ }
+ }
+
+ bBlockCopy (&b0->data[d], &aux->data[0], (size_t) len);
+ b0->data[d + len] = (unsigned char) '\0';
+ b0->slen = d + len;
+ if (aux != b1) bdestroy (aux);
+ return BSTR_OK;
+}
+
+/* int bconchar (bstring b, char c)
+ *
+ * Concatenate the single character c to the bstring b.
+ */
+int bconchar (bstring b, char c) {
+int d;
+
+ if (b == NULL) return BSTR_ERR;
+ d = b->slen;
+ if ((d | (b->mlen - d)) < 0 || balloc (b, d + 2) != BSTR_OK) return BSTR_ERR;
+ b->data[d] = (unsigned char) c;
+ b->data[d + 1] = (unsigned char) '\0';
+ b->slen++;
+ return BSTR_OK;
+}
+
+/* int bcatcstr (bstring b, const char * s)
+ *
+ * Concatenate a char * string to a bstring.
+ */
+int bcatcstr (bstring b, const char * s) {
+char * d;
+int i, l;
+
+ if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen
+ || b->mlen <= 0 || s == NULL) return BSTR_ERR;
+
+ /* Optimistically concatenate directly */
+ l = b->mlen - b->slen;
+ d = (char *) &b->data[b->slen];
+ for (i=0; i < l; i++) {
+ if ((*d++ = *s++) == '\0') {
+ b->slen += i;
+ return BSTR_OK;
+ }
+ }
+ b->slen += i;
+
+ /* Need to explicitly resize and concatenate tail */
+ return bcatblk (b, (const void *) s, (int) strlen (s));
+}
+
+/* int bcatblk (bstring b, const void * s, int len)
+ *
+ * Concatenate a fixed length buffer to a bstring.
+ */
+int bcatblk (bstring b, const void * s, int len) {
+int nl;
+
+ if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen
+ || b->mlen <= 0 || s == NULL || len < 0) return BSTR_ERR;
+
+ if (0 > (nl = b->slen + len)) return BSTR_ERR; /* Overflow? */
+ if (b->mlen <= nl && 0 > balloc (b, nl + 1)) return BSTR_ERR;
+
+ bBlockCopy (&b->data[b->slen], s, (size_t) len);
+ b->slen = nl;
+ b->data[nl] = (unsigned char) '\0';
+ return BSTR_OK;
+}
+
+/* bstring bstrcpy (const_bstring b)
+ *
+ * Create a copy of the bstring b.
+ */
+bstring bstrcpy (const_bstring b) {
+bstring b0;
+int i,j;
+
+ /* Attempted to copy an invalid string? */
+ if (b == NULL || b->slen < 0 || b->data == NULL) return NULL;
+
+ b0 = (bstring) bstr__alloc (sizeof (struct tagbstring));
+ if (b0 == NULL) {
+ /* Unable to allocate memory for string header */
+ return NULL;
+ }
+
+ i = b->slen;
+ j = snapUpSize (i + 1);
+
+ b0->data = (unsigned char *) bstr__alloc (j);
+ if (b0->data == NULL) {
+ j = i + 1;
+ b0->data = (unsigned char *) bstr__alloc (j);
+ if (b0->data == NULL) {
+ /* Unable to allocate memory for string data */
+ bstr__free (b0);
+ return NULL;
+ }
+ }
+
+ b0->mlen = j;
+ b0->slen = i;
+
+ if (i) bstr__memcpy ((char *) b0->data, (char *) b->data, i);
+ b0->data[b0->slen] = (unsigned char) '\0';
+
+ return b0;
+}
+
+/* int bassign (bstring a, const_bstring b)
+ *
+ * Overwrite the string a with the contents of string b.
+ *
+ * Returns BSTR_OK on success.  Returns BSTR_ERR if b is NULL or has NULL
+ * data or a negative length, if a is not a valid writable bstring, or if
+ * the buffer of a cannot be enlarged.
+ */
+int bassign (bstring a, const_bstring b) {
+    if (b == NULL || b->data == NULL || b->slen < 0)
+        return BSTR_ERR;
+    if (a == NULL || a->data == NULL || a->mlen < a->slen ||
+        a->slen < 0 || a->mlen <= 0)
+        return BSTR_ERR;
+
+    if (b->slen != 0) {
+        /* Like the other callers of balloc in this file, ask for the
+           content length plus one so that the terminator written below
+           is part of the requested size.  The request is made only when
+           the current buffer is too small, so no extra reallocation is
+           triggered compared to asking for b->slen alone. */
+        if (a->mlen <= b->slen && balloc (a, b->slen + 1) != BSTR_OK)
+            return BSTR_ERR;
+        /* memmove: a and b may share storage */
+        bstr__memmove (a->data, b->data, b->slen);
+    }
+    a->data[b->slen] = (unsigned char) '\0';
+    a->slen = b->slen;
+    return BSTR_OK;
+}
+
+/* int bassignmidstr (bstring a, const_bstring b, int left, int len)
+ *
+ * Overwrite the string a with the middle of contents of string b
+ * starting from position left and running for a length len.  left and
+ * len are clamped to the ends of b as with the function bmidstr.
+ *
+ * Returns BSTR_OK on success.  Returns BSTR_ERR if b is NULL or has NULL
+ * data or a negative length, if a is not a valid writable bstring, or if
+ * the buffer of a cannot be enlarged.
+ */
+int bassignmidstr (bstring a, const_bstring b, int left, int len) {
+    if (b == NULL || b->data == NULL || b->slen < 0)
+        return BSTR_ERR;
+
+    /* Clamp the requested section to the boundaries of b */
+    if (left < 0) {
+        len += left;
+        left = 0;
+    }
+
+    if (len > b->slen - left) len = b->slen - left;
+
+    if (a == NULL || a->data == NULL || a->mlen < a->slen ||
+        a->slen < 0 || a->mlen == 0)
+        return BSTR_ERR;
+
+    if (len > 0) {
+        /* Ask for len + 1 (as the other balloc callers in this file do)
+           so that the terminator written below is part of the requested
+           size; only ask when the current buffer is too small. */
+        if (a->mlen <= len && balloc (a, len + 1) != BSTR_OK) return BSTR_ERR;
+        /* memmove: a and b may share storage */
+        bstr__memmove (a->data, b->data + left, len);
+        a->slen = len;
+    } else {
+        a->slen = 0;
+    }
+    a->data[a->slen] = (unsigned char) '\0';
+    return BSTR_OK;
+}
+
+/* int bassigncstr (bstring a, const char * str)
+ *
+ * Replace the contents of the bstring a with the '\0' terminated string
+ * str.  a must be a well defined and writable bstring.  BSTR_OK is
+ * returned on success.  On failure BSTR_ERR is returned, and a may already
+ * have been partially overwritten.
+ */
+int bassigncstr (bstring a, const char * str) {
+    int pos;
+    size_t rest;
+
+    if (a == NULL || NULL == str) return BSTR_ERR;
+    if (a->data == NULL || a->mlen < a->slen || a->slen < 0 ||
+        a->mlen == 0) return BSTR_ERR;
+
+    /* Copy straight into the existing buffer for as long as it lasts */
+    pos = 0;
+    while (pos < a->mlen) {
+        a->data[pos] = str[pos];
+        if ('\0' == a->data[pos]) {
+            a->slen = pos;
+            return BSTR_OK;
+        }
+        pos++;
+    }
+
+    /* The buffer is full: measure what is left, grow, and copy the rest
+       including its terminator */
+    a->slen = pos;
+    rest = strlen (str + pos);
+    if (rest > INT_MAX) return BSTR_ERR;
+    if (pos + rest + 1 > INT_MAX) return BSTR_ERR;
+    if (balloc (a, (int) (pos + rest + 1)) < 0) return BSTR_ERR;
+
+    bBlockCopy (a->data + pos, str + pos, (size_t) rest + 1);
+    a->slen += (int) rest;
+    return BSTR_OK;
+}
+
+/* int bassignblk (bstring a, const void * s, int len)
+ *
+ * Replace the contents of the bstring a with the len bytes found at s.
+ * a must be a well defined and writable bstring.  BSTR_OK is returned on
+ * success; on failure BSTR_ERR is returned and a is left unmodified.
+ */
+int bassignblk (bstring a, const void * s, int len) {
+    int need;
+
+    if (a == NULL || a->data == NULL || a->slen < 0 ||
+        a->mlen < a->slen || a->mlen == 0) return BSTR_ERR;
+    if (NULL == s) return BSTR_ERR;
+
+    /* Room for the content plus its terminator; this also rejects a
+       negative len */
+    need = len + 1;
+    if (need < 1) return BSTR_ERR;
+
+    if (need > a->mlen) {
+        if (balloc (a, need) < 0) return BSTR_ERR;
+    }
+
+    bBlockCopy (a->data, s, (size_t) len);
+    a->slen = len;
+    a->data[len] = (unsigned char) '\0';
+    return BSTR_OK;
+}
+
+/* int btrunc (bstring b, int n)
+ *
+ * Shorten the bstring b so that it holds no more than n characters.  A
+ * string that is already short enough is left alone.  BSTR_ERR is
+ * returned for a negative n or a bstring that is not valid and writable,
+ * BSTR_OK otherwise.
+ */
+int btrunc (bstring b, int n) {
+    if (b == NULL || n < 0) return BSTR_ERR;
+    if (b->data == NULL || b->slen < 0 || b->mlen < b->slen ||
+        b->mlen <= 0) return BSTR_ERR;
+
+    /* Nothing to cut off */
+    if (n >= b->slen) return BSTR_OK;
+
+    b->data[n] = (unsigned char) '\0';
+    b->slen = n;
+    return BSTR_OK;
+}
+
+#define upcase(c) (toupper ((unsigned char) (c))) /* argument parenthesized so the cast covers a whole expression */
+#define downcase(c) (tolower ((unsigned char) (c))) /* cast to unsigned char keeps the <ctype.h> argument in range */
+#define wspace(c) (isspace ((unsigned char) (c))) /* non-zero when c is a whitespace character per isspace */
+
+/* int btoupper (bstring b)
+ *
+ * Replace every character of the bstring b by its upper case form, in
+ * place.  Returns BSTR_ERR if b is not a valid writable bstring, BSTR_OK
+ * otherwise.
+ */
+int btoupper (bstring b) {
+    unsigned char * cur, * stop;
+
+    if (b == NULL || b->data == NULL || b->slen < 0 ||
+        b->mlen < b->slen || b->mlen <= 0) return BSTR_ERR;
+
+    stop = b->data + b->slen;
+    for (cur = b->data; cur != stop; cur++) {
+        *cur = (unsigned char) upcase (*cur);
+    }
+    return BSTR_OK;
+}
+
+/* int btolower (bstring b)
+ *
+ * Replace every character of the bstring b by its lower case form, in
+ * place.  Returns BSTR_ERR if b is not a valid writable bstring, BSTR_OK
+ * otherwise.
+ */
+int btolower (bstring b) {
+    unsigned char * cur, * stop;
+
+    if (b == NULL || b->data == NULL || b->slen < 0 ||
+        b->mlen < b->slen || b->mlen <= 0) return BSTR_ERR;
+
+    stop = b->data + b->slen;
+    for (cur = b->data; cur != stop; cur++) {
+        *cur = (unsigned char) downcase (*cur);
+    }
+    return BSTR_OK;
+}
+
+/* int bstricmp (const_bstring b0, const_bstring b1)
+ *
+ * Case insensitive comparison of two strings.  Both strings are walked
+ * over their common length; at the first position where the lower cased
+ * characters differ, that difference is returned.  When one string is a
+ * prefix of the other, the lower cased first extra character decides the
+ * result (positive when b0 is the longer one, negative when b1 is), with
+ * UCHAR_MAX+1 standing in for an extra character that is '\0'.  0 means
+ * the strings are equal.  SHRT_MIN is returned for invalid arguments.
+ */
+int bstricmp (const_bstring b0, const_bstring b1) {
+    int idx, diff, shared;
+
+    if (bdata (b0) == NULL || b0->slen < 0 ||
+        bdata (b1) == NULL || b1->slen < 0) return SHRT_MIN;
+
+    /* Same length and same storage: trivially equal */
+    if (b0->slen == b1->slen && b0->data == b1->data) return BSTR_OK;
+
+    shared = b0->slen;
+    if (b1->slen < shared) shared = b1->slen;
+
+    for (idx = 0; idx < shared; idx++) {
+        diff = (char) downcase (b0->data[idx])
+             - (char) downcase (b1->data[idx]);
+        if (diff != 0) return diff;
+    }
+
+    if (b0->slen > shared) {
+        diff = (char) downcase (b0->data[shared]);
+        return diff ? diff : UCHAR_MAX + 1;
+    }
+    if (b1->slen > shared) {
+        diff = - (char) downcase (b1->data[shared]);
+        return diff ? diff : - (int) (UCHAR_MAX + 1);
+    }
+    return BSTR_OK;
+}
+
+/* int bstrnicmp (const_bstring b0, const_bstring b1, int n)
+ *
+ * Compare two strings without differentiating between case for at most n
+ * characters.  If the position where the two strings first differ is
+ * before the nth position, the return value is the difference of the
+ * values of the characters after lower case transformation (the same
+ * convention as bstricmp), otherwise 0 is returned.  If the lengths are
+ * different and less than n characters, then a difference from 0 is
+ * given, but if the first extra character is '\0', then it is taken to be
+ * the value UCHAR_MAX+1.  SHRT_MIN is returned for invalid arguments,
+ * including a negative n.
+ */
+int bstrnicmp (const_bstring b0, const_bstring b1, int n) {
+int i, v, m;
+
+ if (bdata (b0) == NULL || b0->slen < 0 ||
+     bdata (b1) == NULL || b1->slen < 0 || n < 0) return SHRT_MIN;
+ /* m = number of positions present in both strings, capped at n */
+ m = n;
+ if (m > b0->slen) m = b0->slen;
+ if (m > b1->slen) m = b1->slen;
+
+ if (b0->data != b1->data) {
+  for (i = 0; i < m; i ++) {
+   v = (char) downcase (b0->data[i]);
+   v -= (char) downcase (b1->data[i]);
+   /* Return the case folded difference.  The raw byte difference can
+      have the opposite sign (e.g. 'a' against 'B') and would order
+      the strings differently from bstricmp. */
+   if (v != 0) return v;
+  }
+ }
+
+ if (n == m || b0->slen == b1->slen) return BSTR_OK;
+
+ if (b0->slen > m) {
+  v = (char) downcase (b0->data[m]);
+  if (v) return v;
+  return UCHAR_MAX + 1;
+ }
+
+ v = - (char) downcase (b1->data[m]);
+ if (v) return v;
+ return - (int) (UCHAR_MAX + 1);
+}
+
+/* int biseqcaseless (const_bstring b0, const_bstring b1)
+ *
+ * Test two strings for equality while ignoring case.  1 is returned when
+ * the strings match, 0 when they differ in something other than case, and
+ * -1 when an argument is invalid.  Strings of different length are
+ * rejected without looking at their contents.  '\0' characters get no
+ * special treatment.
+ */
+int biseqcaseless (const_bstring b0, const_bstring b1) {
+    const unsigned char * lhs, * rhs, * stop;
+
+    if (bdata (b0) == NULL || b0->slen < 0 ||
+        bdata (b1) == NULL || b1->slen < 0) return BSTR_ERR;
+    if (b0->slen != b1->slen) return BSTR_OK;
+    if (b0->slen == 0 || b0->data == b1->data) return 1;
+
+    lhs = b0->data;
+    rhs = b1->data;
+    stop = lhs + b0->slen;
+    while (lhs != stop) {
+        /* Only fold case when the raw bytes disagree */
+        if (*lhs != *rhs) {
+            unsigned char folded = (unsigned char) downcase (*lhs);
+            if (folded != (unsigned char) downcase (*rhs)) return 0;
+        }
+        lhs++;
+        rhs++;
+    }
+    return 1;
+}
+
+/* int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len)
+ *
+ * Test whether the string b0 begins with the len bytes found at blk,
+ * ignoring case.  1 is returned on a match, 0 when b0 is shorter than len
+ * or differs in something other than case, and -1 when an argument is
+ * invalid.  '\0' characters get no special treatment.
+ */
+int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len) {
+    const unsigned char * stem = (const unsigned char *) blk;
+    int idx;
+
+    if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0)
+        return BSTR_ERR;
+    if (len > b0->slen) return BSTR_OK;
+    if (len == 0 || b0->data == stem) return 1;
+
+    idx = 0;
+    while (idx < len) {
+        /* Only fold case when the raw bytes disagree */
+        if (b0->data[idx] != stem[idx] &&
+            downcase (b0->data[idx]) != downcase (stem[idx])) return 0;
+        idx++;
+    }
+    return 1;
+}
+
+/*
+ * int bltrimws (bstring b)
+ *
+ * Remove the run of whitespace found at the left end of the string.  A
+ * string made up of whitespace only becomes the empty string.  Returns
+ * BSTR_ERR if b is not a valid writable bstring, otherwise BSTR_OK or the
+ * result of bdelete.
+ */
+int bltrimws (bstring b) {
+    int lead;
+
+    if (b == NULL || b->data == NULL || b->slen < 0 ||
+        b->mlen < b->slen || b->mlen <= 0) return BSTR_ERR;
+
+    /* Count the leading whitespace characters */
+    lead = 0;
+    while (lead < b->slen && wspace (b->data[lead])) lead++;
+
+    if (lead < b->slen) return bdelete (b, 0, lead);
+
+    /* Nothing but whitespace (or nothing at all) */
+    b->slen = 0;
+    b->data[0] = (unsigned char) '\0';
+    return BSTR_OK;
+}
+
+/*
+ * int brtrimws (bstring b)
+ *
+ * Delete whitespace contiguous from the right end of the string.  A
+ * string consisting only of whitespace becomes the empty string.
+ * Returns BSTR_ERR if b is not a valid writable bstring, BSTR_OK
+ * otherwise.
+ */
+int brtrimws (bstring b) {
+int i;
+
+ if (b == NULL || b->data == NULL || b->mlen < b->slen ||
+     b->slen < 0 || b->mlen <= 0) return BSTR_ERR;
+
+ for (i = b->slen - 1; i >= 0; i--) {
+  if (!wspace (b->data[i])) {
+   /* Write the terminator only when index i+1 lies inside the
+      buffer.  Testing mlen > i is always true here (i < slen and
+      slen <= mlen), so it could not protect the slen == mlen
+      case. */
+   if (b->mlen > i + 1) b->data[i+1] = (unsigned char) '\0';
+   b->slen = i + 1;
+   return BSTR_OK;
+  }
+ }
+
+ b->data[0] = (unsigned char) '\0';
+ b->slen = 0;
+ return BSTR_OK;
+}
+
+/*
+ * int btrimws (bstring b)
+ *
+ * Delete whitespace contiguous from both ends of the string.  A string
+ * consisting only of whitespace becomes the empty string.  Returns
+ * BSTR_ERR if b is not a valid writable bstring, otherwise BSTR_OK or the
+ * result of bdelete.
+ */
+int btrimws (bstring b) {
+int i, j;
+
+ if (b == NULL || b->data == NULL || b->mlen < b->slen ||
+     b->slen < 0 || b->mlen <= 0) return BSTR_ERR;
+
+ for (i = b->slen - 1; i >= 0; i--) {
+  if (!wspace (b->data[i])) {
+   /* Write the terminator only when index i+1 lies inside the
+      buffer.  Testing mlen > i is always true here (i < slen and
+      slen <= mlen), so it could not protect the slen == mlen
+      case. */
+   if (b->mlen > i + 1) b->data[i+1] = (unsigned char) '\0';
+   b->slen = i + 1;
+   /* data[i] is not whitespace, so this scan stops at i or before */
+   for (j = 0; wspace (b->data[j]); j++) {}
+   return bdelete (b, 0, j);
+  }
+ }
+
+ b->data[0] = (unsigned char) '\0';
+ b->slen = 0;
+ return BSTR_OK;
+}
+
+/* int biseq (const_bstring b0, const_bstring b1)
+ *
+ * Test the strings b0 and b1 for equality.  1 is returned when they are
+ * the same, 0 when they differ and -1 when an argument is invalid.
+ * Strings of different length are rejected without looking at their
+ * contents.  '\0' characters get no special treatment.
+ */
+int biseq (const_bstring b0, const_bstring b1) {
+    if (b0 == NULL || b1 == NULL) return BSTR_ERR;
+    if (b0->data == NULL || b1->data == NULL ||
+        b0->slen < 0 || b1->slen < 0) return BSTR_ERR;
+
+    if (b0->slen != b1->slen) return BSTR_OK;
+
+    /* Shared storage or two empty strings need no byte comparison */
+    if (b0->slen == 0 || b0->data == b1->data) return 1;
+
+    return (bstr__memcmp (b0->data, b1->data, b0->slen) == 0) ? 1 : 0;
+}
+
+/* int bisstemeqblk (const_bstring b0, const void * blk, int len)
+ *
+ * Test whether the string b0 begins with the len bytes found at blk.  1
+ * is returned on a match, 0 when b0 is shorter than len or its beginning
+ * differs from the block, and -1 when an argument is invalid.  '\0'
+ * characters get no special treatment.
+ */
+int bisstemeqblk (const_bstring b0, const void * blk, int len) {
+    const unsigned char * stem = (const unsigned char *) blk;
+    const unsigned char * cur;
+    int left;
+
+    if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0)
+        return BSTR_ERR;
+    if (len > b0->slen) return BSTR_OK;
+    if (len == 0 || b0->data == stem) return 1;
+
+    cur = b0->data;
+    for (left = len; left > 0; left--) {
+        if (*cur != *stem) return BSTR_OK;
+        cur++;
+        stem++;
+    }
+    return 1;
+}
+
+/* int biseqcstr (const_bstring b, const char *s)
+ *
+ * Test the bstring b and the char * string s for equality.  They are
+ * equal only when s is '\0' terminated at exactly the length of b and
+ * every character before that matches the corresponding character of b;
+ * consequently a bstring containing a '\0' character never compares
+ * equal.  1 is returned when they are equal, 0 when they are not, and
+ * BSTR_ERR when there is a detectable error.
+ */
+int biseqcstr (const_bstring b, const char * s) {
+    int idx;
+
+    if (b == NULL || s == NULL) return BSTR_ERR;
+    if (b->data == NULL || b->slen < 0) return BSTR_ERR;
+
+    idx = 0;
+    while (idx < b->slen) {
+        /* s ended early */
+        if (s[idx] == '\0') return BSTR_OK;
+        if (b->data[idx] != (unsigned char) s[idx]) return BSTR_OK;
+        idx++;
+    }
+
+    /* s must end exactly where b ends */
+    return s[idx] == '\0';
+}
+
+/* int biseqcstrcaseless (const_bstring b, const char *s)
+ *
+ * Test the bstring b and the char * string s for equality while ignoring
+ * case.  They are equal only when s is '\0' terminated at exactly the
+ * length of b and every character before that matches the corresponding
+ * character of b except possibly for case; consequently a bstring
+ * containing a '\0' character never compares equal.  1 is returned when
+ * they are equal, 0 when they are not, and BSTR_ERR when there is a
+ * detectable error.
+ */
+int biseqcstrcaseless (const_bstring b, const char * s) {
+    int idx;
+
+    if (b == NULL || s == NULL) return BSTR_ERR;
+    if (b->data == NULL || b->slen < 0) return BSTR_ERR;
+
+    idx = 0;
+    while (idx < b->slen) {
+        /* s ended early */
+        if (s[idx] == '\0') return BSTR_OK;
+        /* Only fold case when the raw bytes disagree */
+        if (b->data[idx] != (unsigned char) s[idx]) {
+            if (downcase (b->data[idx]) != (unsigned char) downcase (s[idx]))
+                return BSTR_OK;
+        }
+        idx++;
+    }
+
+    /* s must end exactly where b ends */
+    return s[idx] == '\0';
+}
+
+/* int bstrcmp (const_bstring b0, const_bstring b1)
+ *
+ * Compare the strings b0 and b1.  SHRT_MIN is returned when there is an
+ * error.  Otherwise the result is less than or greater than zero when
+ * the string pointed to by b0 is lexicographically less than or greater
+ * than the string pointed to by b1.  If the string lengths are unequal
+ * but the characters up to the length of the shorter one are equal, the
+ * result is -1 or 1 according to whether b0 is the shorter or the longer
+ * string.  0 is returned when the strings are the same.  Like its
+ * standard C library counterpart strcmp, the comparison does not proceed
+ * past a '\0' character found in the compared range.
+ */
+int bstrcmp (const_bstring b0, const_bstring b1) {
+    int idx, diff, shared;
+
+    if (b0 == NULL || b1 == NULL) return SHRT_MIN;
+    if (b0->data == NULL || b1->data == NULL ||
+        b0->slen < 0 || b1->slen < 0) return SHRT_MIN;
+
+    /* Equal lengths with shared storage or no content: trivially equal */
+    if (b0->slen == b1->slen && (b0->slen == 0 || b0->data == b1->data))
+        return BSTR_OK;
+
+    shared = (b0->slen > b1->slen) ? b1->slen : b0->slen;
+
+    for (idx = 0; idx < shared; idx++) {
+        diff = ((char) b0->data[idx]) - ((char) b1->data[idx]);
+        if (diff != 0) return diff;
+        if (b0->data[idx] == (unsigned char) '\0') return BSTR_OK;
+    }
+
+    /* One string is a prefix of the other: the longer one is greater */
+    if (b0->slen == b1->slen) return BSTR_OK;
+    return (b0->slen > b1->slen) ? 1 : -1;
+}
+
+/* int bstrncmp (const_bstring b0, const_bstring b1, int n)
+ *
+ * Compare the strings b0 and b1 over at most n characters.  SHRT_MIN is
+ * returned when there is an error.  Otherwise the result is the one that
+ * bstrcmp would give if b0 and b1 were first truncated to at most n
+ * characters and then passed to it as parameters.  Like the standard C
+ * library function strcmp, the comparison does not proceed past a '\0'
+ * character found in the compared range.
+ */
+int bstrncmp (const_bstring b0, const_bstring b1, int n) {
+    int idx, diff, limit;
+
+    if (b0 == NULL || b1 == NULL) return SHRT_MIN;
+    if (b0->data == NULL || b1->data == NULL ||
+        b0->slen < 0 || b1->slen < 0) return SHRT_MIN;
+
+    /* limit = smallest of n and the two lengths */
+    limit = n;
+    if (b0->slen < limit) limit = b0->slen;
+    if (b1->slen < limit) limit = b1->slen;
+
+    /* Shared storage needs no character comparison */
+    if (b0->data != b1->data) {
+        for (idx = 0; idx < limit; idx++) {
+            diff = ((char) b0->data[idx]) - ((char) b1->data[idx]);
+            if (diff != 0) return diff;
+            if (b0->data[idx] == (unsigned char) '\0') return BSTR_OK;
+        }
+    }
+
+    if (limit == n || b0->slen == b1->slen) return BSTR_OK;
+
+    return (b0->slen > limit) ? 1 : -1;
+}
+
+/* bstring bmidstr (const_bstring b, int left, int len)
+ *
+ * Return a newly created bstring holding the section of b that starts at
+ * position left and is len characters long.  The section described by
+ * (left, len) is clamped to the boundaries of b; when nothing remains
+ * after clamping, an empty bstring is created.  NULL is returned when b
+ * is detectably invalid.
+ */
+bstring bmidstr (const_bstring b, int left, int len) {
+    int avail;
+
+    if (b == NULL || b->data == NULL || b->slen < 0) return NULL;
+
+    /* Clamp on the left */
+    if (left < 0) {
+        len += left;
+        left = 0;
+    }
+
+    /* Clamp on the right */
+    avail = b->slen - left;
+    if (len > avail) len = avail;
+
+    return (len > 0) ? blk2bstr (b->data + left, len) : bfromcstr ("");
+}
+
+/* int bdelete (bstring b, int pos, int len)
+ *
+ * Remove the characters at positions pos through pos+len-1 and move the
+ * tail of the bstring that starts at pos+len down to pos.  The call has
+ * an effect only for a positive len.  The section described by (pos, len)
+ * is clamped to the boundaries of the bstring b.  BSTR_ERR is returned
+ * when len is negative after clamping or b is not a valid writable
+ * bstring, BSTR_OK otherwise.
+ */
+int bdelete (bstring b, int pos, int len) {
+    int end;
+
+    /* Clamp to left side of bstring */
+    if (pos < 0) {
+        len += pos;
+        pos = 0;
+    }
+
+    if (b == NULL || len < 0) return BSTR_ERR;
+    if (b->data == NULL || b->slen < 0 || b->mlen < b->slen ||
+        b->mlen <= 0) return BSTR_ERR;
+
+    /* Nothing to remove */
+    if (len == 0 || pos >= b->slen) return BSTR_OK;
+
+    end = pos + len;
+    if (end >= b->slen) {
+        /* The section reaches the end: simply cut the string at pos */
+        b->slen = pos;
+    } else {
+        /* Move the tail down over the removed section */
+        bBlockCopy ((char *) (b->data + pos),
+                    (char *) (b->data + end),
+                    b->slen - end);
+        b->slen -= len;
+    }
+    b->data[b->slen] = (unsigned char) '\0';
+    return BSTR_OK;
+}
+
+/* int bdestroy (bstring b)
+ *
+ * Free up the bstring. Note that if b is detectably invalid or not writable
+ * then no action is performed and BSTR_ERR is returned. Like a freed memory
+ * allocation, dereferences, writes or any other action on b after it has
+ * been bdestroyed is undefined.
+ *
+ * Both the data buffer and the header itself are released with
+ * bstr__free, so b must not be used again after a BSTR_OK return.
+ */
+int bdestroy (bstring b) {
+ if (b == NULL || b->slen < 0 || b->mlen <= 0 || b->mlen < b->slen ||
+ b->data == NULL)
+ return BSTR_ERR;
+
+ bstr__free (b->data); /* release the character buffer first */
+
+ /* In case there is any stale usage, there is one more chance to
+ notice this error. */
+
+ b->slen = -1; /* negative length fails the validity checks used in this file */
+ b->mlen = -__LINE__; /* negative capacity marks the header as not writable */
+ b->data = NULL;
+
+ bstr__free (b); /* finally release the header itself */
+ return BSTR_OK;
+}
+
+/* int binstr (const_bstring b1, int pos, const_bstring b2)
+ *
+ * Search for the bstring b2 in b1 starting from position pos, and searching
+ * forward. If it is found then return with the first position where it is
+ * found, otherwise return BSTR_ERR. Note that this is just a brute force
+ * string searcher that does not attempt clever things like the Boyer-Moore
+ * search algorithm. Because of this there are many degenerate cases where
+ * this can take much longer than it needs to.
+ *
+ * Special cases handled before the scan: an empty b2 matches at any pos in
+ * 0..b1->slen and pos is returned; a pos outside 0..b1->slen, or a b2 that
+ * no longer fits between pos and the end of b1, gives BSTR_ERR; a one
+ * character b2 is searched with a plain loop.
+ *
+ * In the general scan i indexes b1, j counts the characters of b2 matched
+ * so far, c1 is the character of b2 currently being looked for and ii is
+ * the index at which the current candidate match started.  Each step tests
+ * two positions of b1; because of that a run of j matched characters is
+ * only accepted once i == ii + j confirms they were consecutive.
+ */
+int binstr (const_bstring b1, int pos, const_bstring b2) {
+int j, ii, ll, lf;
+unsigned char * d0;
+unsigned char c0;
+register unsigned char * d1;
+register unsigned char c1;
+register int i;
+
+ if (b1 == NULL || b1->data == NULL || b1->slen < 0 ||
+ b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR;
+ if (b1->slen == pos) return (b2->slen == 0)?pos:BSTR_ERR;
+ if (b1->slen < pos || pos < 0) return BSTR_ERR;
+ if (b2->slen == 0) return pos;
+
+ /* No space to find such a string? */
+ if ((lf = b1->slen - b2->slen + 1) <= pos) return BSTR_ERR;
+
+ /* An obvious alias case */
+ if (b1->data == b2->data && pos == 0) return 0;
+
+ i = pos;
+
+ d0 = b2->data;
+ d1 = b1->data;
+ ll = b2->slen;
+
+ /* Peel off the b2->slen == 1 case */
+ c0 = d0[0];
+ if (1 == ll) {
+ for (;i < lf; i++) if (c0 == d1[i]) return i;
+ return BSTR_ERR;
+ }
+
+ c1 = c0;
+ j = 0;
+ lf = b1->slen - 1; /* from here on lf is the index of the last character of b1 */
+
+ ii = -1;
+ if (i < lf) do {
+ /* Unrolled current character test */
+ if (c1 != d1[i]) {
+ if (c1 != d1[1+i]) {
+ i += 2;
+ continue;
+ }
+ i++;
+ }
+
+ /* Take note if this is the start of a potential match */
+ if (0 == j) ii = i;
+
+ /* Shift the test character down by one */
+ j++;
+ i++;
+
+ /* If this isn't past the last character continue */
+ if (j < ll) {
+ c1 = d0[j];
+ continue;
+ }
+
+ N0:;
+
+ /* If no characters mismatched, then we matched */
+ if (i == ii+j) return ii;
+
+ /* Shift back to the beginning */
+ i -= j;
+ j = 0;
+ c1 = c0;
+ } while (i < lf);
+
+ /* Deal with last case if unrolling caused a misalignment */
+ if (i == lf && ll == j+1 && c1 == d1[i]) goto N0;
+
+ return BSTR_ERR;
+}
+
+/* int binstrr (const_bstring b1, int pos, const_bstring b2)
+ *
+ * Look for the bstring b2 inside b1, beginning at position pos and moving
+ * backward.  The position of the first occurrence met that way is
+ * returned, or BSTR_ERR when there is none or an argument is invalid.  An
+ * empty b2 is found at pos itself.  This is a plain brute force search;
+ * there are degenerate inputs for which it takes much longer than a
+ * cleverer algorithm would.
+ */
+int binstrr (const_bstring b1, int pos, const_bstring b2) {
+    int start, k, last, patlen;
+    const unsigned char * pat, * hay;
+
+    if (b1 == NULL || b1->data == NULL || b1->slen < 0 ||
+        b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR;
+    if (pos < 0 || pos > b1->slen) return BSTR_ERR;
+    if (b2->slen == 0) return pos;
+
+    /* Obvious alias case */
+    if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return 0;
+
+    /* last = highest position at which b2 still fits into b1 */
+    last = b1->slen - b2->slen;
+    if (last < 0) return BSTR_ERR;
+
+    /* If no space to find such a string then snap back */
+    start = (pos > last) ? last : pos;
+
+    pat = b2->data;
+    hay = b1->data;
+    patlen = b2->slen;
+
+    for (; start >= 0; start--) {
+        k = 0;
+        while (k < patlen && pat[k] == hay[start + k]) k++;
+        if (k >= patlen) return start;
+    }
+
+    return BSTR_ERR;
+}
+
+/* int binstrcaseless (const_bstring b1, int pos, const_bstring b2)
+ *
+ * Look for the bstring b2 inside b1, beginning at position pos and moving
+ * forward, without regard to case.  The position of the first occurrence
+ * is returned, or BSTR_ERR when there is none or an argument is invalid.
+ * An empty b2 is found at pos itself.  This is a plain brute force
+ * search; there are degenerate inputs for which it takes much longer than
+ * a cleverer algorithm would.
+ */
+int binstrcaseless (const_bstring b1, int pos, const_bstring b2) {
+    int start, k, limit, patlen;
+    const unsigned char * pat, * hay;
+
+    if (b1 == NULL || b1->data == NULL || b1->slen < 0 ||
+        b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR;
+    if (b1->slen == pos) return (b2->slen == 0) ? pos : BSTR_ERR;
+    if (b1->slen < pos || pos < 0) return BSTR_ERR;
+    if (b2->slen == 0) return pos;
+
+    /* limit = one past the last position at which b2 still fits */
+    limit = b1->slen - b2->slen + 1;
+
+    /* No space to find such a string? */
+    if (limit <= pos) return BSTR_ERR;
+
+    /* An obvious alias case */
+    if (b1->data == b2->data && pos == 0) return BSTR_OK;
+
+    pat = b2->data;
+    hay = b1->data;
+    patlen = b2->slen;
+
+    for (start = pos; start < limit; start++) {
+        k = 0;
+        while (k < patlen &&
+               (pat[k] == hay[start + k] ||
+                downcase (pat[k]) == downcase (hay[start + k]))) k++;
+        if (k >= patlen) return start;
+    }
+
+    return BSTR_ERR;
+}
+
+/* int binstrrcaseless (const_bstring b1, int pos, const_bstring b2)
+ *
+ * Look for the bstring b2 inside b1, beginning at position pos and moving
+ * backward, without regard to case.  The position of the first occurrence
+ * met that way is returned, or BSTR_ERR when there is none or an argument
+ * is invalid.  An empty b2 is found at pos itself.  This is a plain brute
+ * force search; there are degenerate inputs for which it takes much
+ * longer than a cleverer algorithm would.
+ */
+int binstrrcaseless (const_bstring b1, int pos, const_bstring b2) {
+    int start, k, last, patlen;
+    const unsigned char * pat, * hay;
+
+    if (b1 == NULL || b1->data == NULL || b1->slen < 0 ||
+        b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR;
+    if (pos < 0 || pos > b1->slen) return BSTR_ERR;
+    if (b2->slen == 0) return pos;
+
+    /* Obvious alias case */
+    if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return BSTR_OK;
+
+    /* last = highest position at which b2 still fits into b1 */
+    last = b1->slen - b2->slen;
+    if (last < 0) return BSTR_ERR;
+
+    /* If no space to find such a string then snap back */
+    start = (pos > last) ? last : pos;
+
+    pat = b2->data;
+    hay = b1->data;
+    patlen = b2->slen;
+
+    for (; start >= 0; start--) {
+        k = 0;
+        while (k < patlen &&
+               (pat[k] == hay[start + k] ||
+                downcase (pat[k]) == downcase (hay[start + k]))) k++;
+        if (k >= patlen) return start;
+    }
+
+    return BSTR_ERR;
+}
+
+
+/* int bstrchrp (const_bstring b, int c, int pos)
+ *
+ * Find the first occurrence of the character c in b at position pos or
+ * later.  Its position is returned, or BSTR_ERR when c does not occur
+ * there, b is invalid, or pos lies outside of b.
+ */
+int bstrchrp (const_bstring b, int c, int pos) {
+    const unsigned char * hit;
+
+    if (b == NULL || b->data == NULL) return BSTR_ERR;
+    if (pos < 0 || pos >= b->slen) return BSTR_ERR;
+
+    hit = (unsigned char *) bstr__memchr ((b->data + pos), (unsigned char) c, (b->slen - pos));
+    return (hit == NULL) ? BSTR_ERR : (int) (hit - b->data);
+}
+
+/* int bstrrchrp (const_bstring b, int c, int pos)
+ *
+ * Find the last occurrence of the character c in b at position pos or
+ * earlier.  Its position is returned, or BSTR_ERR when c does not occur
+ * there, b is invalid, or pos lies outside of b.
+ */
+int bstrrchrp (const_bstring b, int c, int pos) {
+    const unsigned char wanted = (unsigned char) c;
+    int idx;
+
+    if (b == NULL || b->data == NULL) return BSTR_ERR;
+    if (pos < 0 || pos >= b->slen) return BSTR_ERR;
+
+    idx = pos;
+    while (idx >= 0) {
+        if (b->data[idx] == wanted) return idx;
+        idx--;
+    }
+    return BSTR_ERR;
+}
+
+#if !defined (BSTRLIB_AGGRESSIVE_MEMORY_FOR_SPEED_TRADEOFF) /* default variant: one bit per character value */
+#define LONG_LOG_BITS_QTY (3) /* log2 of the number of bits used per content[] element */
+#define LONG_BITS_QTY (1 << LONG_LOG_BITS_QTY) /* number of bits used per content[] element */
+#define LONG_TYPE unsigned char /* element type of content[] */
+
+#define CFCLEN ((1 << CHAR_BIT) / LONG_BITS_QTY) /* elements needed to hold one bit per unsigned char value */
+struct charField { LONG_TYPE content[CFCLEN]; }; /* set of character values stored as a bitmap */
+#define testInCharField(cf,c) ((cf)->content[(c) >> LONG_LOG_BITS_QTY] & (((long)1) << ((c) & (LONG_BITS_QTY-1)))) /* non-zero when the bit for c is set */
+#define setInCharField(cf,idx) { \
+ unsigned int c = (unsigned int) (idx); \
+ (cf)->content[c >> LONG_LOG_BITS_QTY] |= (LONG_TYPE) (1ul << (c & (LONG_BITS_QTY-1))); \
+}
+
+#else /* alternative variant: one whole byte per character value */
+
+#define CFCLEN (1 << CHAR_BIT) /* one element per unsigned char value */
+struct charField { unsigned char content[CFCLEN]; }; /* set of character values stored as a byte table */
+#define testInCharField(cf,c) ((cf)->content[(unsigned char) (c)]) /* non-zero when the entry for c is set */
+#define setInCharField(cf,idx) (cf)->content[(unsigned int) (idx)] = ~0
+
+#endif /* both variants provide CFCLEN, struct charField, testInCharField and setInCharField */
+
+/* Fill the charField cf with the set of characters that occur in the
+ * bstring b.  BSTR_ERR is returned (and cf is left untouched) when b is
+ * NULL, has NULL data or holds no characters; BSTR_OK otherwise.
+ */
+static int buildCharField (struct charField * cf, const_bstring b) {
+    const unsigned char * cur, * stop;
+
+    if (b == NULL || b->data == NULL || b->slen <= 0) return BSTR_ERR;
+
+    /* Start from the empty set */
+    memset ((void *) cf->content, 0, sizeof (struct charField));
+
+    stop = b->data + b->slen;
+    for (cur = b->data; cur != stop; cur++) {
+        setInCharField (cf, *cur);
+    }
+    return BSTR_OK;
+}
+
+/* Replace the set held in cf by its complement: every element of
+ * cf->content is bitwise inverted.
+ */
+static void invertCharField (struct charField * cf) {
+    int left;
+
+    for (left = CFCLEN; left > 0; left--) {
+        cf->content[left - 1] = ~cf->content[left - 1];
+    }
+}
+
+/* Inner engine for binchr: scan data forward over the positions pos to
+ * len-1 and return the first position whose character is a member of cf,
+ * or BSTR_ERR when there is no such position.
+ */
+static int binchrCF (const unsigned char * data, int len, int pos, const struct charField * cf) {
+    int idx = pos;
+
+    while (idx < len) {
+        unsigned char ch = (unsigned char) data[idx];
+        if (testInCharField (cf, ch)) return idx;
+        idx++;
+    }
+    return BSTR_ERR;
+}
+
+/* int binchr (const_bstring b0, int pos, const_bstring b1);
+ *
+ * Search for the first position in b0 starting from pos or after, in which
+ * one of the characters in b1 is found and return it.  If such a position
+ * does not exist in b0, then BSTR_ERR is returned.  BSTR_ERR is also
+ * returned when pos lies outside of b0, when b0 or b1 is NULL or has NULL
+ * data, or when b1 is empty.
+ */
+int binchr (const_bstring b0, int pos, const_bstring b1) {
+struct charField chrs;
+ if (pos < 0 || b0 == NULL || b0->data == NULL ||
+     b0->slen <= pos) return BSTR_ERR;
+ /* b1 is dereferenced just below, so it has to be validated first */
+ if (b1 == NULL || b1->data == NULL) return BSTR_ERR;
+ /* A single character set is a plain character search */
+ if (1 == b1->slen) return bstrchrp (b0, b1->data[0], pos);
+ if (0 > buildCharField (&chrs, b1)) return BSTR_ERR;
+ return binchrCF (b0->data, b0->slen, pos, &chrs);
+}
+
+/* Inner engine for binchrr: scan data backward from position pos down to
+ * 0 and return the first position met whose character is a member of cf,
+ * or BSTR_ERR when there is no such position.
+ */
+static int binchrrCF (const unsigned char * data, int pos, const struct charField * cf) {
+    int idx = pos;
+
+    while (idx >= 0) {
+        unsigned int ch = (unsigned int) data[idx];
+        if (testInCharField (cf, ch)) return idx;
+        idx--;
+    }
+    return BSTR_ERR;
+}
+
+/* int binchrr (const_bstring b0, int pos, const_bstring b1);
+ *
+ * Find the last position in b0, not beyond pos, that holds one of the
+ * characters of b1, and return it.  A pos equal to the length of b0 is
+ * treated as the last position of b0.  BSTR_ERR is returned when there is
+ * no such position or an argument is rejected.
+ */
+int binchrr (const_bstring b0, int pos, const_bstring b1) {
+    struct charField chrs;
+
+    if (b0 == NULL || b1 == NULL || pos < 0) return BSTR_ERR;
+    if (b0->data == NULL || pos > b0->slen) return BSTR_ERR;
+
+    /* Start at the last character when pos is one past the end */
+    if (pos == b0->slen) pos--;
+
+    /* A single character set is a plain character search */
+    if (b1->slen == 1) return bstrrchrp (b0, b1->data[0], pos);
+
+    if (buildCharField (&chrs, b1) < 0) return BSTR_ERR;
+    return binchrrCF (b0->data, pos, &chrs);
+}
+
+/* int bninchr (const_bstring b0, int pos, const_bstring b1);
+ *
+ * Find the first position in b0, at pos or later, that holds a character
+ * which does not occur in b1, and return it.  BSTR_ERR is returned when
+ * there is no such position or an argument is rejected.
+ */
+int bninchr (const_bstring b0, int pos, const_bstring b1) {
+    struct charField chrs;
+
+    if (b0 == NULL || pos < 0) return BSTR_ERR;
+    if (b0->data == NULL || pos >= b0->slen) return BSTR_ERR;
+
+    /* Build the set of b1 and search for members of its complement */
+    if (0 > buildCharField (&chrs, b1)) return BSTR_ERR;
+    invertCharField (&chrs);
+
+    return binchrCF (b0->data, b0->slen, pos, &chrs);
+}
+
+/* int bninchrr (const_bstring b0, int pos, const_bstring b1);
+ *
+ * Find the last position in b0, not beyond pos, that holds a character
+ * which does not occur in b1, and return it.  A pos equal to the length
+ * of b0 is treated as the last position of b0.  BSTR_ERR is returned when
+ * there is no such position or an argument is rejected.
+ */
+int bninchrr (const_bstring b0, int pos, const_bstring b1) {
+    struct charField chrs;
+
+    if (b0 == NULL || pos < 0) return BSTR_ERR;
+    if (b0->data == NULL || pos > b0->slen) return BSTR_ERR;
+
+    /* Start at the last character when pos is one past the end */
+    if (pos == b0->slen) pos--;
+
+    /* Build the set of b1 and search for members of its complement */
+    if (0 > buildCharField (&chrs, b1)) return BSTR_ERR;
+    invertCharField (&chrs);
+
+    return binchrrCF (b0->data, pos, &chrs);
+}
+
+/* int bsetstr (bstring b0, int pos, bstring b1, unsigned char fill)
+ *
+ * Overwrite the string b0 starting at position pos with the string b1. If
+ * the position pos is past the end of b0, then the character "fill" is
+ * appended as necessary to make up the gap between the end of b0 and pos.
+ * If b1 is NULL, it behaves as if it were a 0-length string.
+ *
+ * Returns BSTR_OK on success.  Returns BSTR_ERR when pos is negative, b0
+ * is not a valid writable bstring, a non-NULL b1 has a negative length or
+ * NULL data, the temporary copy made for the aliasing case cannot be
+ * created, or balloc fails.
+ */
+int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill) {
+int d, newlen;
+ptrdiff_t pd;
+bstring aux = (bstring) b1; /* source actually copied from; replaced by a private copy when b1 aliases b0 */
+
+ if (pos < 0 || b0 == NULL || b0->slen < 0 || NULL == b0->data ||
+ b0->mlen < b0->slen || b0->mlen <= 0) return BSTR_ERR;
+ if (b1 != NULL && (b1->slen < 0 || b1->data == NULL)) return BSTR_ERR;
+
+ d = pos; /* d becomes the index one past the last character written */
+
+ /* Aliasing case */
+ if (NULL != aux) {
+ if ((pd = (ptrdiff_t) (b1->data - b0->data)) >= 0 && pd < (ptrdiff_t) b0->mlen) {
+ if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR;
+ }
+ d += aux->slen;
+ }
+
+ /* Increase memory size if necessary */
+ if (balloc (b0, d + 1) != BSTR_OK) {
+ if (aux != b1) bdestroy (aux); /* release the private copy, if one was made */
+ return BSTR_ERR;
+ }
+
+ newlen = b0->slen;
+
+ /* Fill in "fill" character as necessary */
+ if (pos > newlen) {
+ bstr__memset (b0->data + b0->slen, (int) fill, (size_t) (pos - b0->slen));
+ newlen = pos;
+ }
+
+ /* Copy b1 to position pos in b0. */
+ if (aux != NULL) {
+ bBlockCopy ((char *) (b0->data + pos), (char *) aux->data, aux->slen);
+ if (aux != b1) bdestroy (aux); /* release the private copy, if one was made */
+ }
+
+ /* Indicate the potentially increased size of b0 */
+ if (d > newlen) newlen = d;
+
+ b0->slen = newlen;
+ b0->data[newlen] = (unsigned char) '\0';
+
+ return BSTR_OK;
+}
+
+/* int binsert (bstring b1, int pos, bstring b2, unsigned char fill)
+ *
+ * Inserts the string b2 into b1 at position pos.  If the position pos is
+ * past the end of b1, then the character "fill" is appended as necessary to
+ * make up the gap between the end of b1 and pos.  Unlike bsetstr, binsert
+ * does not allow b2 to be NULL.
+ *
+ * Returns BSTR_OK on success.  Returns BSTR_ERR when pos is negative, b1
+ * is not a valid writable bstring, b2 is NULL or has NULL data or a
+ * negative length, the resulting length does not fit in an int, the
+ * temporary copy made for the aliasing case cannot be created, or balloc
+ * fails.
+ */
+int binsert (bstring b1, int pos, const_bstring b2, unsigned char fill) {
+int d, l;
+ptrdiff_t pd;
+bstring aux = (bstring) b2;
+
+ /* The data pointers are checked too: they are subtracted and copied
+    from below. */
+ if (pos < 0 || b1 == NULL || b2 == NULL || b1->data == NULL ||
+     b2->data == NULL || b1->slen < 0 || b2->slen < 0 ||
+     b1->mlen < b1->slen || b1->mlen <= 0) return BSTR_ERR;
+
+ /* Aliasing case */
+ if ((pd = (ptrdiff_t) (b2->data - b1->data)) >= 0 && pd < (ptrdiff_t) b1->mlen) {
+  if (NULL == (aux = bstrcpy (b2))) return BSTR_ERR;
+ }
+
+ /* Compute the two possible end pointers */
+ d = b1->slen + aux->slen;
+ l = pos + aux->slen;
+ if ((d|l) < 0) {
+  /* Overflow: do not leak the private copy made for aliasing */
+  if (aux != b2) bdestroy (aux);
+  return BSTR_ERR;
+ }
+
+ if (l > d) {
+  /* Inserting past the end of the string */
+  if (balloc (b1, l + 1) != BSTR_OK) {
+   if (aux != b2) bdestroy (aux);
+   return BSTR_ERR;
+  }
+  bstr__memset (b1->data + b1->slen, (int) fill, (size_t) (pos - b1->slen));
+  b1->slen = l;
+ } else {
+  /* Inserting in the middle of the string */
+  if (balloc (b1, d + 1) != BSTR_OK) {
+   if (aux != b2) bdestroy (aux);
+   return BSTR_ERR;
+  }
+  /* Open a gap of aux->slen characters at pos */
+  bBlockCopy (b1->data + l, b1->data + pos, d - l);
+  b1->slen = d;
+ }
+ bBlockCopy (b1->data + pos, aux->data, aux->slen);
+ b1->data[b1->slen] = (unsigned char) '\0';
+ if (aux != b2) bdestroy (aux);
+ return BSTR_OK;
+}
+
+/*  int breplace (bstring b1, int pos, int len, const_bstring b2,
+ *                unsigned char fill)
+ *
+ *  Replace a section of a string from pos for a length len with the string
+ *  b2.  fill is used if pos > b1->slen.
+ *
+ *  When the replaced section reaches or passes the end of b1 the work is
+ *  delegated to bsetstr and the result is then truncated to pos + b2->slen
+ *  if that is shorter.  Otherwise the tail of b1 is shifted and b2 is copied
+ *  into the gap.
+ *
+ *  Returns BSTR_OK on success (or bsetstr's result in the delegated case)
+ *  and BSTR_ERR on invalid arguments or allocation failure.
+ */
+int breplace (bstring b1, int pos, int len, const_bstring b2,
+              unsigned char fill) {
+int pl, ret;
+ptrdiff_t pd;
+bstring aux = (bstring) b2;
+
+    if (pos < 0 || len < 0 || (pl = pos + len) < 0 || b1 == NULL ||
+        b2 == NULL || b1->data == NULL || b2->data == NULL ||
+        b1->slen < 0 || b2->slen < 0 || b1->mlen < b1->slen ||
+        b1->mlen <= 0) return BSTR_ERR;
+
+    /* Straddles the end? */
+    if (pl >= b1->slen) {
+        if ((ret = bsetstr (b1, pos, b2, fill)) < 0) return ret;
+        if (pos + b2->slen < b1->slen) {
+            b1->slen = pos + b2->slen;
+            b1->data[b1->slen] = (unsigned char) '\0';
+        }
+        return ret;
+    }
+
+    /* Aliasing case: b2 starts inside b1's live data, use a private copy */
+    if ((pd = (ptrdiff_t) (b2->data - b1->data)) >= 0 && pd < (ptrdiff_t) b1->slen) {
+        if (NULL == (aux = bstrcpy (b2))) return BSTR_ERR;
+    }
+
+    if (aux->slen > len) {
+        /* The '\0' written below lands at index (new slen), so one byte
+           more than the new length has to be available.  This matches the
+           "+ 1" used by the other balloc call sites in this file. */
+        if (balloc (b1, b1->slen + aux->slen - len + 1) != BSTR_OK) {
+            if (aux != b2) bdestroy (aux);
+            return BSTR_ERR;
+        }
+    }
+
+    /* Move the tail only when the replacement changes the length */
+    if (aux->slen != len) bstr__memmove (b1->data + pos + aux->slen, b1->data + pos + len, b1->slen - (pos + len));
+    bstr__memcpy (b1->data + pos, aux->data, aux->slen);
+    b1->slen += aux->slen - len;
+    b1->data[b1->slen] = (unsigned char) '\0';
+    if (aux != b2) bdestroy (aux);
+    return BSTR_OK;
+}
+
+/*
+ *  findreplaceengine is used to implement bfindreplace and
+ *  bfindreplacecaseless.  It works by breaking the three cases of
+ *  expansion, reduction and replacement, and solving each of these
+ *  in the most efficient way possible.
+ *
+ *  b     - bstring that is modified in place
+ *  find  - non-empty pattern to search for
+ *  repl  - replacement text (may be empty)
+ *  pos   - index in b at which searching starts
+ *  instr - search routine (same calling shape as binstr) used to locate
+ *          each occurrence
+ *
+ *  Returns BSTR_OK on success (including "nothing to replace") and BSTR_ERR
+ *  on invalid arguments, arithmetic overflow or allocation failure.
+ */
+
+typedef int (*instr_fnptr) (const_bstring s1, int pos, const_bstring s2);
+
+#define INITIAL_STATIC_FIND_INDEX_COUNT 32
+
+static int findreplaceengine (bstring b, const_bstring find, const_bstring repl, int pos, instr_fnptr instr) {
+int i, ret, slen, mlen, delta, acc;
+int * d;
+int static_d[INITIAL_STATIC_FIND_INDEX_COUNT+1]; /* This +1 is unnecessary, but it shuts up LINT. */
+ptrdiff_t pd;
+bstring auxf = (bstring) find;
+bstring auxr = (bstring) repl;
+
+    if (b == NULL || b->data == NULL || find == NULL ||
+        find->data == NULL || repl == NULL || repl->data == NULL ||
+        pos < 0 || find->slen <= 0 || b->mlen < 0 || b->slen > b->mlen ||
+        b->mlen <= 0 || b->slen < 0 || repl->slen < 0) return BSTR_ERR;
+    /* Not enough room left for even one match: nothing to do */
+    if (pos > b->slen - find->slen) return BSTR_OK;
+
+    /* Alias with find string */
+    pd = (ptrdiff_t) (find->data - b->data);
+    if ((ptrdiff_t) (pos - find->slen) < pd && pd < (ptrdiff_t) b->slen) {
+        if (NULL == (auxf = bstrcpy (find))) return BSTR_ERR;
+    }
+
+    /* Alias with repl string */
+    pd = (ptrdiff_t) (repl->data - b->data);
+    if ((ptrdiff_t) (pos - repl->slen) < pd && pd < (ptrdiff_t) b->slen) {
+        if (NULL == (auxr = bstrcpy (repl))) {
+            if (auxf != find) bdestroy (auxf);
+            return BSTR_ERR;
+        }
+    }
+
+    /* delta > 0: result shrinks, delta < 0: result grows */
+    delta = auxf->slen - auxr->slen;
+
+    /* in-place replacement since find and replace strings are of equal
+       length */
+    if (delta == 0) {
+        while ((pos = instr (b, pos, auxf)) >= 0) {
+            bstr__memcpy (b->data + pos, auxr->data, auxr->slen);
+            pos += auxf->slen;
+        }
+        if (auxf != find) bdestroy (auxf);
+        if (auxr != repl) bdestroy (auxr);
+        return BSTR_OK;
+    }
+
+    /* shrinking replacement since auxf->slen > auxr->slen */
+    if (delta > 0) {
+        /* acc is the number of bytes removed so far, i.e. how far left
+           every later segment has to slide. */
+        acc = 0;
+
+        while ((i = instr (b, pos, auxf)) >= 0) {
+            if (acc && i > pos)
+                bstr__memmove (b->data + pos - acc, b->data + pos, i - pos);
+            if (auxr->slen)
+                bstr__memcpy (b->data + i - acc, auxr->data, auxr->slen);
+            acc += delta;
+            pos = i + auxf->slen;
+        }
+
+        if (acc) {
+            /* Slide the text after the last match and fix up the length */
+            i = b->slen;
+            if (i > pos)
+                bstr__memmove (b->data + pos - acc, b->data + pos, i - pos);
+            b->slen -= acc;
+            b->data[b->slen] = (unsigned char) '\0';
+        }
+
+        if (auxf != find) bdestroy (auxf);
+        if (auxr != repl) bdestroy (auxr);
+        return BSTR_OK;
+    }
+
+    /* expanding replacement since find->slen < repl->slen.  It's a lot
+       more complicated.  This works by first finding all the matches and
+       storing them to a growable array, then doing at most one resize of
+       the destination bstring and then performing the direct memory transfers
+       of the string segment pieces to form the final result.  The growable
+       array of matches uses a deferred doubling reallocing strategy.  What
+       this means is that it starts as a reasonably fixed sized auto array in
+       the hopes that many if not most cases will never need to grow this
+       array.  But it switches as soon as the bounds of the array will be
+       exceeded.  An extra find result is always appended to this array that
+       corresponds to the end of the destination string, so slen is checked
+       against mlen - 1 rather than mlen before resizing.
+    */
+
+    mlen = INITIAL_STATIC_FIND_INDEX_COUNT;
+    d = (int *) static_d; /* Avoid malloc for trivial/initial cases */
+    acc = slen = 0;
+
+    while ((pos = instr (b, pos, auxf)) >= 0) {
+        if (slen >= mlen - 1) {
+            int sl, *t;
+
+            mlen += mlen;
+            /* The array holds int match offsets, so size it by int, not by
+               pointer width. */
+            sl = sizeof (int) * mlen;
+            if (static_d == d) d = NULL; /* static_d cannot be realloced */
+            if (mlen <= 0 || sl < mlen || NULL == (t = (int *) bstr__realloc (d, sl))) {
+                ret = BSTR_ERR;
+                goto done;
+            }
+            /* First move off the stack array: carry the entries across */
+            if (NULL == d) bstr__memcpy (t, static_d, sizeof (static_d));
+            d = t;
+        }
+        d[slen] = pos;
+        slen++;
+        acc -= delta;
+        pos += auxf->slen;
+        if (pos < 0 || acc < 0) {
+            /* int overflow in the position or in the total growth */
+            ret = BSTR_ERR;
+            goto done;
+        }
+    }
+
+    /* slen <= INITIAL_STATIC_FIND_INDEX_COUNT-1 or mlen-1 here. */
+    d[slen] = b->slen;
+
+    if (BSTR_OK == (ret = balloc (b, b->slen + acc + 1))) {
+        b->slen += acc;
+        /* Walk the matches from last to first so that no source byte is
+           overwritten before it has been moved. */
+        for (i = slen-1; i >= 0; i--) {
+            int s, l;
+            s = d[i] + auxf->slen;
+            l = d[i+1] - s; /* d[slen] may be accessed here. */
+            if (l) {
+                bstr__memmove (b->data + s + acc, b->data + s, l);
+            }
+            if (auxr->slen) {
+                bstr__memmove (b->data + s + acc - auxr->slen,
+                               auxr->data, auxr->slen);
+            }
+            acc += delta;
+        }
+        b->data[b->slen] = (unsigned char) '\0';
+    }
+
+    done:;
+    /* Only a heap-grown index array may be handed to bstr__free */
+    if (static_d == d) d = NULL;
+    bstr__free (d);
+    if (auxf != find) bdestroy (auxf);
+    if (auxr != repl) bdestroy (auxr);
+    return ret;
+}
+
+/*  int bfindreplace (bstring b, const_bstring find, const_bstring repl,
+ *                    int pos)
+ *
+ *  Case-sensitive search and replace: every occurrence of find located in b
+ *  from index pos onward is substituted by repl.  The work is done by
+ *  findreplaceengine using binstr as the search routine; its result is
+ *  returned unchanged.
+ */
+int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos) {
+int status;
+
+    status = findreplaceengine (b, find, repl, pos, binstr);
+    return status;
+}
+
+/*  int bfindreplacecaseless (bstring b, const_bstring find,
+ *                            const_bstring repl, int pos)
+ *
+ *  Case-insensitive search and replace: every occurrence of find located in
+ *  b from index pos onward is substituted by repl.  The work is done by
+ *  findreplaceengine using binstrcaseless as the search routine; its result
+ *  is returned unchanged.
+ */
+int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos) {
+int status;
+
+    status = findreplaceengine (b, find, repl, pos, binstrcaseless);
+    return status;
+}
+
+/*  int binsertch (bstring b, int pos, int len, unsigned char fill)
+ *
+ *  Insert len copies of the character fill into b at index pos.  When pos
+ *  lies beyond the current end of b, the gap between the old end and pos is
+ *  filled with the same character, so the string ends at pos + len.
+ *
+ *  Returns BSTR_OK on success, BSTR_ERR on invalid arguments, on int
+ *  overflow of the resulting length, or when balloc fails.
+ */
+int binsertch (bstring b, int pos, int len, unsigned char fill) {
+int grownEnd, insertEnd;
+
+    if (pos < 0 || b == NULL || b->slen < 0 || b->mlen < b->slen ||
+        b->mlen <= 0 || len < 0) return BSTR_ERR;
+
+    /* Candidate final lengths: old text plus insert, or pos plus insert */
+    grownEnd = b->slen + len;
+    insertEnd = pos + len;
+    if ((grownEnd | insertEnd) < 0) return BSTR_ERR;
+
+    if (insertEnd > grownEnd) {
+        /* pos is past the end: the fill run starts at the old end */
+        if (balloc (b, insertEnd + 1) != BSTR_OK) return BSTR_ERR;
+        pos = b->slen;
+        b->slen = insertEnd;
+    } else {
+        /* pos is inside the string: open a len-byte hole at pos */
+        if (balloc (b, grownEnd + 1) != BSTR_OK) return BSTR_ERR;
+        bstr__memmove (b->data + insertEnd, b->data + pos, grownEnd - insertEnd);
+        b->slen = grownEnd;
+    }
+
+    bstr__memset (b->data + pos, (int) fill, (size_t) (insertEnd - pos));
+    b->data[b->slen] = (unsigned char) '\0';
+    return BSTR_OK;
+}
+
+/*  int bpattern (bstring b, int len)
+ *
+ *  Tile the current contents of b end to end, in place, until the string is
+ *  exactly len characters long (a len shorter than the current length simply
+ *  truncates).  A one-character pattern is handed to bsetstr as a fill
+ *  operation.
+ *
+ *  Returns BSTR_ERR if b is NULL or empty, if len is negative or if balloc
+ *  fails; otherwise BSTR_OK (or bsetstr's result for the one-character
+ *  case).
+ */
+int bpattern (bstring b, int len) {
+int period = blength (b);
+int idx;
+
+    if (period <= 0 || len < 0) return BSTR_ERR;
+    if (balloc (b, len + 1) != BSTR_OK) return BSTR_ERR;
+
+    if (len > 0 && period == 1) return bsetstr (b, len, NULL, b->data[0]);
+
+    /* Each byte past the seed repeats the byte one period earlier */
+    for (idx = period; idx < len; idx++) {
+        b->data[idx] = b->data[idx - period];
+    }
+
+    b->data[len] = (unsigned char) '\0';
+    b->slen = len;
+    return BSTR_OK;
+}
+
+#define BS_BUFF_SZ (1024)
+
+/*  int breada (bstring b, bNread readPtr, void * parm)
+ *
+ *  Use a finite buffer fread-like function readPtr to concatenate to the
+ *  bstring b the entire contents of file-like source data in a roughly
+ *  efficient way.
+ *
+ *  The read window starts at 16 bytes and doubles until it reaches
+ *  BS_BUFF_SZ, after which it grows by BS_BUFF_SZ per round.  Reading stops
+ *  at the first short read.
+ *
+ *  Returns BSTR_OK on success and BSTR_ERR on invalid arguments or when
+ *  balloc fails.  If balloc fails after some data has already been
+ *  appended, that data is kept and b is left '\0' terminated.
+ */
+int breada (bstring b, bNread readPtr, void * parm) {
+int i, l, n, start;
+
+    if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen ||
+        readPtr == NULL) return BSTR_ERR;
+
+    start = i = b->slen;
+    for (n=i+16; ; n += ((n < BS_BUFF_SZ) ? n : BS_BUFF_SZ)) {
+        if (BSTR_OK != balloc (b, n + 1)) {
+            /* A previous round filled b up to index i-1 and advanced slen
+               without terminating.  That round's balloc reserved i + 1
+               bytes, so index i is writable; restore the terminator so the
+               caller is not handed an unterminated string. */
+            if (i > start) b->data[i] = (unsigned char) '\0';
+            return BSTR_ERR;
+        }
+        l = (int) readPtr ((void *) (b->data + i), 1, n - i, parm);
+        i += l;
+        b->slen = i;
+        if (i < n) break; /* short read: source exhausted */
+    }
+
+    b->data[i] = (unsigned char) '\0';
+    return BSTR_OK;
+}
+
+/*  bstring bread (bNread readPtr, void * parm)
+ *
+ *  Build a new bstring holding everything that the fread-like function
+ *  readPtr delivers for the handle parm.  The reading itself is done by
+ *  breada on an initially empty bstring.
+ *
+ *  Returns the new bstring, or NULL if breada reports an error (the
+ *  partially filled bstring is destroyed in that case).
+ */
+bstring bread (bNread readPtr, void * parm) {
+bstring contents = bfromcstr ("");
+
+    if (breada (contents, readPtr, parm) >= 0) return contents;
+
+    bdestroy (contents);
+    return NULL;
+}
+
+/*  int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator)
+ *
+ *  Use an fgetc-like single character stream reading function (getcPtr) to
+ *  obtain a sequence of characters which replace the contents of the
+ *  bstring b (storing starts at index 0).  Reading stops when getcPtr
+ *  returns a negative number or when the terminator character has been
+ *  read; the terminator is stored as part of the result.
+ *
+ *  Returns 1 when nothing was stored because getcPtr reported a negative
+ *  value straight away, 0 when at least one character was stored, and
+ *  BSTR_ERR on invalid arguments or when balloc fails.
+ */
+int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator) {
+int ch, count, lastSafe;
+
+    if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen ||
+        getcPtr == NULL) return BSTR_ERR;
+
+    count = 0;
+    lastSafe = b->mlen - 2;   /* highest index that still leaves room for '\0' */
+
+    for (;;) {
+        ch = getcPtr (parm);
+        if (ch < 0) break;
+
+        if (count > lastSafe) {
+            /* Publish the length so balloc preserves what was stored */
+            b->slen = count;
+            if (balloc (b, count + 2) != BSTR_OK) return BSTR_ERR;
+            lastSafe = b->mlen - 2;
+        }
+
+        b->data[count] = (unsigned char) ch;
+        count++;
+        if (ch == terminator) break;
+    }
+
+    b->data[count] = (unsigned char) '\0';
+    b->slen = count;
+
+    return count == 0 && ch < 0;
+}
+
+/*  int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator)
+ *
+ *  Use an fgetc-like single character stream reading function (getcPtr) to
+ *  obtain a sequence of characters which are concatenated to the end of the
+ *  bstring b.  Reading stops when getcPtr returns a negative number or when
+ *  the terminator character has been read; the terminator is stored as part
+ *  of the result.
+ *
+ *  Returns 1 when the resulting b is empty and getcPtr reported a negative
+ *  value, 0 otherwise, and BSTR_ERR on invalid arguments or when balloc
+ *  fails.
+ */
+int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator) {
+int ch, count, lastSafe;
+
+    if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen ||
+        getcPtr == NULL) return BSTR_ERR;
+
+    count = b->slen;          /* append after the existing contents */
+    lastSafe = b->mlen - 2;   /* highest index that still leaves room for '\0' */
+
+    for (;;) {
+        ch = getcPtr (parm);
+        if (ch < 0) break;
+
+        if (count > lastSafe) {
+            /* Publish the length so balloc preserves what was stored */
+            b->slen = count;
+            if (balloc (b, count + 2) != BSTR_OK) return BSTR_ERR;
+            lastSafe = b->mlen - 2;
+        }
+
+        b->data[count] = (unsigned char) ch;
+        count++;
+        if (ch == terminator) break;
+    }
+
+    b->data[count] = (unsigned char) '\0';
+    b->slen = count;
+
+    return count == 0 && ch < 0;
+}
+
+/*  bstring bgets (bNgetc getcPtr, void * parm, char terminator)
+ *
+ *  Read characters with the fgetc-like function getcPtr into a freshly
+ *  created bstring, stopping after the terminator character (which is kept)
+ *  or when getcPtr returns a negative number.  The reading is done by
+ *  bgetsa.
+ *
+ *  Returns the new bstring, or NULL when bgetsa reports an error or when no
+ *  characters were read.
+ */
+bstring bgets (bNgetc getcPtr, void * parm, char terminator) {
+bstring line = bfromcstr ("");
+int status = bgetsa (line, getcPtr, parm, terminator);
+
+    /* line is only dereferenced once bgetsa has accepted it */
+    if (status < 0 || line->slen <= 0) {
+        bdestroy (line);
+        return NULL;
+    }
+    return line;
+}
+
+/* State of a buffered input stream layered over an fread-like callback. */
+struct bStream {
+ bstring buff; /* Buffer for over-reads */
+ void * parm; /* The stream handle for core stream */
+ bNread readFnPtr; /* fread compatible fnptr for core stream */
+ int isEOF; /* track file's EOF state */
+ int maxBuffSz; /* byte count requested per readFnPtr call; buff is grown to hold maxBuffSz + 1 */
+};
+
+/*  struct bStream * bsopen (bNread readPtr, void * parm)
+ *
+ *  Wrap a given open stream (described by a fread compatible function
+ *  pointer and stream handle) into an open bStream suitable for the bstring
+ *  library streaming functions.
+ *
+ *  Returns the new bStream, or NULL if readPtr is NULL or if either the
+ *  stream structure or its over-read buffer cannot be allocated.
+ */
+struct bStream * bsopen (bNread readPtr, void * parm) {
+struct bStream * s;
+
+    if (readPtr == NULL) return NULL;
+    s = (struct bStream *) bstr__alloc (sizeof (struct bStream));
+    if (s == NULL) return NULL;
+
+    s->buff = bfromcstr ("");
+    if (s->buff == NULL) {
+        /* Never hand out a stream without a buffer: the read functions
+           would reject it and bseof would dereference the NULL buffer. */
+        bstr__free (s);
+        return NULL;
+    }
+
+    s->parm = parm;
+    s->readFnPtr = readPtr;
+    s->maxBuffSz = BS_BUFF_SZ;
+    s->isEOF = 0;
+    return s;
+}
+
+/*  int bsbufflength (struct bStream * s, int sz)
+ *
+ *  Query and optionally change the buffer length used by the bStream.  A sz
+ *  of zero leaves the length untouched; a positive sz becomes the new
+ *  length.
+ *
+ *  Returns the length that was in effect before the call, or BSTR_ERR if s
+ *  is NULL or sz is negative.
+ */
+int bsbufflength (struct bStream * s, int sz) {
+int previous;
+
+    if (s == NULL || sz < 0) return BSTR_ERR;
+
+    previous = s->maxBuffSz;
+    if (sz != 0) {
+        s->maxBuffSz = sz;
+    }
+    return previous;
+}
+
+/*  int bseof (const struct bStream * s)
+ *
+ *  Report whether the bStream is exhausted: the core stream has been
+ *  flagged as at end-of-file and no over-read characters remain buffered.
+ *
+ *  Returns 1 if exhausted, 0 if not, and BSTR_ERR if s is NULL, has no read
+ *  function, or has no buffer.
+ */
+int bseof (const struct bStream * s) {
+    /* buff is dereferenced below, so it is validated like in the sibling
+       stream functions. */
+    if (s == NULL || s->readFnPtr == NULL || s->buff == NULL) return BSTR_ERR;
+    return s->isEOF && (s->buff->slen == 0);
+}
+
+/*  void * bsclose (struct bStream * s)
+ *
+ *  Tear down the bStream: its over-read buffer is destroyed, its fields are
+ *  cleared and the structure itself is freed.
+ *
+ *  Returns the handle of the core stream that was supplied when the bStream
+ *  was opened, or NULL if s is NULL.
+ */
+void * bsclose (struct bStream * s) {
+void * handle;
+
+    if (s == NULL) return NULL;
+
+    handle = s->parm;
+    if (s->buff) bdestroy (s->buff);
+
+    /* Clear the fields before releasing the structure */
+    s->buff = NULL;
+    s->parm = NULL;
+    s->readFnPtr = NULL;
+    s->isEOF = 1;
+
+    bstr__free (s);
+    return handle;
+}
+
+/*  int bsreadlna (bstring r, struct bStream * s, char terminator)
+ *
+ *  Read a bstring terminated by the terminator character or the end of the
+ *  stream from the bStream (s) and append it to the parameter r.  The
+ *  terminator, when found, is included.  This function may read additional
+ *  characters from the core stream that are not returned, but will be
+ *  retained for subsequent read operations.
+ *
+ *  Returns BSTR_OK when a terminated line was appended, 0 when the stream
+ *  ended after at least one character was appended, and BSTR_ERR on invalid
+ *  arguments, allocation failure, or when nothing could be appended.
+ */
+int bsreadlna (bstring r, struct bStream * s, char terminator) {
+int i, l, ret, rlo;
+char * b;
+struct tagbstring x;
+
+    if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 ||
+        r->slen < 0 || r->mlen < r->slen) return BSTR_ERR;
+    l = s->buff->slen;
+    if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR;
+    b = (char *) s->buff->data;
+    x.data = (unsigned char *) b;
+
+    /* First check if the current buffer holds the terminator */
+    b[l] = terminator; /* Set sentinel */
+    for (i=0; b[i] != terminator; i++) ;
+    if (i < l) {
+        x.slen = i + 1;
+        ret = bconcat (r, &x);
+        s->buff->slen = l;
+        if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1);
+        /* Report a failed append instead of claiming success; the buffered
+           line stays in the stream in that case. */
+        return ret;
+    }
+
+    /* Remember r's length so "nothing was read" can be detected at EOF */
+    rlo = r->slen;
+
+    /* If not then just concatenate the entire buffer to the output */
+    x.slen = l;
+    if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR;
+
+    /* Perform direct in-place reads into the destination to allow for
+       the minimum of data-copies */
+    for (;;) {
+        if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR;
+        b = (char *) (r->data + r->slen);
+        l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm);
+        if (l <= 0) {
+            r->data[r->slen] = (unsigned char) '\0';
+            s->buff->slen = 0;
+            s->isEOF = 1;
+            /* If nothing was read return with an error message */
+            return BSTR_ERR & -(r->slen == rlo);
+        }
+        b[l] = terminator; /* Set sentinel */
+        for (i=0; b[i] != terminator; i++) ;
+        if (i < l) break;
+        r->slen += l;
+    }
+
+    /* Terminator found, push over-read back to buffer */
+    i++;
+    r->slen += i;
+    s->buff->slen = l - i;
+    bstr__memcpy (s->buff->data, b + i, l - i);
+    r->data[r->slen] = (unsigned char) '\0';
+    return BSTR_OK;
+}
+
+/*  int bsreadlnsa (bstring r, struct bStream * s, const_bstring term)
+ *
+ *  Read a bstring terminated by any character in the term string or the end
+ *  of the stream from the bStream (s) and append it to the parameter r.
+ *  The terminating character, when found, is included.  This function may
+ *  read additional characters from the core stream that are not returned,
+ *  but will be retained for subsequent read operations.
+ *
+ *  A one-character term is forwarded to bsreadlna.
+ *
+ *  Returns BSTR_OK when a terminated line was appended, 0 when the stream
+ *  ended after at least one character was appended, and BSTR_ERR on invalid
+ *  arguments (including an empty term), allocation failure, or when nothing
+ *  could be appended.
+ */
+int bsreadlnsa (bstring r, struct bStream * s, const_bstring term) {
+int i, l, ret, rlo;
+unsigned char * b;
+struct tagbstring x;
+struct charField cf;
+
+    if (s == NULL || s->buff == NULL || r == NULL || term == NULL ||
+        term->data == NULL || r->mlen <= 0 || r->slen < 0 ||
+        r->mlen < r->slen) return BSTR_ERR;
+    if (term->slen == 1) return bsreadlna (r, s, term->data[0]);
+    if (term->slen < 1 || buildCharField (&cf, term)) return BSTR_ERR;
+
+    l = s->buff->slen;
+    if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR;
+    b = (unsigned char *) s->buff->data;
+    x.data = b;
+
+    /* First check if the current buffer holds the terminator */
+    b[l] = term->data[0]; /* Set sentinel */
+    for (i=0; !testInCharField (&cf, b[i]); i++) ;
+    if (i < l) {
+        x.slen = i + 1;
+        ret = bconcat (r, &x);
+        s->buff->slen = l;
+        if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1);
+        /* Report a failed append instead of claiming success; the buffered
+           line stays in the stream in that case. */
+        return ret;
+    }
+
+    /* Remember r's length so "nothing was read" can be detected at EOF */
+    rlo = r->slen;
+
+    /* If not then just concatenate the entire buffer to the output */
+    x.slen = l;
+    if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR;
+
+    /* Perform direct in-place reads into the destination to allow for
+       the minimum of data-copies */
+    for (;;) {
+        if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR;
+        b = (unsigned char *) (r->data + r->slen);
+        l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm);
+        if (l <= 0) {
+            r->data[r->slen] = (unsigned char) '\0';
+            s->buff->slen = 0;
+            s->isEOF = 1;
+            /* If nothing was read return with an error message */
+            return BSTR_ERR & -(r->slen == rlo);
+        }
+
+        b[l] = term->data[0]; /* Set sentinel */
+        for (i=0; !testInCharField (&cf, b[i]); i++) ;
+        if (i < l) break;
+        r->slen += l;
+    }
+
+    /* Terminator found, push over-read back to buffer */
+    i++;
+    r->slen += i;
+    s->buff->slen = l - i;
+    bstr__memcpy (s->buff->data, b + i, l - i);
+    r->data[r->slen] = (unsigned char) '\0';
+    return BSTR_OK;
+}
+
+/* int bsreada (bstring r, struct bStream * s, int n)
+ *
+ * Read a bstring of length n (or, if it is fewer, as many bytes as is
+ * remaining) from the bStream and append it to r. This function may read
+ * additional characters from the core stream that are not returned, but will
+ * be retained for subsequent read operations. This function will not read
+ * additional characters from the core stream beyond virtual stream pointer.
+ *
+ * Returns 0 when r grew by at least one character and BSTR_ERR otherwise
+ * (invalid arguments, n <= 0, allocation failure, or nothing left to read).
+ */
+int bsreada (bstring r, struct bStream * s, int n) {
+int l, ret, orslen;
+char * b;
+struct tagbstring x;
+
+ if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0
+ || r->slen < 0 || r->mlen < r->slen || n <= 0) return BSTR_ERR;
+
+ /* From here on n is the target total length of r; a non-positive sum
+ means the addition overflowed. */
+ n += r->slen;
+ if (n <= 0) return BSTR_ERR;
+
+ l = s->buff->slen;
+
+ /* Original length of r, used to tell whether anything was appended */
+ orslen = r->slen;
+
+ if (0 == l) {
+ if (s->isEOF) return BSTR_ERR;
+ /* Nothing buffered and r already has room: read straight into r and
+ skip the intermediate buffer. */
+ if (r->mlen > n) {
+ l = (int) s->readFnPtr (r->data + r->slen, 1, n - r->slen, s->parm);
+ if (0 >= l || l > n - r->slen) {
+ s->isEOF = 1;
+ return BSTR_ERR;
+ }
+ r->slen += l;
+ r->data[r->slen] = (unsigned char) '\0';
+ return 0;
+ }
+ }
+
+ if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR;
+ b = (char *) s->buff->data;
+ /* x is a temporary view over the stream buffer; only data and slen are
+ filled in. */
+ x.data = (unsigned char *) b;
+
+ do {
+ /* The buffer holds enough to finish the request: take what is needed
+ and leave the remainder buffered. */
+ if (l + r->slen >= n) {
+ x.slen = n - r->slen;
+ ret = bconcat (r, &x);
+ s->buff->slen = l;
+ if (BSTR_OK == ret) bdelete (s->buff, 0, x.slen);
+ return BSTR_ERR & -(r->slen == orslen);
+ }
+
+ /* Otherwise drain the whole buffer into r and refill it */
+ x.slen = l;
+ if (BSTR_OK != bconcat (r, &x)) break;
+
+ /* Never request more than is still needed or than the buffer holds */
+ l = n - r->slen;
+ if (l > s->maxBuffSz) l = s->maxBuffSz;
+
+ l = (int) s->readFnPtr (b, 1, l, s->parm);
+
+ } while (l > 0);
+ if (l < 0) l = 0;
+ if (l == 0) s->isEOF = 1;
+ s->buff->slen = l;
+ return BSTR_ERR & -(r->slen == orslen);
+}
+
+/*  int bsreadln (bstring r, struct bStream * s, char terminator)
+ *
+ *  Replace the contents of r with the next line of the bStream s, where a
+ *  line ends at the terminator character or at the end of the stream.  r is
+ *  emptied and the read is then delegated to bsreadlna, whose result is
+ *  returned.  Characters over-read from the core stream are retained for
+ *  subsequent read operations.
+ *
+ *  Returns BSTR_ERR on invalid arguments or if the stream buffer cannot be
+ *  sized.
+ */
+int bsreadln (bstring r, struct bStream * s, char terminator) {
+    if (s == NULL || s->buff == NULL) return BSTR_ERR;
+    if (r == NULL || r->mlen <= 0) return BSTR_ERR;
+
+    if (balloc (s->buff, s->maxBuffSz + 1) != BSTR_OK) return BSTR_ERR;
+
+    r->slen = 0;
+    return bsreadlna (r, s, terminator);
+}
+
+/*  int bsreadlns (bstring r, struct bStream * s, const_bstring term)
+ *
+ *  Replace the contents of r with the next line of the bStream s, where a
+ *  line ends at any character contained in term or at the end of the
+ *  stream.  A one-character term is forwarded to bsreadln; otherwise r is
+ *  emptied and the read is delegated to bsreadlnsa.  Characters over-read
+ *  from the core stream are retained for subsequent read operations.
+ *
+ *  Returns BSTR_ERR on invalid arguments (including an empty term) or if
+ *  the stream buffer cannot be sized.
+ */
+int bsreadlns (bstring r, struct bStream * s, const_bstring term) {
+    if (s == NULL || s->buff == NULL) return BSTR_ERR;
+    if (r == NULL || term == NULL) return BSTR_ERR;
+    if (term->data == NULL || r->mlen <= 0) return BSTR_ERR;
+
+    if (term->slen < 1) return BSTR_ERR;
+    if (term->slen == 1) return bsreadln (r, s, term->data[0]);
+
+    if (balloc (s->buff, s->maxBuffSz + 1) != BSTR_OK) return BSTR_ERR;
+
+    r->slen = 0;
+    return bsreadlnsa (r, s, term);
+}
+
+/*  int bsread (bstring r, struct bStream * s, int n)
+ *
+ *  Replace the contents of r with up to n characters taken from the bStream
+ *  s (fewer if the stream runs out).  r is emptied and the read is then
+ *  delegated to bsreada, whose result is returned.  Characters over-read
+ *  from the core stream are retained for subsequent read operations.
+ *
+ *  Returns BSTR_ERR on invalid arguments, when n is not positive, or if the
+ *  stream buffer cannot be sized.
+ */
+int bsread (bstring r, struct bStream * s, int n) {
+    if (s == NULL || s->buff == NULL) return BSTR_ERR;
+    if (r == NULL || r->mlen <= 0) return BSTR_ERR;
+    if (n <= 0) return BSTR_ERR;
+
+    if (balloc (s->buff, s->maxBuffSz + 1) != BSTR_OK) return BSTR_ERR;
+
+    r->slen = 0;
+    return bsreada (r, s, n);
+}
+
+/*  int bsunread (struct bStream * s, const_bstring b)
+ *
+ *  Push the bstring b back onto the front of the bStream's buffer, so that
+ *  its characters are delivered before anything else the stream would
+ *  return.  Implemented as a binsert at index 0 of the stream buffer.
+ *
+ *  Returns binsert's result, or BSTR_ERR if s or its buffer is NULL.
+ */
+int bsunread (struct bStream * s, const_bstring b) {
+    if (s != NULL && s->buff != NULL) {
+        return binsert (s->buff, 0, b, (unsigned char) '?');
+    }
+    return BSTR_ERR;
+}
+
+/*  int bspeek (bstring r, const struct bStream * s)
+ *
+ *  Copy into r the characters currently held in the bStream's buffer, i.e.
+ *  those that will be delivered before the core stream is read again.  The
+ *  stream itself is not changed.
+ *
+ *  Returns bassign's result, or BSTR_ERR if s or its buffer is NULL.
+ */
+int bspeek (bstring r, const struct bStream * s) {
+    if (s != NULL && s->buff != NULL) {
+        return bassign (r, s->buff);
+    }
+    return BSTR_ERR;
+}
+
+/*  bstring bjoin (const struct bstrList * bl, const_bstring sep);
+ *
+ *  Join the entries of a bstrList into one bstring by sequentially
+ *  concatenating them with the sep string in between.  sep may be NULL, in
+ *  which case the entries are concatenated directly.  An empty list yields
+ *  an empty bstring.
+ *
+ *  Returns the new bstring, or NULL on error: bl is NULL or has a negative
+ *  qty, sep is malformed, an entry is NULL or has a negative length, the
+ *  total length does not fit in an int, or memory cannot be obtained.
+ */
+bstring bjoin (const struct bstrList * bl, const_bstring sep) {
+bstring b;
+int i, c, v, seplen;
+
+    if (bl == NULL || bl->qty < 0) return NULL;
+    if (sep != NULL && (sep->slen < 0 || sep->data == NULL)) return NULL;
+
+    /* With no entries there are no separators either.  Computing
+       (qty - 1) * sep->slen here would give a zero or negative allocation
+       size and an out-of-bounds terminator write. */
+    if (bl->qty < 1) return bfromcstr ("");
+
+    seplen = (sep != NULL) ? sep->slen : 0;
+
+    /* c counts the bytes required, starting at 1 for the terminator */
+    for (i = 0, c = 1; i < bl->qty; i++) {
+        if (bl->entry[i] == NULL) return NULL; /* Invalid input */
+        v = bl->entry[i]->slen;
+        if (v < 0) return NULL; /* Invalid input */
+        if (i > 0) {
+            v += seplen;
+            if (v < 0) return NULL; /* Wrap around ?? */
+        }
+        c += v;
+        if (c < 0) return NULL; /* Wrap around ?? */
+    }
+
+    b = (bstring) bstr__alloc (sizeof (struct tagbstring));
+    if (NULL == b) return NULL; /* Out of memory */
+    b->data = (unsigned char *) bstr__alloc (c);
+    if (b->data == NULL) {
+        bstr__free (b);
+        return NULL;
+    }
+
+    b->mlen = c;
+    b->slen = c-1;
+
+    /* c is reused as the write cursor */
+    for (i = 0, c = 0; i < bl->qty; i++) {
+        if (i > 0 && sep != NULL) {
+            bstr__memcpy (b->data + c, sep->data, sep->slen);
+            c += sep->slen;
+        }
+        v = bl->entry[i]->slen;
+        bstr__memcpy (b->data + c, bl->entry[i]->data, v);
+        c += v;
+    }
+    b->data[c] = (unsigned char) '\0';
+    return b;
+}
+
+/* Number of bytes requested from the stream per bsreada call by the stream
+ * splitting functions. */
+#define BSSSC_BUFF_LEN (256)
+
+/* int bssplitscb (struct bStream * s, const_bstring splitStr,
+ * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm)
+ *
+ * Iterate the set of disjoint sequential substrings read from a stream
+ * divided by any of the characters in splitStr. An empty splitStr causes
+ * the whole stream to be iterated once.
+ *
+ * Note: At the point of calling the cb function, the bStream pointer is
+ * pointed exactly at the position right after having read the split
+ * character. The cb function can act on the stream by causing the bStream
+ * pointer to move, and bssplitscb will continue by starting the next split
+ * at the position of the pointer after the return from cb.
+ *
+ * However, if the cb causes the bStream s to be destroyed then the cb must
+ * return with a negative value, otherwise bssplitscb will continue in an
+ * undefined manner.
+ *
+ * Returns BSTR_ERR on invalid arguments or if the work buffer cannot be
+ * created, a negative value returned by cb or bsunread, and 0 otherwise
+ * (a positive result from the final cb call is reported as 0).
+ */
+int bssplitscb (struct bStream * s, const_bstring splitStr,
+ int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) {
+struct charField chrs;
+bstring buff;
+int i, p, ret;
+
+ if (cb == NULL || s == NULL || s->readFnPtr == NULL
+ || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR;
+
+ if (NULL == (buff = bfromcstr (""))) return BSTR_ERR;
+
+ if (splitStr->slen == 0) {
+ /* No split characters: slurp the whole stream, then one callback */
+ while (bsreada (buff, s, BSSSC_BUFF_LEN) >= 0) ;
+ if ((ret = cb (parm, 0, buff)) > 0)
+ ret = 0;
+ } else {
+ buildCharField (&chrs, splitStr);
+ /* p is the offset passed to cb for the current entry, i is the scan
+ index inside buff */
+ ret = p = i = 0;
+ for (;;) {
+ if (i >= buff->slen) {
+ bsreada (buff, s, BSSSC_BUFF_LEN);
+ /* Still nothing new after the read: deliver the final entry */
+ if (i >= buff->slen) {
+ if (0 < (ret = cb (parm, p, buff))) ret = 0;
+ break;
+ }
+ }
+ if (testInCharField (&chrs, buff->data[i])) {
+ struct tagbstring t;
+ unsigned char c;
+
+ /* Push everything after the split character back into the
+ stream before cb runs */
+ blk2tbstr (t, buff->data + i + 1, buff->slen - (i + 1));
+ if ((ret = bsunread (s, &t)) < 0) break;
+ /* Temporarily cut buff at the split character for the callback */
+ buff->slen = i;
+ c = buff->data[i];
+ buff->data[i] = (unsigned char) '\0';
+ if ((ret = cb (parm, p, buff)) < 0) break;
+ buff->data[i] = c;
+ buff->slen = 0;
+ p += i + 1;
+ /* i becomes 0 after the increment below */
+ i = -1;
+ }
+ i++;
+ }
+ }
+
+ bdestroy (buff);
+ return ret;
+}
+
+/*  int bssplitstrcb (struct bStream * s, const_bstring splitStr,
+ *      int (* cb) (void * parm, int ofs, const_bstring entry), void * parm)
+ *
+ *  Iterate the set of disjoint sequential substrings read from a stream
+ *  divided by the entire substring splitStr.  A one-character splitStr is
+ *  forwarded to bssplitscb.  With an empty splitStr the stream is handed to
+ *  cb in successive chunks as they are read (at most BSSSC_BUFF_LEN bytes
+ *  are requested per chunk), each with an ofs of 0.
+ *
+ *  Note: At the point of calling the cb function, the bStream pointer is
+ *  pointed exactly at the position right after having read the split
+ *  string.  The cb function can act on the stream by causing the bStream
+ *  pointer to move, and bssplitstrcb will continue by starting the next
+ *  split at the position of the pointer after the return from cb.
+ *
+ *  However, if the cb causes the bStream s to be destroyed then the cb must
+ *  return with a negative value, otherwise bssplitstrcb will continue in an
+ *  undefined manner.
+ *
+ *  Returns BSTR_ERR on invalid arguments or if the work buffer cannot be
+ *  created, the first negative value returned by cb, and 0 otherwise.
+ */
+int bssplitstrcb (struct bStream * s, const_bstring splitStr,
+    int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) {
+bstring buff;
+int i, p, ret;
+
+    if (cb == NULL || s == NULL || s->readFnPtr == NULL
+     || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR;
+
+    if (splitStr->slen == 1) return bssplitscb (s, splitStr, cb, parm);
+
+    if (NULL == (buff = bfromcstr (""))) return BSTR_ERR;
+
+    if (splitStr->slen == 0) {
+        ret = BSTR_OK;
+        while (bsreada (buff, s, BSSSC_BUFF_LEN) >= 0) {
+            int rc = cb (parm, 0, buff);
+            if (rc < 0) {
+                ret = rc;
+                break;
+            }
+            buff->slen = 0;
+        }
+        /* Falls through to the shared cleanup so buff is released on the
+           success path as well. */
+    } else {
+        ret = 0;
+        for (i=p=0;;) {
+            if ((ret = binstr (buff, 0, splitStr)) >= 0) {
+                /* A complete entry is buffered: hand it over and drop it
+                   together with the separator that follows it. */
+                struct tagbstring t;
+                blk2tbstr (t, buff->data, ret);
+                i = ret + splitStr->slen;
+                if ((ret = cb (parm, p, &t)) < 0) break;
+                p += i;
+                bdelete (buff, 0, i);
+            } else {
+                /* No separator buffered yet: pull in more of the stream */
+                bsreada (buff, s, BSSSC_BUFF_LEN);
+                if (bseof (s)) {
+                    if ((ret = cb (parm, p, buff)) > 0) ret = 0;
+                    break;
+                }
+            }
+        }
+    }
+
+    bdestroy (buff);
+    return ret;
+}
+
+/*  struct bstrList * bstrListCreate (void)
+ *
+ *  Allocate an empty bstrList with room for a single entry (qty 0, mlen 1).
+ *
+ *  Returns the new list, or NULL if either allocation fails.
+ */
+struct bstrList * bstrListCreate (void) {
+struct bstrList * list;
+
+    list = (struct bstrList *) bstr__alloc (sizeof (struct bstrList));
+    if (list == NULL) return NULL;
+
+    list->entry = (bstring *) bstr__alloc (1*sizeof (bstring));
+    if (list->entry == NULL) {
+        bstr__free (list);
+        return NULL;
+    }
+
+    list->qty = 0;
+    list->mlen = 1;
+    return list;
+}
+
+/*  int bstrListDestroy (struct bstrList * sl)
+ *
+ *  Release a bstrList obtained from bsplit, bsplits or bstrListCreate:
+ *  every non-NULL entry is destroyed, then the entry array and the list
+ *  structure itself are freed.
+ *
+ *  Returns BSTR_OK, or BSTR_ERR if sl is NULL or has a negative qty.
+ */
+int bstrListDestroy (struct bstrList * sl) {
+int idx;
+
+    if (sl == NULL || sl->qty < 0) return BSTR_ERR;
+
+    idx = 0;
+    while (idx < sl->qty) {
+        if (sl->entry[idx] != NULL) {
+            bdestroy (sl->entry[idx]);
+            sl->entry[idx] = NULL;
+        }
+        idx++;
+    }
+
+    /* Mark the list invalid before releasing its storage */
+    sl->qty = -1;
+    sl->mlen = -1;
+    bstr__free (sl->entry);
+    sl->entry = NULL;
+    bstr__free (sl);
+    return BSTR_OK;
+}
+
+/*  int bstrListAlloc (struct bstrList * sl, int msz)
+ *
+ *  Make sure the list can hold at least msz entries.  Nothing happens when
+ *  the capacity is already sufficient.  Otherwise the entry array is
+ *  reallocated to the size chosen by snapUpSize (msz); if that fails a
+ *  second attempt is made with exactly msz entries.
+ *
+ *  Returns BSTR_OK on success and BSTR_ERR on invalid arguments, size
+ *  overflow or when both reallocations fail.
+ */
+int bstrListAlloc (struct bstrList * sl, int msz) {
+bstring * grown;
+int capacity;
+size_t bytes;
+
+    if (!sl || msz <= 0 || !sl->entry) return BSTR_ERR;
+    if (sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR;
+
+    if (msz <= sl->mlen) return BSTR_OK;
+
+    capacity = snapUpSize (msz);
+    bytes = ((size_t) capacity) * sizeof (bstring);
+    if (bytes < (size_t) capacity) return BSTR_ERR;
+
+    grown = (bstring *) bstr__realloc (sl->entry, bytes);
+    if (grown == NULL) {
+        /* Retry with the bare minimum */
+        capacity = msz;
+        bytes = ((size_t) capacity) * sizeof (bstring);
+        grown = (bstring *) bstr__realloc (sl->entry, bytes);
+        if (grown == NULL) return BSTR_ERR;
+    }
+
+    sl->mlen = capacity;
+    sl->entry = grown;
+    return BSTR_OK;
+}
+
+/*  int bstrListAllocMin (struct bstrList * sl, int msz)
+ *
+ *  Resize the entry array to hold exactly msz entries, or sl->qty entries
+ *  if that is greater.  Nothing happens when the capacity already equals
+ *  that number.
+ *
+ *  Returns BSTR_OK on success and BSTR_ERR on invalid arguments, size
+ *  overflow or when the reallocation fails.
+ */
+int bstrListAllocMin (struct bstrList * sl, int msz) {
+bstring * resized;
+size_t bytes;
+
+    if (!sl || msz <= 0 || !sl->entry) return BSTR_ERR;
+    if (sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR;
+
+    /* Never shrink below the number of entries in use */
+    if (sl->qty > msz) msz = sl->qty;
+    if (msz == sl->mlen) return BSTR_OK;
+
+    bytes = ((size_t) msz) * sizeof (bstring);
+    if (bytes < (size_t) msz) return BSTR_ERR;
+
+    resized = (bstring *) bstr__realloc (sl->entry, bytes);
+    if (resized == NULL) return BSTR_ERR;
+
+    sl->mlen = msz;
+    sl->entry = resized;
+    return BSTR_OK;
+}
+
+/*  int bsplitcb (const_bstring str, unsigned char splitChar, int pos,
+ *      int (* cb) (void * parm, int ofs, int len), void * parm)
+ *
+ *  Walk str from index pos and call cb once for every substring delimited
+ *  by splitChar, passing the substring's offset and length.
+ *
+ *  Note: cb may modify str in a non-destructive way while the split is in
+ *  progress; this is well defined.  bsplitcb runs in lock step with cb:
+ *  after a cb call that returns a non-negative value, scanning resumes one
+ *  character past the last split character found, and the walk stops as
+ *  soon as the length of str has fallen below that point.  If cb destroys
+ *  str it *must* return a negative value, otherwise bsplitcb continues in
+ *  an undefined manner.
+ *
+ *  Returns BSTR_OK when the walk completes, the first negative value
+ *  returned by cb, or BSTR_ERR on invalid arguments.
+ */
+int bsplitcb (const_bstring str, unsigned char splitChar, int pos,
+    int (* cb) (void * parm, int ofs, int len), void * parm) {
+int start, end, rc;
+
+    if (cb == NULL || str == NULL || pos < 0 || pos > str->slen)
+        return BSTR_ERR;
+
+    start = pos;
+    for (;;) {
+        /* Advance end to the next split character or to the end of str */
+        end = start;
+        while (end < str->slen && str->data[end] != splitChar) end++;
+
+        rc = cb (parm, start, end - start);
+        if (rc < 0) return rc;
+
+        start = end + 1;
+        if (start > str->slen) break;
+    }
+    return BSTR_OK;
+}
+
+/* int bsplitscb (const_bstring str, const_bstring splitStr, int pos,
+ *	int (* cb) (void * parm, int ofs, int len), void * parm)
+ *
+ * Walk str from offset pos and call cb once for every run of characters
+ * delimited by any one of the characters found in splitStr.  When splitStr
+ * is empty, cb is called exactly once with offset 0 and the full length of
+ * str (pos is not applied in that case).  A single character splitStr is
+ * delegated to bsplitcb.
+ *
+ * A negative value returned by cb stops the walk and is returned to the
+ * caller; otherwise BSTR_OK is returned.  BSTR_ERR is returned for NULL
+ * arguments, a negative splitStr length or a pos outside [0, str->slen].
+ *
+ * Note: cb may modify str non-destructively; that case is well defined.
+ * The length of str is re-read after every callback, scanning resumes one
+ * character past the last split character found, and the walk halts as
+ * soon as that position lies beyond the end of str.  If cb destroys str it
+ * *must* return a negative value, otherwise the behaviour is undefined.
+ */
+int bsplitscb (const_bstring str, const_bstring splitStr, int pos,
+    int (* cb) (void * parm, int ofs, int len), void * parm) {
+    struct charField delims;
+    int start, end, rc;
+
+    if (cb == NULL || str == NULL) return BSTR_ERR;
+    if (pos < 0 || pos > str->slen) return BSTR_ERR;
+    if (splitStr == NULL || splitStr->slen < 0) return BSTR_ERR;
+
+    switch (splitStr->slen) {
+    case 0:
+        /* No delimiters: hand over the whole string, mapping any
+           positive callback result to 0. */
+        rc = cb (parm, 0, str->slen);
+        return (rc > 0) ? 0 : rc;
+    case 1:
+        return bsplitcb (str, splitStr->data[0], pos, cb, parm);
+    default:
+        break;
+    }
+
+    buildCharField (&delims, splitStr);
+
+    start = pos;
+    for (;;) {
+        /* Advance to the next delimiter or the end of the string. */
+        for (end = start; end < str->slen; end++) {
+            if (testInCharField (&delims, str->data[end])) break;
+        }
+
+        rc = cb (parm, start, end - start);
+        if (rc < 0) return rc;
+
+        start = end + 1;
+        if (start > str->slen) break;
+    }
+    return BSTR_OK;
+}
+
+/* int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos,
+ *	int (* cb) (void * parm, int ofs, int len), void * parm)
+ *
+ * Iterate the set of disjoint sequential substrings over str divided by the
+ * substring splitStr, starting at offset pos.  An empty splitStr causes cb
+ * to be called once for every single character of str from pos onward.  A
+ * one character splitStr is delegated to bsplitcb.
+ *
+ * A negative value returned by cb stops the iteration and is returned to
+ * the caller; otherwise BSTR_OK is returned.  BSTR_ERR is returned for NULL
+ * arguments, a negative splitStr length or a pos outside [0, str->slen].
+ *
+ * Note: Non-destructive modification of str from within the cb function
+ * while performing this split is well defined.  bsplitstrcb behaves in
+ * sequential lock step with calls to cb.  I.e., after returning from a cb
+ * that returns a non-negative integer, bsplitstrcb continues from the
+ * position immediately after the last detected split substring.  However,
+ * if the cb function destroys str, then it *must* return with a negative
+ * value, otherwise bsplitstrcb will continue in an undefined manner.
+ */
+int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos,
+    int (* cb) (void * parm, int ofs, int len), void * parm) {
+    int i, p, ret;
+
+    if (cb == NULL || str == NULL || pos < 0 || pos > str->slen
+     || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR;
+
+    if (0 == splitStr->slen) {
+        for (i = pos; i < str->slen; i++) {
+            if ((ret = cb (parm, i, 1)) < 0) return ret;
+        }
+        return BSTR_OK;
+    }
+
+    if (splitStr->slen == 1)
+        return bsplitcb (str, splitStr->data[0], pos, cb, parm);
+
+    /* i is the scan position, p the start of the pending substring.  After
+       a match the scan must resume exactly at the first character behind
+       the delimiter; advancing one further (as a for-loop increment would
+       do) skips that character and misses back-to-back delimiters. */
+    i = p = pos;
+    while (i <= str->slen - splitStr->slen) {
+        if (0 == bstr__memcmp (splitStr->data, str->data + i, splitStr->slen)) {
+            if ((ret = cb (parm, p, i - p)) < 0) return ret;
+            i += splitStr->slen;
+            p = i;
+        } else {
+            i++;
+        }
+    }
+
+    /* Report whatever follows the last delimiter (possibly empty). */
+    if ((ret = cb (parm, p, str->slen - p)) < 0) return ret;
+    return BSTR_OK;
+}
+
+/* Context handed (as the void * parm) to the bscb callback by bsplit,
+ * bsplits and bsplitstr. */
+struct genBstrList {
+ bstring b;             /* string being split; substrings are copied from it */
+ struct bstrList * bl;  /* list that receives the extracted substrings */
+};
+
+/* static int bscb (void * parm, int ofs, int len)
+ *
+ * Split callback shared by bsplit, bsplits and bsplitstr.  parm points to a
+ * struct genBstrList; the substring [ofs, ofs+len) of its source string is
+ * copied with bmidstr and appended to its list, whose entry array is grown
+ * by doubling when it is full.
+ *
+ * Returns BSTR_OK on success and BSTR_ERR if the capacity computation
+ * wraps, the entry array cannot be grown, or the substring cannot be
+ * created.  On BSTR_ERR the list is left unchanged apart from a possibly
+ * enlarged entry array.
+ */
+static int bscb (void * parm, int ofs, int len) {
+    struct genBstrList * g = (struct genBstrList *) parm;
+    bstring piece;
+
+    if (g->bl->qty >= g->bl->mlen) {
+        int mlen = g->bl->mlen * 2;
+        bstring * tbl;
+        size_t nsz;
+
+        while (g->bl->qty >= mlen) {
+            /* Doubling wrapped around: give up. */
+            if (mlen < g->bl->mlen) return BSTR_ERR;
+            mlen += mlen;
+        }
+
+        /* Same wrap-around guard on the byte count as bstrListAlloc. */
+        nsz = ((size_t) mlen) * sizeof (bstring);
+        if (nsz < (size_t) mlen) return BSTR_ERR;
+
+        tbl = (bstring *) bstr__realloc (g->bl->entry, nsz);
+        if (tbl == NULL) return BSTR_ERR;
+
+        g->bl->entry = tbl;
+        g->bl->mlen = mlen;
+    }
+
+    /* Do not record a failed copy: a NULL entry inside a list reported as
+       successfully built would be dereferenced by unsuspecting callers. */
+    piece = bmidstr (g->b, ofs, len);
+    if (piece == NULL) return BSTR_ERR;
+
+    g->bl->entry[g->bl->qty] = piece;
+    g->bl->qty++;
+    return BSTR_OK;
+}
+
+/* struct bstrList * bsplit (const_bstring str, unsigned char splitChar)
+ *
+ * Build a newly allocated bstrList holding copies of the consecutive
+ * substrings of str that are separated by splitChar.  Returns NULL when
+ * str is invalid, when memory cannot be obtained, or when the split itself
+ * fails; in the latter case the partially built list is destroyed.
+ */
+struct bstrList * bsplit (const_bstring str, unsigned char splitChar) {
+    struct genBstrList ctx;
+    struct bstrList * list;
+
+    if (str == NULL || str->data == NULL || str->slen < 0) return NULL;
+
+    list = (struct bstrList *) bstr__alloc (sizeof (struct bstrList));
+    if (list == NULL) return NULL;
+
+    /* Start with room for four entries; bscb grows the array on demand. */
+    list->mlen = 4;
+    list->entry = (bstring *) bstr__alloc (list->mlen * sizeof (bstring));
+    if (list->entry == NULL) {
+        bstr__free (list);
+        return NULL;
+    }
+    list->qty = 0;
+
+    ctx.b = (bstring) str;
+    ctx.bl = list;
+    if (bsplitcb (str, splitChar, 0, bscb, &ctx) < 0) {
+        bstrListDestroy (list);
+        return NULL;
+    }
+    return list;
+}
+
+/* struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr)
+ *
+ * Build a newly allocated bstrList holding copies of the consecutive
+ * substrings of str that are separated by the complete substring splitStr.
+ * Returns NULL when str is invalid, when memory cannot be obtained, or
+ * when bsplitstrcb reports an error (which includes an invalid splitStr);
+ * in the latter case the partially built list is destroyed.
+ */
+struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr) {
+    struct genBstrList ctx;
+    struct bstrList * list;
+
+    if (str == NULL || str->data == NULL || str->slen < 0) return NULL;
+
+    list = (struct bstrList *) bstr__alloc (sizeof (struct bstrList));
+    if (list == NULL) return NULL;
+
+    /* Start with room for four entries; bscb grows the array on demand. */
+    list->mlen = 4;
+    list->entry = (bstring *) bstr__alloc (list->mlen * sizeof (bstring));
+    if (list->entry == NULL) {
+        bstr__free (list);
+        return NULL;
+    }
+    list->qty = 0;
+
+    ctx.b = (bstring) str;
+    ctx.bl = list;
+    if (bsplitstrcb (str, splitStr, 0, bscb, &ctx) < 0) {
+        bstrListDestroy (list);
+        return NULL;
+    }
+    return list;
+}
+
+/* struct bstrList * bsplits (const_bstring str, const_bstring splitStr)
+ *
+ * Build a newly allocated bstrList holding copies of the consecutive
+ * substrings of str that are separated by any one of the characters in
+ * splitStr.  An empty splitStr yields a list with a single entry that is a
+ * copy of str.  Returns NULL when str or splitStr is invalid, when memory
+ * cannot be obtained, or when the split itself fails; in the latter case
+ * the partially built list is destroyed.
+ */
+struct bstrList * bsplits (const_bstring str, const_bstring splitStr) {
+    struct genBstrList ctx;
+    struct bstrList * list;
+
+    if (str == NULL || str->slen < 0 || str->data == NULL) return NULL;
+    if (splitStr == NULL || splitStr->slen < 0 || splitStr->data == NULL) return NULL;
+
+    list = (struct bstrList *) bstr__alloc (sizeof (struct bstrList));
+    if (list == NULL) return NULL;
+
+    /* Start with room for four entries; bscb grows the array on demand. */
+    list->mlen = 4;
+    list->entry = (bstring *) bstr__alloc (list->mlen * sizeof (bstring));
+    if (list->entry == NULL) {
+        bstr__free (list);
+        return NULL;
+    }
+    list->qty = 0;
+
+    ctx.b = (bstring) str;
+    ctx.bl = list;
+    if (bsplitscb (str, splitStr, 0, bscb, &ctx) < 0) {
+        bstrListDestroy (list);
+        return NULL;
+    }
+    return list;
+}
+
+/* Turbo C (as opposed to Borland C) is treated as having no vsnprintf, so
+   BSTRLIB_NOVSNP is forced on for it unless already defined. */
+#if defined (__TURBOC__) && !defined (__BORLANDC__)
+# ifndef BSTRLIB_NOVSNP
+# define BSTRLIB_NOVSNP
+# endif
+#endif
+
+/* exvsnprintf(r,b,n,f,a) formats f/a into buffer b (size n where the
+   underlying call supports a limit) and stores the call's result in r.
+   Exactly one of the three definitions below is selected. */
+
+/* Give WATCOM C/C++, MSVC some latitude for their non-support of vsnprintf */
+#if defined(__WATCOMC__) || defined(_MSC_VER)
+#define exvsnprintf(r,b,n,f,a) {r = _vsnprintf (b,n,f,a);}
+#else
+#ifdef BSTRLIB_NOVSNP
+/* This is just a hack. If you are using a system without a vsnprintf, it is
+ not recommended that bformat be used at all. */
+/* Note: this variant ignores n (vsprintf is unbounded) and always reports
+   r = -1. */
+#define exvsnprintf(r,b,n,f,a) {vsprintf (b,f,a); r = -1;}
+#define START_VSNBUFF (256)
+#else
+
+#if defined(__GNUC__) && !defined(__APPLE__)
+/* Something is making gcc complain about this prototype not being here, so
+ I've just gone ahead and put it in. */
+extern int vsnprintf (char *buf, size_t count, const char *format, va_list arg);
+#endif
+
+/* Default: the standard bounded vsnprintf. */
+#define exvsnprintf(r,b,n,f,a) {r = vsnprintf (b,n,f,a);}
+#endif
+#endif
+
+#if !defined (BSTRLIB_NOVSNP)
+
+#ifndef START_VSNBUFF
+#define START_VSNBUFF (16)
+#endif
+
+/* On IRIX vsnprintf returns n-1 when the operation would overflow the target
+ buffer, WATCOM and MSVC both return -1, while C99 requires that the
+ returned value be exactly what the length would be if the buffer would be
+ large enough. This leads to the idea that if the return value is larger
+ than n, then changing n to the return value will reduce the number of
+ iterations required. */
+
+/* int bformata (bstring b, const char * fmt, ...)
+ *
+ * After the first parameter, it takes the same parameters as printf (), but
+ * rather than outputting results to stdio, it appends the results to
+ * a bstring which contains what would have been output. Note that if there
+ * is an early generation of a '\0' character, the bstring will be truncated
+ * to this end point.
+ */
+int bformata (bstring b, const char * fmt, ...) {
+va_list arglist;
+bstring buff;
+int n, r;
+
+ /* Reject a missing format and any destination whose fields are
+    inconsistent or whose mlen is not positive. */
+ if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0
+ || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR;
+
+ /* Since the length is not determinable beforehand, a search is
+ performed using the truncating "vsnprintf" call (to avoid buffer
+ overflows) on increasing potential sizes for the output result. */
+
+ /* Initial guess: twice the format length, but at least START_VSNBUFF.
+    If that scratch buffer cannot be allocated, retry with the smallest
+    size (n = 1) before giving up. */
+ if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF;
+ if (NULL == (buff = bfromcstralloc (n + 2, ""))) {
+ n = 1;
+ if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR;
+ }
+
+ for (;;) {
+ /* The argument list is re-initialised on every pass because each
+    formatting attempt walks it from the start. */
+ va_start (arglist, fmt);
+ exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist);
+ va_end (arglist);
+
+ /* Force termination at index n, then measure what was produced. */
+ buff->data[n] = (unsigned char) '\0';
+ buff->slen = (int) (strlen) ((char *) buff->data);
+
+ /* Fewer than n characters means the output was not truncated. */
+ if (buff->slen < n) break;
+
+ /* Grow: jump to the reported length if r gives one, else double. */
+ if (r > n) n = r; else n += n;
+
+ if (BSTR_OK != balloc (buff, n + 2)) {
+ bdestroy (buff);
+ return BSTR_ERR;
+ }
+ }
+
+ /* Append the formatted text to b and release the scratch buffer. */
+ r = bconcat (b, buff);
+ bdestroy (buff);
+ return r;
+}
+
+/* int bassignformat (bstring b, const char * fmt, ...)
+ *
+ * After the first parameter, it takes the same parameters as printf (), but
+ * rather than outputting results to stdio, it outputs the results to
+ * the bstring parameter b. Note that if there is an early generation of a
+ * '\0' character, the bstring will be truncated to this end point.
+ */
+int bassignformat (bstring b, const char * fmt, ...) {
+va_list arglist;
+bstring buff;
+int n, r;
+
+ /* Reject a missing format and any destination whose fields are
+    inconsistent or whose mlen is not positive. */
+ if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0
+ || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR;
+
+ /* Since the length is not determinable beforehand, a search is
+ performed using the truncating "vsnprintf" call (to avoid buffer
+ overflows) on increasing potential sizes for the output result. */
+
+ /* Initial guess: twice the format length, but at least START_VSNBUFF.
+    If that scratch buffer cannot be allocated, retry with the smallest
+    size (n = 1) before giving up. */
+ if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF;
+ if (NULL == (buff = bfromcstralloc (n + 2, ""))) {
+ n = 1;
+ if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR;
+ }
+
+ for (;;) {
+ /* The argument list is re-initialised on every pass because each
+    formatting attempt walks it from the start. */
+ va_start (arglist, fmt);
+ exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist);
+ va_end (arglist);
+
+ /* Force termination at index n, then measure what was produced. */
+ buff->data[n] = (unsigned char) '\0';
+ buff->slen = (int) (strlen) ((char *) buff->data);
+
+ /* Fewer than n characters means the output was not truncated. */
+ if (buff->slen < n) break;
+
+ /* Grow: jump to the reported length if r gives one, else double. */
+ if (r > n) n = r; else n += n;
+
+ if (BSTR_OK != balloc (buff, n + 2)) {
+ bdestroy (buff);
+ return BSTR_ERR;
+ }
+ }
+
+ /* Replace the contents of b with the formatted text and release the
+    scratch buffer. */
+ r = bassign (b, buff);
+ bdestroy (buff);
+ return r;
+}
+
+/* bstring bformat (const char * fmt, ...)
+ *
+ * Takes the same parameters as printf (), but rather than outputting results
+ * to stdio, it forms a bstring which contains what would have been output.
+ * Note that if there is an early generation of a '\0' character, the
+ * bstring will be truncated to this end point.
+ */
+bstring bformat (const char * fmt, ...) {
+va_list arglist;
+bstring buff;
+int n, r;
+
+ if (fmt == NULL) return NULL;
+
+ /* Since the length is not determinable beforehand, a search is
+ performed using the truncating "vsnprintf" call (to avoid buffer
+ overflows) on increasing potential sizes for the output result. */
+
+ /* Initial guess: twice the format length, but at least START_VSNBUFF.
+    If that buffer cannot be allocated, retry with the smallest size
+    (n = 1) before giving up. */
+ if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF;
+ if (NULL == (buff = bfromcstralloc (n + 2, ""))) {
+ n = 1;
+ if (NULL == (buff = bfromcstralloc (n + 2, ""))) return NULL;
+ }
+
+ for (;;) {
+ /* The argument list is re-initialised on every pass because each
+    formatting attempt walks it from the start. */
+ va_start (arglist, fmt);
+ exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist);
+ va_end (arglist);
+
+ /* Force termination at index n, then measure what was produced. */
+ buff->data[n] = (unsigned char) '\0';
+ buff->slen = (int) (strlen) ((char *) buff->data);
+
+ /* Fewer than n characters means the output was not truncated. */
+ if (buff->slen < n) break;
+
+ /* Grow: jump to the reported length if r gives one, else double. */
+ if (r > n) n = r; else n += n;
+
+ if (BSTR_OK != balloc (buff, n + 2)) {
+ bdestroy (buff);
+ return NULL;
+ }
+ }
+
+ /* The buffer itself is the result; the caller owns it. */
+ return buff;
+}
+
+/* int bvcformata (bstring b, int count, const char * fmt, va_list arglist)
+ *
+ * The bvcformata function formats data under control of the format control
+ * string fmt and attempts to append the result to b. The fmt parameter is
+ * the same as that of the printf function. The variable argument list is
+ * replaced with arglist, which has been initialized by the va_start macro.
+ * The size of the appended output is upper bounded by count. If the
+ * required output exceeds count, the string b is not augmented with any
+ * contents and a value below BSTR_ERR is returned. If a value below -count
+ * is returned then it is recommended that the negative of this value be
+ * used as an update to the count in a subsequent pass. On other errors,
+ * such as running out of memory, parameter errors or numeric wrap around
+ * BSTR_ERR is returned. BSTR_OK is returned when the output is successfully
+ * generated and appended to b.
+ *
+ * Note: There is no sanity checking of arglist, and this function is
+ * destructive of the contents of b from the b->slen point onward. If there
+ * is an early generation of a '\0' character, the bstring will be truncated
+ * to this end point.
+ */
+int bvcformata (bstring b, int count, const char * fmt, va_list arg) {
+int n, r, l;
+
+ /* Parameter errors: missing arguments, non-positive count, or a
+    destination whose fields are inconsistent or whose mlen is not
+    positive. */
+ if (b == NULL || fmt == NULL || count <= 0 || b->data == NULL
+ || b->mlen <= 0 || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR;
+
+ /* n is the largest length b may reach; the comparison catches numeric
+    wrap around of b->slen + count (+ 2). */
+ if (count > (n = b->slen + count) + 2) return BSTR_ERR;
+ if (BSTR_OK != balloc (b, n + 2)) return BSTR_ERR;
+
+ /* Format directly behind the current contents of b, limited to
+    count + 2 bytes. */
+ exvsnprintf (r, (char *) b->data + b->slen, count + 2, fmt, arg);
+
+ /* Did the operation complete successfully within bounds? */
+ /* A terminator found at or before index n means the output fit; the
+    first one found becomes the new end of the string. */
+ for (l = b->slen; l <= n; l++) {
+ if ('\0' == b->data[l]) {
+ b->slen = l;
+ return BSTR_OK;
+ }
+ }
+
+ /* Abort, since the buffer was not large enough. The return value
+ tries to help set what the retry length should be. */
+
+ /* Undo the partial write by re-terminating b at its original length. */
+ b->data[b->slen] = '\0';
+ if (r > count + 1) { /* Does r specify a particular target length? */
+ n = r;
+ } else {
+ n = count + count; /* If not, just double the size of count */
+ if (count > n) n = INT_MAX; /* doubling wrapped around */
+ }
+ n = -n;
+
+ /* Keep the hint strictly below BSTR_ERR so that it cannot be mistaken
+    for a plain error. */
+ if (n > BSTR_ERR-1) n = BSTR_ERR-1;
+ return n;
+}
+
+#endif
diff --git a/src/bstrlib.h b/src/bstrlib.h
new file mode 100644
index 0000000..c8fa694
--- /dev/null
+++ b/src/bstrlib.h
@@ -0,0 +1,304 @@
+/*
+ * This source file is part of the bstring string library. This code was
+ * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
+ * BSD open source license or GPL v2.0. Refer to the accompanying documentation
+ * for details on usage and license.
+ */
+
+/*
+ * bstrlib.h
+ *
+ * This file is the header file for the core module for implementing the
+ * bstring functions.
+ */
+
+#ifndef BSTRLIB_INCLUDE
+#define BSTRLIB_INCLUDE
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdarg.h>
+#include <string.h>
+#include <limits.h>
+#include <ctype.h>
+
+#if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP)
+# if defined (__TURBOC__) && !defined (__BORLANDC__)
+# define BSTRLIB_NOVSNP
+# endif
+#endif
+
+#define BSTR_ERR (-1)
+#define BSTR_OK (0)
+#define BSTR_BS_BUFF_LENGTH_GET (0)
+
+typedef struct tagbstring * bstring;
+typedef const struct tagbstring * const_bstring;
+
+/* Copy functions */
+#define cstr2bstr bfromcstr
+extern bstring bfromcstr (const char * str);
+extern bstring bfromcstralloc (int mlen, const char * str);
+extern bstring blk2bstr (const void * blk, int len);
+extern char * bstr2cstr (const_bstring s, char z);
+extern int bcstrfree (char * s);
+extern bstring bstrcpy (const_bstring b1);
+extern int bassign (bstring a, const_bstring b);
+extern int bassignmidstr (bstring a, const_bstring b, int left, int len);
+extern int bassigncstr (bstring a, const char * str);
+extern int bassignblk (bstring a, const void * s, int len);
+
+/* Destroy function */
+extern int bdestroy (bstring b);
+
+/* Space allocation hinting functions */
+extern int balloc (bstring s, int len);
+extern int ballocmin (bstring b, int len);
+
+/* Substring extraction */
+extern bstring bmidstr (const_bstring b, int left, int len);
+
+/* Various standard manipulations */
+extern int bconcat (bstring b0, const_bstring b1);
+extern int bconchar (bstring b0, char c);
+extern int bcatcstr (bstring b, const char * s);
+extern int bcatblk (bstring b, const void * s, int len);
+extern int binsert (bstring s1, int pos, const_bstring s2, unsigned char fill);
+extern int binsertch (bstring s1, int pos, int len, unsigned char fill);
+extern int breplace (bstring b1, int pos, int len, const_bstring b2, unsigned char fill);
+extern int bdelete (bstring s1, int pos, int len);
+extern int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill);
+extern int btrunc (bstring b, int n);
+
+/* Scan/search functions */
+extern int bstricmp (const_bstring b0, const_bstring b1);
+extern int bstrnicmp (const_bstring b0, const_bstring b1, int n);
+extern int biseqcaseless (const_bstring b0, const_bstring b1);
+extern int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len);
+extern int biseq (const_bstring b0, const_bstring b1);
+extern int bisstemeqblk (const_bstring b0, const void * blk, int len);
+extern int biseqcstr (const_bstring b, const char * s);
+extern int biseqcstrcaseless (const_bstring b, const char * s);
+extern int bstrcmp (const_bstring b0, const_bstring b1);
+extern int bstrncmp (const_bstring b0, const_bstring b1, int n);
+extern int binstr (const_bstring s1, int pos, const_bstring s2);
+extern int binstrr (const_bstring s1, int pos, const_bstring s2);
+extern int binstrcaseless (const_bstring s1, int pos, const_bstring s2);
+extern int binstrrcaseless (const_bstring s1, int pos, const_bstring s2);
+extern int bstrchrp (const_bstring b, int c, int pos);
+extern int bstrrchrp (const_bstring b, int c, int pos);
+#define bstrchr(b,c) bstrchrp ((b), (c), 0)
+#define bstrrchr(b,c) bstrrchrp ((b), (c), blength(b)-1)
+extern int binchr (const_bstring b0, int pos, const_bstring b1);
+extern int binchrr (const_bstring b0, int pos, const_bstring b1);
+extern int bninchr (const_bstring b0, int pos, const_bstring b1);
+extern int bninchrr (const_bstring b0, int pos, const_bstring b1);
+extern int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos);
+extern int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos);
+
+/* List of string container functions */
+/* A growable array of bstrings: entry points to mlen slots, of which the
+ * first qty are in use (the list functions require 0 <= qty <= mlen and
+ * mlen > 0). */
+struct bstrList {
+ int qty, mlen;
+ bstring * entry;
+};
+extern struct bstrList * bstrListCreate (void);
+extern int bstrListDestroy (struct bstrList * sl);
+extern int bstrListAlloc (struct bstrList * sl, int msz);
+extern int bstrListAllocMin (struct bstrList * sl, int msz);
+
+/* String split and join functions */
+extern struct bstrList * bsplit (const_bstring str, unsigned char splitChar);
+extern struct bstrList * bsplits (const_bstring str, const_bstring splitStr);
+extern struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr);
+extern bstring bjoin (const struct bstrList * bl, const_bstring sep);
+extern int bsplitcb (const_bstring str, unsigned char splitChar, int pos,
+ int (* cb) (void * parm, int ofs, int len), void * parm);
+extern int bsplitscb (const_bstring str, const_bstring splitStr, int pos,
+ int (* cb) (void * parm, int ofs, int len), void * parm);
+extern int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos,
+ int (* cb) (void * parm, int ofs, int len), void * parm);
+
+/* Miscellaneous functions */
+extern int bpattern (bstring b, int len);
+extern int btoupper (bstring b);
+extern int btolower (bstring b);
+extern int bltrimws (bstring b);
+extern int brtrimws (bstring b);
+extern int btrimws (bstring b);
+
+/* <*>printf format functions */
+#if !defined (BSTRLIB_NOVSNP)
+extern bstring bformat (const char * fmt, ...);
+extern int bformata (bstring b, const char * fmt, ...);
+extern int bassignformat (bstring b, const char * fmt, ...);
+extern int bvcformata (bstring b, int count, const char * fmt, va_list arglist);
+
+/* bvformata (ret, b, fmt, lastarg)
+ *
+ * Statement macro for use inside a variadic function whose last named
+ * parameter is lastarg.  It appends the formatted output to b by calling
+ * bvcformata repeatedly, starting with a size of 16 and enlarging it to
+ * the size suggested by each failed attempt.  ret receives BSTR_OK on
+ * success and BSTR_ERR when bvcformata reports an error that is not a
+ * request for a larger size.
+ */
+#define bvformata(ret, b, fmt, lastarg) { \
+bstring bstrtmp_b = (b); \
+const char * bstrtmp_fmt = (fmt); \
+int bstrtmp_r = BSTR_ERR, bstrtmp_sz = 16; \
+ for (;;) { \
+ va_list bstrtmp_arglist; \
+ va_start (bstrtmp_arglist, lastarg); \
+ bstrtmp_r = bvcformata (bstrtmp_b, bstrtmp_sz, bstrtmp_fmt, bstrtmp_arglist); \
+ va_end (bstrtmp_arglist); \
+ if (bstrtmp_r >= 0) { /* Everything went ok */ \
+ bstrtmp_r = BSTR_OK; \
+ break; \
+ } else if (-bstrtmp_r <= bstrtmp_sz) { /* A real error? */ \
+ bstrtmp_r = BSTR_ERR; \
+ break; \
+ } \
+ bstrtmp_sz = -bstrtmp_r; /* Doubled or target size */ \
+ } \
+ ret = bstrtmp_r; \
+}
+
+#endif
+
+typedef int (*bNgetc) (void *parm);
+typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, void *parm);
+
+/* Input functions */
+extern bstring bgets (bNgetc getcPtr, void * parm, char terminator);
+extern bstring bread (bNread readPtr, void * parm);
+extern int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator);
+extern int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator);
+extern int breada (bstring b, bNread readPtr, void * parm);
+
+/* Stream functions */
+extern struct bStream * bsopen (bNread readPtr, void * parm);
+extern void * bsclose (struct bStream * s);
+extern int bsbufflength (struct bStream * s, int sz);
+extern int bsreadln (bstring b, struct bStream * s, char terminator);
+extern int bsreadlns (bstring r, struct bStream * s, const_bstring term);
+extern int bsread (bstring b, struct bStream * s, int n);
+extern int bsreadlna (bstring b, struct bStream * s, char terminator);
+extern int bsreadlnsa (bstring r, struct bStream * s, const_bstring term);
+extern int bsreada (bstring b, struct bStream * s, int n);
+extern int bsunread (struct bStream * s, const_bstring b);
+extern int bspeek (bstring r, const struct bStream * s);
+extern int bssplitscb (struct bStream * s, const_bstring splitStr,
+ int (* cb) (void * parm, int ofs, const_bstring entry), void * parm);
+extern int bssplitstrcb (struct bStream * s, const_bstring splitStr,
+ int (* cb) (void * parm, int ofs, const_bstring entry), void * parm);
+extern int bseof (const struct bStream * s);
+
+/* The string header behind the bstring / const_bstring pointer types. */
+struct tagbstring {
+ int mlen;              /* presumably the allocated size of data; values <= 0 mark the string write protected (see biswriteprotected) */
+ int slen;              /* current length of the string; negative values are treated as invalid */
+ unsigned char * data;  /* character storage; NULL is treated as invalid */
+};
+
+/* Accessor macros */
+#define blengthe(b, e) (((b) == (void *)0 || (b)->slen < 0) ? (int)(e) : ((b)->slen))
+#define blength(b) (blengthe ((b), 0))
+#define bdataofse(b, o, e) (((b) == (void *)0 || (b)->data == (void*)0) ? (char *)(e) : ((char *)(b)->data) + (o))
+#define bdataofs(b, o) (bdataofse ((b), (o), (void *)0))
+#define bdatae(b, e) (bdataofse (b, 0, e))
+#define bdata(b) (bdataofs (b, 0))
+#define bchare(b, p, e) ((((unsigned)(p)) < (unsigned)blength(b)) ? ((b)->data[(p)]) : (e))
+#define bchar(b, p) bchare ((b), (p), '\0')
+
+/* Static constant string initialization macro */
+#define bsStaticMlen(q,m) {(m), (int) sizeof(q)-1, (unsigned char *) ("" q "")}
+#if defined(_MSC_VER)
+/* There are many versions of MSVC which emit __LINE__ as a non-constant. */
+# define bsStatic(q) bsStaticMlen(q,-32)
+#endif
+#ifndef bsStatic
+# define bsStatic(q) bsStaticMlen(q,-__LINE__)
+#endif
+
+/* Static constant block parameter pair */
+#define bsStaticBlkParms(q) ((void *)("" q "")), ((int) sizeof(q)-1)
+
+/* Reference building macros */
+#define cstr2tbstr btfromcstr
+#define btfromcstr(t,s) { \
+ (t).data = (unsigned char *) (s); \
+ (t).slen = ((t).data) ? ((int) (strlen) ((char *)(t).data)) : 0; \
+ (t).mlen = -1; \
+}
+#define blk2tbstr(t,s,l) { \
+ (t).data = (unsigned char *) (s); \
+ (t).slen = l; \
+ (t).mlen = -1; \
+}
+#define btfromblk(t,s,l) blk2tbstr(t,s,l)
+#define bmid2tbstr(t,b,p,l) { \
+ const_bstring bstrtmp_s = (b); \
+ if (bstrtmp_s && bstrtmp_s->data && bstrtmp_s->slen >= 0) { \
+ int bstrtmp_left = (p); \
+ int bstrtmp_len = (l); \
+ if (bstrtmp_left < 0) { \
+ bstrtmp_len += bstrtmp_left; \
+ bstrtmp_left = 0; \
+ } \
+ if (bstrtmp_len > bstrtmp_s->slen - bstrtmp_left) \
+ bstrtmp_len = bstrtmp_s->slen - bstrtmp_left; \
+ if (bstrtmp_len <= 0) { \
+ (t).data = (unsigned char *)""; \
+ (t).slen = 0; \
+ } else { \
+ (t).data = bstrtmp_s->data + bstrtmp_left; \
+ (t).slen = bstrtmp_len; \
+ } \
+ } else { \
+ (t).data = (unsigned char *)""; \
+ (t).slen = 0; \
+ } \
+ (t).mlen = -__LINE__; \
+}
+#define btfromblkltrimws(t,s,l) { \
+ int bstrtmp_idx = 0, bstrtmp_len = (l); \
+ unsigned char * bstrtmp_s = (s); \
+ if (bstrtmp_s && bstrtmp_len >= 0) { \
+ for (; bstrtmp_idx < bstrtmp_len; bstrtmp_idx++) { \
+ if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \
+ } \
+ } \
+ (t).data = bstrtmp_s + bstrtmp_idx; \
+ (t).slen = bstrtmp_len - bstrtmp_idx; \
+ (t).mlen = -__LINE__; \
+}
+#define btfromblkrtrimws(t,s,l) { \
+ int bstrtmp_len = (l) - 1; \
+ unsigned char * bstrtmp_s = (s); \
+ if (bstrtmp_s && bstrtmp_len >= 0) { \
+ for (; bstrtmp_len >= 0; bstrtmp_len--) { \
+ if (!isspace (bstrtmp_s[bstrtmp_len])) break; \
+ } \
+ } \
+ (t).data = bstrtmp_s; \
+ (t).slen = bstrtmp_len + 1; \
+ (t).mlen = -__LINE__; \
+}
+#define btfromblktrimws(t,s,l) { \
+ int bstrtmp_idx = 0, bstrtmp_len = (l) - 1; \
+ unsigned char * bstrtmp_s = (s); \
+ if (bstrtmp_s && bstrtmp_len >= 0) { \
+ for (; bstrtmp_idx <= bstrtmp_len; bstrtmp_idx++) { \
+ if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \
+ } \
+ for (; bstrtmp_len >= bstrtmp_idx; bstrtmp_len--) { \
+ if (!isspace (bstrtmp_s[bstrtmp_len])) break; \
+ } \
+ } \
+ (t).data = bstrtmp_s + bstrtmp_idx; \
+ (t).slen = bstrtmp_len + 1 - bstrtmp_idx; \
+ (t).mlen = -__LINE__; \
+}
+
+/* Write protection macros */
+/* These take a struct tagbstring lvalue (not a bstring pointer).
+ *   bwriteprotect(t)     - sets mlen to -1 unless it is already negative.
+ *   bwriteallow(t)       - undoes bwriteprotect: only when mlen is exactly
+ *                          -1, sets mlen to slen (or to 1 when slen is 0).
+ *   biswriteprotected(t) - non-zero when mlen <= 0.
+ */
+#define bwriteprotect(t) { if ((t).mlen >= 0) (t).mlen = -1; }
+#define bwriteallow(t) { if ((t).mlen == -1) (t).mlen = (t).slen + ((t).slen == 0); }
+#define biswriteprotected(t) ((t).mlen <= 0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/case_fold_switch.c b/src/case_fold_switch.c
new file mode 100644
index 0000000..70fdd75
--- /dev/null
+++ b/src/case_fold_switch.c
@@ -0,0 +1,2637 @@
+ switch (c) {
+ case 0x0041:
+ bufpush(0x0061);
+ break;
+ case 0x0042:
+ bufpush(0x0062);
+ break;
+ case 0x0043:
+ bufpush(0x0063);
+ break;
+ case 0x0044:
+ bufpush(0x0064);
+ break;
+ case 0x0045:
+ bufpush(0x0065);
+ break;
+ case 0x0046:
+ bufpush(0x0066);
+ break;
+ case 0x0047:
+ bufpush(0x0067);
+ break;
+ case 0x0048:
+ bufpush(0x0068);
+ break;
+ case 0x0049:
+ bufpush(0x0069);
+ break;
+ case 0x004A:
+ bufpush(0x006A);
+ break;
+ case 0x004B:
+ bufpush(0x006B);
+ break;
+ case 0x004C:
+ bufpush(0x006C);
+ break;
+ case 0x004D:
+ bufpush(0x006D);
+ break;
+ case 0x004E:
+ bufpush(0x006E);
+ break;
+ case 0x004F:
+ bufpush(0x006F);
+ break;
+ case 0x0050:
+ bufpush(0x0070);
+ break;
+ case 0x0051:
+ bufpush(0x0071);
+ break;
+ case 0x0052:
+ bufpush(0x0072);
+ break;
+ case 0x0053:
+ bufpush(0x0073);
+ break;
+ case 0x0054:
+ bufpush(0x0074);
+ break;
+ case 0x0055:
+ bufpush(0x0075);
+ break;
+ case 0x0056:
+ bufpush(0x0076);
+ break;
+ case 0x0057:
+ bufpush(0x0077);
+ break;
+ case 0x0058:
+ bufpush(0x0078);
+ break;
+ case 0x0059:
+ bufpush(0x0079);
+ break;
+ case 0x005A:
+ bufpush(0x007A);
+ break;
+ case 0x00B5:
+ bufpush(0x03BC);
+ break;
+ case 0x00C0:
+ bufpush(0x00E0);
+ break;
+ case 0x00C1:
+ bufpush(0x00E1);
+ break;
+ case 0x00C2:
+ bufpush(0x00E2);
+ break;
+ case 0x00C3:
+ bufpush(0x00E3);
+ break;
+ case 0x00C4:
+ bufpush(0x00E4);
+ break;
+ case 0x00C5:
+ bufpush(0x00E5);
+ break;
+ case 0x00C6:
+ bufpush(0x00E6);
+ break;
+ case 0x00C7:
+ bufpush(0x00E7);
+ break;
+ case 0x00C8:
+ bufpush(0x00E8);
+ break;
+ case 0x00C9:
+ bufpush(0x00E9);
+ break;
+ case 0x00CA:
+ bufpush(0x00EA);
+ break;
+ case 0x00CB:
+ bufpush(0x00EB);
+ break;
+ case 0x00CC:
+ bufpush(0x00EC);
+ break;
+ case 0x00CD:
+ bufpush(0x00ED);
+ break;
+ case 0x00CE:
+ bufpush(0x00EE);
+ break;
+ case 0x00CF:
+ bufpush(0x00EF);
+ break;
+ case 0x00D0:
+ bufpush(0x00F0);
+ break;
+ case 0x00D1:
+ bufpush(0x00F1);
+ break;
+ case 0x00D2:
+ bufpush(0x00F2);
+ break;
+ case 0x00D3:
+ bufpush(0x00F3);
+ break;
+ case 0x00D4:
+ bufpush(0x00F4);
+ break;
+ case 0x00D5:
+ bufpush(0x00F5);
+ break;
+ case 0x00D6:
+ bufpush(0x00F6);
+ break;
+ case 0x00D8:
+ bufpush(0x00F8);
+ break;
+ case 0x00D9:
+ bufpush(0x00F9);
+ break;
+ case 0x00DA:
+ bufpush(0x00FA);
+ break;
+ case 0x00DB:
+ bufpush(0x00FB);
+ break;
+ case 0x00DC:
+ bufpush(0x00FC);
+ break;
+ case 0x00DD:
+ bufpush(0x00FD);
+ break;
+ case 0x00DE:
+ bufpush(0x00FE);
+ break;
+ case 0x00DF:
+ bufpush(0x0073);
+ bufpush(0x0073);
+ break;
+ case 0x0100:
+ bufpush(0x0101);
+ break;
+ case 0x0102:
+ bufpush(0x0103);
+ break;
+ case 0x0104:
+ bufpush(0x0105);
+ break;
+ case 0x0106:
+ bufpush(0x0107);
+ break;
+ case 0x0108:
+ bufpush(0x0109);
+ break;
+ case 0x010A:
+ bufpush(0x010B);
+ break;
+ case 0x010C:
+ bufpush(0x010D);
+ break;
+ case 0x010E:
+ bufpush(0x010F);
+ break;
+ case 0x0110:
+ bufpush(0x0111);
+ break;
+ case 0x0112:
+ bufpush(0x0113);
+ break;
+ case 0x0114:
+ bufpush(0x0115);
+ break;
+ case 0x0116:
+ bufpush(0x0117);
+ break;
+ case 0x0118:
+ bufpush(0x0119);
+ break;
+ case 0x011A:
+ bufpush(0x011B);
+ break;
+ case 0x011C:
+ bufpush(0x011D);
+ break;
+ case 0x011E:
+ bufpush(0x011F);
+ break;
+ case 0x0120:
+ bufpush(0x0121);
+ break;
+ case 0x0122:
+ bufpush(0x0123);
+ break;
+ case 0x0124:
+ bufpush(0x0125);
+ break;
+ case 0x0126:
+ bufpush(0x0127);
+ break;
+ case 0x0128:
+ bufpush(0x0129);
+ break;
+ case 0x012A:
+ bufpush(0x012B);
+ break;
+ case 0x012C:
+ bufpush(0x012D);
+ break;
+ case 0x012E:
+ bufpush(0x012F);
+ break;
+ case 0x0130:
+ bufpush(0x0069);
+ bufpush(0x0307);
+ break;
+ case 0x0132:
+ bufpush(0x0133);
+ break;
+ case 0x0134:
+ bufpush(0x0135);
+ break;
+ case 0x0136:
+ bufpush(0x0137);
+ break;
+ case 0x0139:
+ bufpush(0x013A);
+ break;
+ case 0x013B:
+ bufpush(0x013C);
+ break;
+ case 0x013D:
+ bufpush(0x013E);
+ break;
+ case 0x013F:
+ bufpush(0x0140);
+ break;
+ case 0x0141:
+ bufpush(0x0142);
+ break;
+ case 0x0143:
+ bufpush(0x0144);
+ break;
+ case 0x0145:
+ bufpush(0x0146);
+ break;
+ case 0x0147:
+ bufpush(0x0148);
+ break;
+ case 0x0149:
+ bufpush(0x02BC);
+ bufpush(0x006E);
+ break;
+ case 0x014A:
+ bufpush(0x014B);
+ break;
+ case 0x014C:
+ bufpush(0x014D);
+ break;
+ case 0x014E:
+ bufpush(0x014F);
+ break;
+ case 0x0150:
+ bufpush(0x0151);
+ break;
+ case 0x0152:
+ bufpush(0x0153);
+ break;
+ case 0x0154:
+ bufpush(0x0155);
+ break;
+ case 0x0156:
+ bufpush(0x0157);
+ break;
+ case 0x0158:
+ bufpush(0x0159);
+ break;
+ case 0x015A:
+ bufpush(0x015B);
+ break;
+ case 0x015C:
+ bufpush(0x015D);
+ break;
+ case 0x015E:
+ bufpush(0x015F);
+ break;
+ case 0x0160:
+ bufpush(0x0161);
+ break;
+ case 0x0162:
+ bufpush(0x0163);
+ break;
+ case 0x0164:
+ bufpush(0x0165);
+ break;
+ case 0x0166:
+ bufpush(0x0167);
+ break;
+ case 0x0168:
+ bufpush(0x0169);
+ break;
+ case 0x016A:
+ bufpush(0x016B);
+ break;
+ case 0x016C:
+ bufpush(0x016D);
+ break;
+ case 0x016E:
+ bufpush(0x016F);
+ break;
+ case 0x0170:
+ bufpush(0x0171);
+ break;
+ case 0x0172:
+ bufpush(0x0173);
+ break;
+ case 0x0174:
+ bufpush(0x0175);
+ break;
+ case 0x0176:
+ bufpush(0x0177);
+ break;
+ case 0x0178:
+ bufpush(0x00FF);
+ break;
+ case 0x0179:
+ bufpush(0x017A);
+ break;
+ case 0x017B:
+ bufpush(0x017C);
+ break;
+ case 0x017D:
+ bufpush(0x017E);
+ break;
+ case 0x017F:
+ bufpush(0x0073);
+ break;
+ case 0x0181:
+ bufpush(0x0253);
+ break;
+ case 0x0182:
+ bufpush(0x0183);
+ break;
+ case 0x0184:
+ bufpush(0x0185);
+ break;
+ case 0x0186:
+ bufpush(0x0254);
+ break;
+ case 0x0187:
+ bufpush(0x0188);
+ break;
+ case 0x0189:
+ bufpush(0x0256);
+ break;
+ case 0x018A:
+ bufpush(0x0257);
+ break;
+ case 0x018B:
+ bufpush(0x018C);
+ break;
+ case 0x018E:
+ bufpush(0x01DD);
+ break;
+ case 0x018F:
+ bufpush(0x0259);
+ break;
+ case 0x0190:
+ bufpush(0x025B);
+ break;
+ case 0x0191:
+ bufpush(0x0192);
+ break;
+ case 0x0193:
+ bufpush(0x0260);
+ break;
+ case 0x0194:
+ bufpush(0x0263);
+ break;
+ case 0x0196:
+ bufpush(0x0269);
+ break;
+ case 0x0197:
+ bufpush(0x0268);
+ break;
+ case 0x0198:
+ bufpush(0x0199);
+ break;
+ case 0x019C:
+ bufpush(0x026F);
+ break;
+ case 0x019D:
+ bufpush(0x0272);
+ break;
+ case 0x019F:
+ bufpush(0x0275);
+ break;
+ case 0x01A0:
+ bufpush(0x01A1);
+ break;
+ case 0x01A2:
+ bufpush(0x01A3);
+ break;
+ case 0x01A4:
+ bufpush(0x01A5);
+ break;
+ case 0x01A6:
+ bufpush(0x0280);
+ break;
+ case 0x01A7:
+ bufpush(0x01A8);
+ break;
+ case 0x01A9:
+ bufpush(0x0283);
+ break;
+ case 0x01AC:
+ bufpush(0x01AD);
+ break;
+ case 0x01AE:
+ bufpush(0x0288);
+ break;
+ case 0x01AF:
+ bufpush(0x01B0);
+ break;
+ case 0x01B1:
+ bufpush(0x028A);
+ break;
+ case 0x01B2:
+ bufpush(0x028B);
+ break;
+ case 0x01B3:
+ bufpush(0x01B4);
+ break;
+ case 0x01B5:
+ bufpush(0x01B6);
+ break;
+ case 0x01B7:
+ bufpush(0x0292);
+ break;
+ case 0x01B8:
+ bufpush(0x01B9);
+ break;
+ case 0x01BC:
+ bufpush(0x01BD);
+ break;
+ case 0x01C4:
+ bufpush(0x01C6);
+ break;
+ case 0x01C5:
+ bufpush(0x01C6);
+ break;
+ case 0x01C7:
+ bufpush(0x01C9);
+ break;
+ case 0x01C8:
+ bufpush(0x01C9);
+ break;
+ case 0x01CA:
+ bufpush(0x01CC);
+ break;
+ case 0x01CB:
+ bufpush(0x01CC);
+ break;
+ case 0x01CD:
+ bufpush(0x01CE);
+ break;
+ case 0x01CF:
+ bufpush(0x01D0);
+ break;
+ case 0x01D1:
+ bufpush(0x01D2);
+ break;
+ case 0x01D3:
+ bufpush(0x01D4);
+ break;
+ case 0x01D5:
+ bufpush(0x01D6);
+ break;
+ case 0x01D7:
+ bufpush(0x01D8);
+ break;
+ case 0x01D9:
+ bufpush(0x01DA);
+ break;
+ case 0x01DB:
+ bufpush(0x01DC);
+ break;
+ case 0x01DE:
+ bufpush(0x01DF);
+ break;
+ case 0x01E0:
+ bufpush(0x01E1);
+ break;
+ case 0x01E2:
+ bufpush(0x01E3);
+ break;
+ case 0x01E4:
+ bufpush(0x01E5);
+ break;
+ case 0x01E6:
+ bufpush(0x01E7);
+ break;
+ case 0x01E8:
+ bufpush(0x01E9);
+ break;
+ case 0x01EA:
+ bufpush(0x01EB);
+ break;
+ case 0x01EC:
+ bufpush(0x01ED);
+ break;
+ case 0x01EE:
+ bufpush(0x01EF);
+ break;
+ case 0x01F0:
+ bufpush(0x006A);
+ bufpush(0x030C);
+ break;
+ case 0x01F1:
+ bufpush(0x01F3);
+ break;
+ case 0x01F2:
+ bufpush(0x01F3);
+ break;
+ case 0x01F4:
+ bufpush(0x01F5);
+ break;
+ case 0x01F6:
+ bufpush(0x0195);
+ break;
+ case 0x01F7:
+ bufpush(0x01BF);
+ break;
+ case 0x01F8:
+ bufpush(0x01F9);
+ break;
+ case 0x01FA:
+ bufpush(0x01FB);
+ break;
+ case 0x01FC:
+ bufpush(0x01FD);
+ break;
+ case 0x01FE:
+ bufpush(0x01FF);
+ break;
+ case 0x0200:
+ bufpush(0x0201);
+ break;
+ case 0x0202:
+ bufpush(0x0203);
+ break;
+ case 0x0204:
+ bufpush(0x0205);
+ break;
+ case 0x0206:
+ bufpush(0x0207);
+ break;
+ case 0x0208:
+ bufpush(0x0209);
+ break;
+ case 0x020A:
+ bufpush(0x020B);
+ break;
+ case 0x020C:
+ bufpush(0x020D);
+ break;
+ case 0x020E:
+ bufpush(0x020F);
+ break;
+ case 0x0210:
+ bufpush(0x0211);
+ break;
+ case 0x0212:
+ bufpush(0x0213);
+ break;
+ case 0x0214:
+ bufpush(0x0215);
+ break;
+ case 0x0216:
+ bufpush(0x0217);
+ break;
+ case 0x0218:
+ bufpush(0x0219);
+ break;
+ case 0x021A:
+ bufpush(0x021B);
+ break;
+ case 0x021C:
+ bufpush(0x021D);
+ break;
+ case 0x021E:
+ bufpush(0x021F);
+ break;
+ case 0x0220:
+ bufpush(0x019E);
+ break;
+ case 0x0222:
+ bufpush(0x0223);
+ break;
+ case 0x0224:
+ bufpush(0x0225);
+ break;
+ case 0x0226:
+ bufpush(0x0227);
+ break;
+ case 0x0228:
+ bufpush(0x0229);
+ break;
+ case 0x022A:
+ bufpush(0x022B);
+ break;
+ case 0x022C:
+ bufpush(0x022D);
+ break;
+ case 0x022E:
+ bufpush(0x022F);
+ break;
+ case 0x0230:
+ bufpush(0x0231);
+ break;
+ case 0x0232:
+ bufpush(0x0233);
+ break;
+ case 0x0345:
+ bufpush(0x03B9);
+ break;
+ case 0x0386:
+ bufpush(0x03AC);
+ break;
+ case 0x0388:
+ bufpush(0x03AD);
+ break;
+ case 0x0389:
+ bufpush(0x03AE);
+ break;
+ case 0x038A:
+ bufpush(0x03AF);
+ break;
+ case 0x038C:
+ bufpush(0x03CC);
+ break;
+ case 0x038E:
+ bufpush(0x03CD);
+ break;
+ case 0x038F:
+ bufpush(0x03CE);
+ break;
+ case 0x0390:
+ bufpush(0x03B9);
+ bufpush(0x0308);
+ bufpush(0x0301);
+ break;
+ case 0x0391:
+ bufpush(0x03B1);
+ break;
+ case 0x0392:
+ bufpush(0x03B2);
+ break;
+ case 0x0393:
+ bufpush(0x03B3);
+ break;
+ case 0x0394:
+ bufpush(0x03B4);
+ break;
+ case 0x0395:
+ bufpush(0x03B5);
+ break;
+ case 0x0396:
+ bufpush(0x03B6);
+ break;
+ case 0x0397:
+ bufpush(0x03B7);
+ break;
+ case 0x0398:
+ bufpush(0x03B8);
+ break;
+ case 0x0399:
+ bufpush(0x03B9);
+ break;
+ case 0x039A:
+ bufpush(0x03BA);
+ break;
+ case 0x039B:
+ bufpush(0x03BB);
+ break;
+ case 0x039C:
+ bufpush(0x03BC);
+ break;
+ case 0x039D:
+ bufpush(0x03BD);
+ break;
+ case 0x039E:
+ bufpush(0x03BE);
+ break;
+ case 0x039F:
+ bufpush(0x03BF);
+ break;
+ case 0x03A0:
+ bufpush(0x03C0);
+ break;
+ case 0x03A1:
+ bufpush(0x03C1);
+ break;
+ case 0x03A3:
+ bufpush(0x03C3);
+ break;
+ case 0x03A4:
+ bufpush(0x03C4);
+ break;
+ case 0x03A5:
+ bufpush(0x03C5);
+ break;
+ case 0x03A6:
+ bufpush(0x03C6);
+ break;
+ case 0x03A7:
+ bufpush(0x03C7);
+ break;
+ case 0x03A8:
+ bufpush(0x03C8);
+ break;
+ case 0x03A9:
+ bufpush(0x03C9);
+ break;
+ case 0x03AA:
+ bufpush(0x03CA);
+ break;
+ case 0x03AB:
+ bufpush(0x03CB);
+ break;
+ case 0x03B0:
+ bufpush(0x03C5);
+ bufpush(0x0308);
+ bufpush(0x0301);
+ break;
+ case 0x03C2:
+ bufpush(0x03C3);
+ break;
+ case 0x03D0:
+ bufpush(0x03B2);
+ break;
+ case 0x03D1:
+ bufpush(0x03B8);
+ break;
+ case 0x03D5:
+ bufpush(0x03C6);
+ break;
+ case 0x03D6:
+ bufpush(0x03C0);
+ break;
+ case 0x03D8:
+ bufpush(0x03D9);
+ break;
+ case 0x03DA:
+ bufpush(0x03DB);
+ break;
+ case 0x03DC:
+ bufpush(0x03DD);
+ break;
+ case 0x03DE:
+ bufpush(0x03DF);
+ break;
+ case 0x03E0:
+ bufpush(0x03E1);
+ break;
+ case 0x03E2:
+ bufpush(0x03E3);
+ break;
+ case 0x03E4:
+ bufpush(0x03E5);
+ break;
+ case 0x03E6:
+ bufpush(0x03E7);
+ break;
+ case 0x03E8:
+ bufpush(0x03E9);
+ break;
+ case 0x03EA:
+ bufpush(0x03EB);
+ break;
+ case 0x03EC:
+ bufpush(0x03ED);
+ break;
+ case 0x03EE:
+ bufpush(0x03EF);
+ break;
+ case 0x03F0:
+ bufpush(0x03BA);
+ break;
+ case 0x03F1:
+ bufpush(0x03C1);
+ break;
+ case 0x03F2:
+ bufpush(0x03C3);
+ break;
+ case 0x03F4:
+ bufpush(0x03B8);
+ break;
+ case 0x03F5:
+ bufpush(0x03B5);
+ break;
+ case 0x0400:
+ bufpush(0x0450);
+ break;
+ case 0x0401:
+ bufpush(0x0451);
+ break;
+ case 0x0402:
+ bufpush(0x0452);
+ break;
+ case 0x0403:
+ bufpush(0x0453);
+ break;
+ case 0x0404:
+ bufpush(0x0454);
+ break;
+ case 0x0405:
+ bufpush(0x0455);
+ break;
+ case 0x0406:
+ bufpush(0x0456);
+ break;
+ case 0x0407:
+ bufpush(0x0457);
+ break;
+ case 0x0408:
+ bufpush(0x0458);
+ break;
+ case 0x0409:
+ bufpush(0x0459);
+ break;
+ case 0x040A:
+ bufpush(0x045A);
+ break;
+ case 0x040B:
+ bufpush(0x045B);
+ break;
+ case 0x040C:
+ bufpush(0x045C);
+ break;
+ case 0x040D:
+ bufpush(0x045D);
+ break;
+ case 0x040E:
+ bufpush(0x045E);
+ break;
+ case 0x040F:
+ bufpush(0x045F);
+ break;
+ case 0x0410:
+ bufpush(0x0430);
+ break;
+ case 0x0411:
+ bufpush(0x0431);
+ break;
+ case 0x0412:
+ bufpush(0x0432);
+ break;
+ case 0x0413:
+ bufpush(0x0433);
+ break;
+ case 0x0414:
+ bufpush(0x0434);
+ break;
+ case 0x0415:
+ bufpush(0x0435);
+ break;
+ case 0x0416:
+ bufpush(0x0436);
+ break;
+ case 0x0417:
+ bufpush(0x0437);
+ break;
+ case 0x0418:
+ bufpush(0x0438);
+ break;
+ case 0x0419:
+ bufpush(0x0439);
+ break;
+ case 0x041A:
+ bufpush(0x043A);
+ break;
+ case 0x041B:
+ bufpush(0x043B);
+ break;
+ case 0x041C:
+ bufpush(0x043C);
+ break;
+ case 0x041D:
+ bufpush(0x043D);
+ break;
+ case 0x041E:
+ bufpush(0x043E);
+ break;
+ case 0x041F:
+ bufpush(0x043F);
+ break;
+ case 0x0420:
+ bufpush(0x0440);
+ break;
+ case 0x0421:
+ bufpush(0x0441);
+ break;
+ case 0x0422:
+ bufpush(0x0442);
+ break;
+ case 0x0423:
+ bufpush(0x0443);
+ break;
+ case 0x0424:
+ bufpush(0x0444);
+ break;
+ case 0x0425:
+ bufpush(0x0445);
+ break;
+ case 0x0426:
+ bufpush(0x0446);
+ break;
+ case 0x0427:
+ bufpush(0x0447);
+ break;
+ case 0x0428:
+ bufpush(0x0448);
+ break;
+ case 0x0429:
+ bufpush(0x0449);
+ break;
+ case 0x042A:
+ bufpush(0x044A);
+ break;
+ case 0x042B:
+ bufpush(0x044B);
+ break;
+ case 0x042C:
+ bufpush(0x044C);
+ break;
+ case 0x042D:
+ bufpush(0x044D);
+ break;
+ case 0x042E:
+ bufpush(0x044E);
+ break;
+ case 0x042F:
+ bufpush(0x044F);
+ break;
+ case 0x0460:
+ bufpush(0x0461);
+ break;
+ case 0x0462:
+ bufpush(0x0463);
+ break;
+ case 0x0464:
+ bufpush(0x0465);
+ break;
+ case 0x0466:
+ bufpush(0x0467);
+ break;
+ case 0x0468:
+ bufpush(0x0469);
+ break;
+ case 0x046A:
+ bufpush(0x046B);
+ break;
+ case 0x046C:
+ bufpush(0x046D);
+ break;
+ case 0x046E:
+ bufpush(0x046F);
+ break;
+ case 0x0470:
+ bufpush(0x0471);
+ break;
+ case 0x0472:
+ bufpush(0x0473);
+ break;
+ case 0x0474:
+ bufpush(0x0475);
+ break;
+ case 0x0476:
+ bufpush(0x0477);
+ break;
+ case 0x0478:
+ bufpush(0x0479);
+ break;
+ case 0x047A:
+ bufpush(0x047B);
+ break;
+ case 0x047C:
+ bufpush(0x047D);
+ break;
+ case 0x047E:
+ bufpush(0x047F);
+ break;
+ case 0x0480:
+ bufpush(0x0481);
+ break;
+ case 0x048A:
+ bufpush(0x048B);
+ break;
+ case 0x048C:
+ bufpush(0x048D);
+ break;
+ case 0x048E:
+ bufpush(0x048F);
+ break;
+ case 0x0490:
+ bufpush(0x0491);
+ break;
+ case 0x0492:
+ bufpush(0x0493);
+ break;
+ case 0x0494:
+ bufpush(0x0495);
+ break;
+ case 0x0496:
+ bufpush(0x0497);
+ break;
+ case 0x0498:
+ bufpush(0x0499);
+ break;
+ case 0x049A:
+ bufpush(0x049B);
+ break;
+ case 0x049C:
+ bufpush(0x049D);
+ break;
+ case 0x049E:
+ bufpush(0x049F);
+ break;
+ case 0x04A0:
+ bufpush(0x04A1);
+ break;
+ case 0x04A2:
+ bufpush(0x04A3);
+ break;
+ case 0x04A4:
+ bufpush(0x04A5);
+ break;
+ case 0x04A6:
+ bufpush(0x04A7);
+ break;
+ case 0x04A8:
+ bufpush(0x04A9);
+ break;
+ case 0x04AA:
+ bufpush(0x04AB);
+ break;
+ case 0x04AC:
+ bufpush(0x04AD);
+ break;
+ case 0x04AE:
+ bufpush(0x04AF);
+ break;
+ case 0x04B0:
+ bufpush(0x04B1);
+ break;
+ case 0x04B2:
+ bufpush(0x04B3);
+ break;
+ case 0x04B4:
+ bufpush(0x04B5);
+ break;
+ case 0x04B6:
+ bufpush(0x04B7);
+ break;
+ case 0x04B8:
+ bufpush(0x04B9);
+ break;
+ case 0x04BA:
+ bufpush(0x04BB);
+ break;
+ case 0x04BC:
+ bufpush(0x04BD);
+ break;
+ case 0x04BE:
+ bufpush(0x04BF);
+ break;
+ case 0x04C1:
+ bufpush(0x04C2);
+ break;
+ case 0x04C3:
+ bufpush(0x04C4);
+ break;
+ case 0x04C5:
+ bufpush(0x04C6);
+ break;
+ case 0x04C7:
+ bufpush(0x04C8);
+ break;
+ case 0x04C9:
+ bufpush(0x04CA);
+ break;
+ case 0x04CB:
+ bufpush(0x04CC);
+ break;
+ case 0x04CD:
+ bufpush(0x04CE);
+ break;
+ case 0x04D0:
+ bufpush(0x04D1);
+ break;
+ case 0x04D2:
+ bufpush(0x04D3);
+ break;
+ case 0x04D4:
+ bufpush(0x04D5);
+ break;
+ case 0x04D6:
+ bufpush(0x04D7);
+ break;
+ case 0x04D8:
+ bufpush(0x04D9);
+ break;
+ case 0x04DA:
+ bufpush(0x04DB);
+ break;
+ case 0x04DC:
+ bufpush(0x04DD);
+ break;
+ case 0x04DE:
+ bufpush(0x04DF);
+ break;
+ case 0x04E0:
+ bufpush(0x04E1);
+ break;
+ case 0x04E2:
+ bufpush(0x04E3);
+ break;
+ case 0x04E4:
+ bufpush(0x04E5);
+ break;
+ case 0x04E6:
+ bufpush(0x04E7);
+ break;
+ case 0x04E8:
+ bufpush(0x04E9);
+ break;
+ case 0x04EA:
+ bufpush(0x04EB);
+ break;
+ case 0x04EC:
+ bufpush(0x04ED);
+ break;
+ case 0x04EE:
+ bufpush(0x04EF);
+ break;
+ case 0x04F0:
+ bufpush(0x04F1);
+ break;
+ case 0x04F2:
+ bufpush(0x04F3);
+ break;
+ case 0x04F4:
+ bufpush(0x04F5);
+ break;
+ case 0x04F8:
+ bufpush(0x04F9);
+ break;
+ case 0x0500:
+ bufpush(0x0501);
+ break;
+ case 0x0502:
+ bufpush(0x0503);
+ break;
+ case 0x0504:
+ bufpush(0x0505);
+ break;
+ case 0x0506:
+ bufpush(0x0507);
+ break;
+ case 0x0508:
+ bufpush(0x0509);
+ break;
+ case 0x050A:
+ bufpush(0x050B);
+ break;
+ case 0x050C:
+ bufpush(0x050D);
+ break;
+ case 0x050E:
+ bufpush(0x050F);
+ break;
+ case 0x0531:
+ bufpush(0x0561);
+ break;
+ case 0x0532:
+ bufpush(0x0562);
+ break;
+ case 0x0533:
+ bufpush(0x0563);
+ break;
+ case 0x0534:
+ bufpush(0x0564);
+ break;
+ case 0x0535:
+ bufpush(0x0565);
+ break;
+ case 0x0536:
+ bufpush(0x0566);
+ break;
+ case 0x0537:
+ bufpush(0x0567);
+ break;
+ case 0x0538:
+ bufpush(0x0568);
+ break;
+ case 0x0539:
+ bufpush(0x0569);
+ break;
+ case 0x053A:
+ bufpush(0x056A);
+ break;
+ case 0x053B:
+ bufpush(0x056B);
+ break;
+ case 0x053C:
+ bufpush(0x056C);
+ break;
+ case 0x053D:
+ bufpush(0x056D);
+ break;
+ case 0x053E:
+ bufpush(0x056E);
+ break;
+ case 0x053F:
+ bufpush(0x056F);
+ break;
+ case 0x0540:
+ bufpush(0x0570);
+ break;
+ case 0x0541:
+ bufpush(0x0571);
+ break;
+ case 0x0542:
+ bufpush(0x0572);
+ break;
+ case 0x0543:
+ bufpush(0x0573);
+ break;
+ case 0x0544:
+ bufpush(0x0574);
+ break;
+ case 0x0545:
+ bufpush(0x0575);
+ break;
+ case 0x0546:
+ bufpush(0x0576);
+ break;
+ case 0x0547:
+ bufpush(0x0577);
+ break;
+ case 0x0548:
+ bufpush(0x0578);
+ break;
+ case 0x0549:
+ bufpush(0x0579);
+ break;
+ case 0x054A:
+ bufpush(0x057A);
+ break;
+ case 0x054B:
+ bufpush(0x057B);
+ break;
+ case 0x054C:
+ bufpush(0x057C);
+ break;
+ case 0x054D:
+ bufpush(0x057D);
+ break;
+ case 0x054E:
+ bufpush(0x057E);
+ break;
+ case 0x054F:
+ bufpush(0x057F);
+ break;
+ case 0x0550:
+ bufpush(0x0580);
+ break;
+ case 0x0551:
+ bufpush(0x0581);
+ break;
+ case 0x0552:
+ bufpush(0x0582);
+ break;
+ case 0x0553:
+ bufpush(0x0583);
+ break;
+ case 0x0554:
+ bufpush(0x0584);
+ break;
+ case 0x0555:
+ bufpush(0x0585);
+ break;
+ case 0x0556:
+ bufpush(0x0586);
+ break;
+ case 0x0587:
+ bufpush(0x0565);
+ bufpush(0x0582);
+ break;
+ case 0x1E00:
+ bufpush(0x1E01);
+ break;
+ case 0x1E02:
+ bufpush(0x1E03);
+ break;
+ case 0x1E04:
+ bufpush(0x1E05);
+ break;
+ case 0x1E06:
+ bufpush(0x1E07);
+ break;
+ case 0x1E08:
+ bufpush(0x1E09);
+ break;
+ case 0x1E0A:
+ bufpush(0x1E0B);
+ break;
+ case 0x1E0C:
+ bufpush(0x1E0D);
+ break;
+ case 0x1E0E:
+ bufpush(0x1E0F);
+ break;
+ case 0x1E10:
+ bufpush(0x1E11);
+ break;
+ case 0x1E12:
+ bufpush(0x1E13);
+ break;
+ case 0x1E14:
+ bufpush(0x1E15);
+ break;
+ case 0x1E16:
+ bufpush(0x1E17);
+ break;
+ case 0x1E18:
+ bufpush(0x1E19);
+ break;
+ case 0x1E1A:
+ bufpush(0x1E1B);
+ break;
+ case 0x1E1C:
+ bufpush(0x1E1D);
+ break;
+ case 0x1E1E:
+ bufpush(0x1E1F);
+ break;
+ case 0x1E20:
+ bufpush(0x1E21);
+ break;
+ case 0x1E22:
+ bufpush(0x1E23);
+ break;
+ case 0x1E24:
+ bufpush(0x1E25);
+ break;
+ case 0x1E26:
+ bufpush(0x1E27);
+ break;
+ case 0x1E28:
+ bufpush(0x1E29);
+ break;
+ case 0x1E2A:
+ bufpush(0x1E2B);
+ break;
+ case 0x1E2C:
+ bufpush(0x1E2D);
+ break;
+ case 0x1E2E:
+ bufpush(0x1E2F);
+ break;
+ case 0x1E30:
+ bufpush(0x1E31);
+ break;
+ case 0x1E32:
+ bufpush(0x1E33);
+ break;
+ case 0x1E34:
+ bufpush(0x1E35);
+ break;
+ case 0x1E36:
+ bufpush(0x1E37);
+ break;
+ case 0x1E38:
+ bufpush(0x1E39);
+ break;
+ case 0x1E3A:
+ bufpush(0x1E3B);
+ break;
+ case 0x1E3C:
+ bufpush(0x1E3D);
+ break;
+ case 0x1E3E:
+ bufpush(0x1E3F);
+ break;
+ case 0x1E40:
+ bufpush(0x1E41);
+ break;
+ case 0x1E42:
+ bufpush(0x1E43);
+ break;
+ case 0x1E44:
+ bufpush(0x1E45);
+ break;
+ case 0x1E46:
+ bufpush(0x1E47);
+ break;
+ case 0x1E48:
+ bufpush(0x1E49);
+ break;
+ case 0x1E4A:
+ bufpush(0x1E4B);
+ break;
+ case 0x1E4C:
+ bufpush(0x1E4D);
+ break;
+ case 0x1E4E:
+ bufpush(0x1E4F);
+ break;
+ case 0x1E50:
+ bufpush(0x1E51);
+ break;
+ case 0x1E52:
+ bufpush(0x1E53);
+ break;
+ case 0x1E54:
+ bufpush(0x1E55);
+ break;
+ case 0x1E56:
+ bufpush(0x1E57);
+ break;
+ case 0x1E58:
+ bufpush(0x1E59);
+ break;
+ case 0x1E5A:
+ bufpush(0x1E5B);
+ break;
+ case 0x1E5C:
+ bufpush(0x1E5D);
+ break;
+ case 0x1E5E:
+ bufpush(0x1E5F);
+ break;
+ case 0x1E60:
+ bufpush(0x1E61);
+ break;
+ case 0x1E62:
+ bufpush(0x1E63);
+ break;
+ case 0x1E64:
+ bufpush(0x1E65);
+ break;
+ case 0x1E66:
+ bufpush(0x1E67);
+ break;
+ case 0x1E68:
+ bufpush(0x1E69);
+ break;
+ case 0x1E6A:
+ bufpush(0x1E6B);
+ break;
+ case 0x1E6C:
+ bufpush(0x1E6D);
+ break;
+ case 0x1E6E:
+ bufpush(0x1E6F);
+ break;
+ case 0x1E70:
+ bufpush(0x1E71);
+ break;
+ case 0x1E72:
+ bufpush(0x1E73);
+ break;
+ case 0x1E74:
+ bufpush(0x1E75);
+ break;
+ case 0x1E76:
+ bufpush(0x1E77);
+ break;
+ case 0x1E78:
+ bufpush(0x1E79);
+ break;
+ case 0x1E7A:
+ bufpush(0x1E7B);
+ break;
+ case 0x1E7C:
+ bufpush(0x1E7D);
+ break;
+ case 0x1E7E:
+ bufpush(0x1E7F);
+ break;
+ case 0x1E80:
+ bufpush(0x1E81);
+ break;
+ case 0x1E82:
+ bufpush(0x1E83);
+ break;
+ case 0x1E84:
+ bufpush(0x1E85);
+ break;
+ case 0x1E86:
+ bufpush(0x1E87);
+ break;
+ case 0x1E88:
+ bufpush(0x1E89);
+ break;
+ case 0x1E8A:
+ bufpush(0x1E8B);
+ break;
+ case 0x1E8C:
+ bufpush(0x1E8D);
+ break;
+ case 0x1E8E:
+ bufpush(0x1E8F);
+ break;
+ case 0x1E90:
+ bufpush(0x1E91);
+ break;
+ case 0x1E92:
+ bufpush(0x1E93);
+ break;
+ case 0x1E94:
+ bufpush(0x1E95);
+ break;
+ case 0x1E96:
+ bufpush(0x0068);
+ bufpush(0x0331);
+ break;
+ case 0x1E97:
+ bufpush(0x0074);
+ bufpush(0x0308);
+ break;
+ case 0x1E98:
+ bufpush(0x0077);
+ bufpush(0x030A);
+ break;
+ case 0x1E99:
+ bufpush(0x0079);
+ bufpush(0x030A);
+ break;
+ case 0x1E9A:
+ bufpush(0x0061);
+ bufpush(0x02BE);
+ break;
+ case 0x1E9B:
+ bufpush(0x1E61);
+ break;
+ case 0x1EA0:
+ bufpush(0x1EA1);
+ break;
+ case 0x1EA2:
+ bufpush(0x1EA3);
+ break;
+ case 0x1EA4:
+ bufpush(0x1EA5);
+ break;
+ case 0x1EA6:
+ bufpush(0x1EA7);
+ break;
+ case 0x1EA8:
+ bufpush(0x1EA9);
+ break;
+ case 0x1EAA:
+ bufpush(0x1EAB);
+ break;
+ case 0x1EAC:
+ bufpush(0x1EAD);
+ break;
+ case 0x1EAE:
+ bufpush(0x1EAF);
+ break;
+ case 0x1EB0:
+ bufpush(0x1EB1);
+ break;
+ case 0x1EB2:
+ bufpush(0x1EB3);
+ break;
+ case 0x1EB4:
+ bufpush(0x1EB5);
+ break;
+ case 0x1EB6:
+ bufpush(0x1EB7);
+ break;
+ case 0x1EB8:
+ bufpush(0x1EB9);
+ break;
+ case 0x1EBA:
+ bufpush(0x1EBB);
+ break;
+ case 0x1EBC:
+ bufpush(0x1EBD);
+ break;
+ case 0x1EBE:
+ bufpush(0x1EBF);
+ break;
+ case 0x1EC0:
+ bufpush(0x1EC1);
+ break;
+ case 0x1EC2:
+ bufpush(0x1EC3);
+ break;
+ case 0x1EC4:
+ bufpush(0x1EC5);
+ break;
+ case 0x1EC6:
+ bufpush(0x1EC7);
+ break;
+ case 0x1EC8:
+ bufpush(0x1EC9);
+ break;
+ case 0x1ECA:
+ bufpush(0x1ECB);
+ break;
+ case 0x1ECC:
+ bufpush(0x1ECD);
+ break;
+ case 0x1ECE:
+ bufpush(0x1ECF);
+ break;
+ case 0x1ED0:
+ bufpush(0x1ED1);
+ break;
+ case 0x1ED2:
+ bufpush(0x1ED3);
+ break;
+ case 0x1ED4:
+ bufpush(0x1ED5);
+ break;
+ case 0x1ED6:
+ bufpush(0x1ED7);
+ break;
+ case 0x1ED8:
+ bufpush(0x1ED9);
+ break;
+ case 0x1EDA:
+ bufpush(0x1EDB);
+ break;
+ case 0x1EDC:
+ bufpush(0x1EDD);
+ break;
+ case 0x1EDE:
+ bufpush(0x1EDF);
+ break;
+ case 0x1EE0:
+ bufpush(0x1EE1);
+ break;
+ case 0x1EE2:
+ bufpush(0x1EE3);
+ break;
+ case 0x1EE4:
+ bufpush(0x1EE5);
+ break;
+ case 0x1EE6:
+ bufpush(0x1EE7);
+ break;
+ case 0x1EE8:
+ bufpush(0x1EE9);
+ break;
+ case 0x1EEA:
+ bufpush(0x1EEB);
+ break;
+ case 0x1EEC:
+ bufpush(0x1EED);
+ break;
+ case 0x1EEE:
+ bufpush(0x1EEF);
+ break;
+ case 0x1EF0:
+ bufpush(0x1EF1);
+ break;
+ case 0x1EF2:
+ bufpush(0x1EF3);
+ break;
+ case 0x1EF4:
+ bufpush(0x1EF5);
+ break;
+ case 0x1EF6:
+ bufpush(0x1EF7);
+ break;
+ case 0x1EF8:
+ bufpush(0x1EF9);
+ break;
+ case 0x1F08:
+ bufpush(0x1F00);
+ break;
+ case 0x1F09:
+ bufpush(0x1F01);
+ break;
+ case 0x1F0A:
+ bufpush(0x1F02);
+ break;
+ case 0x1F0B:
+ bufpush(0x1F03);
+ break;
+ case 0x1F0C:
+ bufpush(0x1F04);
+ break;
+ case 0x1F0D:
+ bufpush(0x1F05);
+ break;
+ case 0x1F0E:
+ bufpush(0x1F06);
+ break;
+ case 0x1F0F:
+ bufpush(0x1F07);
+ break;
+ case 0x1F18:
+ bufpush(0x1F10);
+ break;
+ case 0x1F19:
+ bufpush(0x1F11);
+ break;
+ case 0x1F1A:
+ bufpush(0x1F12);
+ break;
+ case 0x1F1B:
+ bufpush(0x1F13);
+ break;
+ case 0x1F1C:
+ bufpush(0x1F14);
+ break;
+ case 0x1F1D:
+ bufpush(0x1F15);
+ break;
+ case 0x1F28:
+ bufpush(0x1F20);
+ break;
+ case 0x1F29:
+ bufpush(0x1F21);
+ break;
+ case 0x1F2A:
+ bufpush(0x1F22);
+ break;
+ case 0x1F2B:
+ bufpush(0x1F23);
+ break;
+ case 0x1F2C:
+ bufpush(0x1F24);
+ break;
+ case 0x1F2D:
+ bufpush(0x1F25);
+ break;
+ case 0x1F2E:
+ bufpush(0x1F26);
+ break;
+ case 0x1F2F:
+ bufpush(0x1F27);
+ break;
+ case 0x1F38:
+ bufpush(0x1F30);
+ break;
+ case 0x1F39:
+ bufpush(0x1F31);
+ break;
+ case 0x1F3A:
+ bufpush(0x1F32);
+ break;
+ case 0x1F3B:
+ bufpush(0x1F33);
+ break;
+ case 0x1F3C:
+ bufpush(0x1F34);
+ break;
+ case 0x1F3D:
+ bufpush(0x1F35);
+ break;
+ case 0x1F3E:
+ bufpush(0x1F36);
+ break;
+ case 0x1F3F:
+ bufpush(0x1F37);
+ break;
+ case 0x1F48:
+ bufpush(0x1F40);
+ break;
+ case 0x1F49:
+ bufpush(0x1F41);
+ break;
+ case 0x1F4A:
+ bufpush(0x1F42);
+ break;
+ case 0x1F4B:
+ bufpush(0x1F43);
+ break;
+ case 0x1F4C:
+ bufpush(0x1F44);
+ break;
+ case 0x1F4D:
+ bufpush(0x1F45);
+ break;
+ case 0x1F50:
+ bufpush(0x03C5);
+ bufpush(0x0313);
+ break;
+ case 0x1F52:
+ bufpush(0x03C5);
+ bufpush(0x0313);
+ bufpush(0x0300);
+ break;
+ case 0x1F54:
+ bufpush(0x03C5);
+ bufpush(0x0313);
+ bufpush(0x0301);
+ break;
+ case 0x1F56:
+ bufpush(0x03C5);
+ bufpush(0x0313);
+ bufpush(0x0342);
+ break;
+ case 0x1F59:
+ bufpush(0x1F51);
+ break;
+ case 0x1F5B:
+ bufpush(0x1F53);
+ break;
+ case 0x1F5D:
+ bufpush(0x1F55);
+ break;
+ case 0x1F5F:
+ bufpush(0x1F57);
+ break;
+ case 0x1F68:
+ bufpush(0x1F60);
+ break;
+ case 0x1F69:
+ bufpush(0x1F61);
+ break;
+ case 0x1F6A:
+ bufpush(0x1F62);
+ break;
+ case 0x1F6B:
+ bufpush(0x1F63);
+ break;
+ case 0x1F6C:
+ bufpush(0x1F64);
+ break;
+ case 0x1F6D:
+ bufpush(0x1F65);
+ break;
+ case 0x1F6E:
+ bufpush(0x1F66);
+ break;
+ case 0x1F6F:
+ bufpush(0x1F67);
+ break;
+ case 0x1F80:
+ bufpush(0x1F00);
+ bufpush(0x03B9);
+ break;
+ case 0x1F81:
+ bufpush(0x1F01);
+ bufpush(0x03B9);
+ break;
+ case 0x1F82:
+ bufpush(0x1F02);
+ bufpush(0x03B9);
+ break;
+ case 0x1F83:
+ bufpush(0x1F03);
+ bufpush(0x03B9);
+ break;
+ case 0x1F84:
+ bufpush(0x1F04);
+ bufpush(0x03B9);
+ break;
+ case 0x1F85:
+ bufpush(0x1F05);
+ bufpush(0x03B9);
+ break;
+ case 0x1F86:
+ bufpush(0x1F06);
+ bufpush(0x03B9);
+ break;
+ case 0x1F87:
+ bufpush(0x1F07);
+ bufpush(0x03B9);
+ break;
+ case 0x1F88:
+ bufpush(0x1F00);
+ bufpush(0x03B9);
+ break;
+ case 0x1F89:
+ bufpush(0x1F01);
+ bufpush(0x03B9);
+ break;
+ case 0x1F8A:
+ bufpush(0x1F02);
+ bufpush(0x03B9);
+ break;
+ case 0x1F8B:
+ bufpush(0x1F03);
+ bufpush(0x03B9);
+ break;
+ case 0x1F8C:
+ bufpush(0x1F04);
+ bufpush(0x03B9);
+ break;
+ case 0x1F8D:
+ bufpush(0x1F05);
+ bufpush(0x03B9);
+ break;
+ case 0x1F8E:
+ bufpush(0x1F06);
+ bufpush(0x03B9);
+ break;
+ case 0x1F8F:
+ bufpush(0x1F07);
+ bufpush(0x03B9);
+ break;
+ case 0x1F90:
+ bufpush(0x1F20);
+ bufpush(0x03B9);
+ break;
+ case 0x1F91:
+ bufpush(0x1F21);
+ bufpush(0x03B9);
+ break;
+ case 0x1F92:
+ bufpush(0x1F22);
+ bufpush(0x03B9);
+ break;
+ case 0x1F93:
+ bufpush(0x1F23);
+ bufpush(0x03B9);
+ break;
+ case 0x1F94:
+ bufpush(0x1F24);
+ bufpush(0x03B9);
+ break;
+ case 0x1F95:
+ bufpush(0x1F25);
+ bufpush(0x03B9);
+ break;
+ case 0x1F96:
+ bufpush(0x1F26);
+ bufpush(0x03B9);
+ break;
+ case 0x1F97:
+ bufpush(0x1F27);
+ bufpush(0x03B9);
+ break;
+ case 0x1F98:
+ bufpush(0x1F20);
+ bufpush(0x03B9);
+ break;
+ case 0x1F99:
+ bufpush(0x1F21);
+ bufpush(0x03B9);
+ break;
+ case 0x1F9A:
+ bufpush(0x1F22);
+ bufpush(0x03B9);
+ break;
+ case 0x1F9B:
+ bufpush(0x1F23);
+ bufpush(0x03B9);
+ break;
+ case 0x1F9C:
+ bufpush(0x1F24);
+ bufpush(0x03B9);
+ break;
+ case 0x1F9D:
+ bufpush(0x1F25);
+ bufpush(0x03B9);
+ break;
+ case 0x1F9E:
+ bufpush(0x1F26);
+ bufpush(0x03B9);
+ break;
+ case 0x1F9F:
+ bufpush(0x1F27);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA0:
+ bufpush(0x1F60);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA1:
+ bufpush(0x1F61);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA2:
+ bufpush(0x1F62);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA3:
+ bufpush(0x1F63);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA4:
+ bufpush(0x1F64);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA5:
+ bufpush(0x1F65);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA6:
+ bufpush(0x1F66);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA7:
+ bufpush(0x1F67);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA8:
+ bufpush(0x1F60);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA9:
+ bufpush(0x1F61);
+ bufpush(0x03B9);
+ break;
+ case 0x1FAA:
+ bufpush(0x1F62);
+ bufpush(0x03B9);
+ break;
+ case 0x1FAB:
+ bufpush(0x1F63);
+ bufpush(0x03B9);
+ break;
+ case 0x1FAC:
+ bufpush(0x1F64);
+ bufpush(0x03B9);
+ break;
+ case 0x1FAD:
+ bufpush(0x1F65);
+ bufpush(0x03B9);
+ break;
+ case 0x1FAE:
+ bufpush(0x1F66);
+ bufpush(0x03B9);
+ break;
+ case 0x1FAF:
+ bufpush(0x1F67);
+ bufpush(0x03B9);
+ break;
+ case 0x1FB2:
+ bufpush(0x1F70);
+ bufpush(0x03B9);
+ break;
+ case 0x1FB3:
+ bufpush(0x03B1);
+ bufpush(0x03B9);
+ break;
+ case 0x1FB4:
+ bufpush(0x03AC);
+ bufpush(0x03B9);
+ break;
+ case 0x1FB6:
+ bufpush(0x03B1);
+ bufpush(0x0342);
+ break;
+ case 0x1FB7:
+ bufpush(0x03B1);
+ bufpush(0x0342);
+ bufpush(0x03B9);
+ break;
+ case 0x1FB8:
+ bufpush(0x1FB0);
+ break;
+ case 0x1FB9:
+ bufpush(0x1FB1);
+ break;
+ case 0x1FBA:
+ bufpush(0x1F70);
+ break;
+ case 0x1FBB:
+ bufpush(0x1F71);
+ break;
+ case 0x1FBC:
+ bufpush(0x03B1);
+ bufpush(0x03B9);
+ break;
+ case 0x1FBE:
+ bufpush(0x03B9);
+ break;
+ case 0x1FC2:
+ bufpush(0x1F74);
+ bufpush(0x03B9);
+ break;
+ case 0x1FC3:
+ bufpush(0x03B7);
+ bufpush(0x03B9);
+ break;
+ case 0x1FC4:
+ bufpush(0x03AE);
+ bufpush(0x03B9);
+ break;
+ case 0x1FC6:
+ bufpush(0x03B7);
+ bufpush(0x0342);
+ break;
+ case 0x1FC7:
+ bufpush(0x03B7);
+ bufpush(0x0342);
+ bufpush(0x03B9);
+ break;
+ case 0x1FC8:
+ bufpush(0x1F72);
+ break;
+ case 0x1FC9:
+ bufpush(0x1F73);
+ break;
+ case 0x1FCA:
+ bufpush(0x1F74);
+ break;
+ case 0x1FCB:
+ bufpush(0x1F75);
+ break;
+ case 0x1FCC:
+ bufpush(0x03B7);
+ bufpush(0x03B9);
+ break;
+ case 0x1FD2:
+ bufpush(0x03B9);
+ bufpush(0x0308);
+ bufpush(0x0300);
+ break;
+ case 0x1FD3:
+ bufpush(0x03B9);
+ bufpush(0x0308);
+ bufpush(0x0301);
+ break;
+ case 0x1FD6:
+ bufpush(0x03B9);
+ bufpush(0x0342);
+ break;
+ case 0x1FD7:
+ bufpush(0x03B9);
+ bufpush(0x0308);
+ bufpush(0x0342);
+ break;
+ case 0x1FD8:
+ bufpush(0x1FD0);
+ break;
+ case 0x1FD9:
+ bufpush(0x1FD1);
+ break;
+ case 0x1FDA:
+ bufpush(0x1F76);
+ break;
+ case 0x1FDB:
+ bufpush(0x1F77);
+ break;
+ case 0x1FE2:
+ bufpush(0x03C5);
+ bufpush(0x0308);
+ bufpush(0x0300);
+ break;
+ case 0x1FE3:
+ bufpush(0x03C5);
+ bufpush(0x0308);
+ bufpush(0x0301);
+ break;
+ case 0x1FE4:
+ bufpush(0x03C1);
+ bufpush(0x0313);
+ break;
+ case 0x1FE6:
+ bufpush(0x03C5);
+ bufpush(0x0342);
+ break;
+ case 0x1FE7:
+ bufpush(0x03C5);
+ bufpush(0x0308);
+ bufpush(0x0342);
+ break;
+ case 0x1FE8:
+ bufpush(0x1FE0);
+ break;
+ case 0x1FE9:
+ bufpush(0x1FE1);
+ break;
+ case 0x1FEA:
+ bufpush(0x1F7A);
+ break;
+ case 0x1FEB:
+ bufpush(0x1F7B);
+ break;
+ case 0x1FEC:
+ bufpush(0x1FE5);
+ break;
+ case 0x1FF2:
+ bufpush(0x1F7C);
+ bufpush(0x03B9);
+ break;
+ case 0x1FF3:
+ bufpush(0x03C9);
+ bufpush(0x03B9);
+ break;
+ case 0x1FF4:
+ bufpush(0x03CE);
+ bufpush(0x03B9);
+ break;
+ case 0x1FF6:
+ bufpush(0x03C9);
+ bufpush(0x0342);
+ break;
+ case 0x1FF7:
+ bufpush(0x03C9);
+ bufpush(0x0342);
+ bufpush(0x03B9);
+ break;
+ case 0x1FF8:
+ bufpush(0x1F78);
+ break;
+ case 0x1FF9:
+ bufpush(0x1F79);
+ break;
+ case 0x1FFA:
+ bufpush(0x1F7C);
+ break;
+ case 0x1FFB:
+ bufpush(0x1F7D);
+ break;
+ case 0x1FFC:
+ bufpush(0x03C9);
+ bufpush(0x03B9);
+ break;
+ case 0x2126:
+ bufpush(0x03C9);
+ break;
+ case 0x212A:
+ bufpush(0x006B);
+ break;
+ case 0x212B:
+ bufpush(0x00E5);
+ break;
+ case 0x2160:
+ bufpush(0x2170);
+ break;
+ case 0x2161:
+ bufpush(0x2171);
+ break;
+ case 0x2162:
+ bufpush(0x2172);
+ break;
+ case 0x2163:
+ bufpush(0x2173);
+ break;
+ case 0x2164:
+ bufpush(0x2174);
+ break;
+ case 0x2165:
+ bufpush(0x2175);
+ break;
+ case 0x2166:
+ bufpush(0x2176);
+ break;
+ case 0x2167:
+ bufpush(0x2177);
+ break;
+ case 0x2168:
+ bufpush(0x2178);
+ break;
+ case 0x2169:
+ bufpush(0x2179);
+ break;
+ case 0x216A:
+ bufpush(0x217A);
+ break;
+ case 0x216B:
+ bufpush(0x217B);
+ break;
+ case 0x216C:
+ bufpush(0x217C);
+ break;
+ case 0x216D:
+ bufpush(0x217D);
+ break;
+ case 0x216E:
+ bufpush(0x217E);
+ break;
+ case 0x216F:
+ bufpush(0x217F);
+ break;
+ case 0x24B6:
+ bufpush(0x24D0);
+ break;
+ case 0x24B7:
+ bufpush(0x24D1);
+ break;
+ case 0x24B8:
+ bufpush(0x24D2);
+ break;
+ case 0x24B9:
+ bufpush(0x24D3);
+ break;
+ case 0x24BA:
+ bufpush(0x24D4);
+ break;
+ case 0x24BB:
+ bufpush(0x24D5);
+ break;
+ case 0x24BC:
+ bufpush(0x24D6);
+ break;
+ case 0x24BD:
+ bufpush(0x24D7);
+ break;
+ case 0x24BE:
+ bufpush(0x24D8);
+ break;
+ case 0x24BF:
+ bufpush(0x24D9);
+ break;
+ case 0x24C0:
+ bufpush(0x24DA);
+ break;
+ case 0x24C1:
+ bufpush(0x24DB);
+ break;
+ case 0x24C2:
+ bufpush(0x24DC);
+ break;
+ case 0x24C3:
+ bufpush(0x24DD);
+ break;
+ case 0x24C4:
+ bufpush(0x24DE);
+ break;
+ case 0x24C5:
+ bufpush(0x24DF);
+ break;
+ case 0x24C6:
+ bufpush(0x24E0);
+ break;
+ case 0x24C7:
+ bufpush(0x24E1);
+ break;
+ case 0x24C8:
+ bufpush(0x24E2);
+ break;
+ case 0x24C9:
+ bufpush(0x24E3);
+ break;
+ case 0x24CA:
+ bufpush(0x24E4);
+ break;
+ case 0x24CB:
+ bufpush(0x24E5);
+ break;
+ case 0x24CC:
+ bufpush(0x24E6);
+ break;
+ case 0x24CD:
+ bufpush(0x24E7);
+ break;
+ case 0x24CE:
+ bufpush(0x24E8);
+ break;
+ case 0x24CF:
+ bufpush(0x24E9);
+ break;
+ case 0xFB00:
+ bufpush(0x0066);
+ bufpush(0x0066);
+ break;
+ case 0xFB01:
+ bufpush(0x0066);
+ bufpush(0x0069);
+ break;
+ case 0xFB02:
+ bufpush(0x0066);
+ bufpush(0x006C);
+ break;
+ case 0xFB03:
+ bufpush(0x0066);
+ bufpush(0x0066);
+ bufpush(0x0069);
+ break;
+ case 0xFB04:
+ bufpush(0x0066);
+ bufpush(0x0066);
+ bufpush(0x006C);
+ break;
+ case 0xFB05:
+ bufpush(0x0073);
+ bufpush(0x0074);
+ break;
+ case 0xFB06:
+ bufpush(0x0073);
+ bufpush(0x0074);
+ break;
+ case 0xFB13:
+ bufpush(0x0574);
+ bufpush(0x0576);
+ break;
+ case 0xFB14:
+ bufpush(0x0574);
+ bufpush(0x0565);
+ break;
+ case 0xFB15:
+ bufpush(0x0574);
+ bufpush(0x056B);
+ break;
+ case 0xFB16:
+ bufpush(0x057E);
+ bufpush(0x0576);
+ break;
+ case 0xFB17:
+ bufpush(0x0574);
+ bufpush(0x056D);
+ break;
+ case 0xFF21:
+ bufpush(0xFF41);
+ break;
+ case 0xFF22:
+ bufpush(0xFF42);
+ break;
+ case 0xFF23:
+ bufpush(0xFF43);
+ break;
+ case 0xFF24:
+ bufpush(0xFF44);
+ break;
+ case 0xFF25:
+ bufpush(0xFF45);
+ break;
+ case 0xFF26:
+ bufpush(0xFF46);
+ break;
+ case 0xFF27:
+ bufpush(0xFF47);
+ break;
+ case 0xFF28:
+ bufpush(0xFF48);
+ break;
+ case 0xFF29:
+ bufpush(0xFF49);
+ break;
+ case 0xFF2A:
+ bufpush(0xFF4A);
+ break;
+ case 0xFF2B:
+ bufpush(0xFF4B);
+ break;
+ case 0xFF2C:
+ bufpush(0xFF4C);
+ break;
+ case 0xFF2D:
+ bufpush(0xFF4D);
+ break;
+ case 0xFF2E:
+ bufpush(0xFF4E);
+ break;
+ case 0xFF2F:
+ bufpush(0xFF4F);
+ break;
+ case 0xFF30:
+ bufpush(0xFF50);
+ break;
+ case 0xFF31:
+ bufpush(0xFF51);
+ break;
+ case 0xFF32:
+ bufpush(0xFF52);
+ break;
+ case 0xFF33:
+ bufpush(0xFF53);
+ break;
+ case 0xFF34:
+ bufpush(0xFF54);
+ break;
+ case 0xFF35:
+ bufpush(0xFF55);
+ break;
+ case 0xFF36:
+ bufpush(0xFF56);
+ break;
+ case 0xFF37:
+ bufpush(0xFF57);
+ break;
+ case 0xFF38:
+ bufpush(0xFF58);
+ break;
+ case 0xFF39:
+ bufpush(0xFF59);
+ break;
+ case 0xFF3A:
+ bufpush(0xFF5A);
+ break;
+ case 0x10400:
+ bufpush(0x10428);
+ break;
+ case 0x10401:
+ bufpush(0x10429);
+ break;
+ case 0x10402:
+ bufpush(0x1042A);
+ break;
+ case 0x10403:
+ bufpush(0x1042B);
+ break;
+ case 0x10404:
+ bufpush(0x1042C);
+ break;
+ case 0x10405:
+ bufpush(0x1042D);
+ break;
+ case 0x10406:
+ bufpush(0x1042E);
+ break;
+ case 0x10407:
+ bufpush(0x1042F);
+ break;
+ case 0x10408:
+ bufpush(0x10430);
+ break;
+ case 0x10409:
+ bufpush(0x10431);
+ break;
+ case 0x1040A:
+ bufpush(0x10432);
+ break;
+ case 0x1040B:
+ bufpush(0x10433);
+ break;
+ case 0x1040C:
+ bufpush(0x10434);
+ break;
+ case 0x1040D:
+ bufpush(0x10435);
+ break;
+ case 0x1040E:
+ bufpush(0x10436);
+ break;
+ case 0x1040F:
+ bufpush(0x10437);
+ break;
+ case 0x10410:
+ bufpush(0x10438);
+ break;
+ case 0x10411:
+ bufpush(0x10439);
+ break;
+ case 0x10412:
+ bufpush(0x1043A);
+ break;
+ case 0x10413:
+ bufpush(0x1043B);
+ break;
+ case 0x10414:
+ bufpush(0x1043C);
+ break;
+ case 0x10415:
+ bufpush(0x1043D);
+ break;
+ case 0x10416:
+ bufpush(0x1043E);
+ break;
+ case 0x10417:
+ bufpush(0x1043F);
+ break;
+ case 0x10418:
+ bufpush(0x10440);
+ break;
+ case 0x10419:
+ bufpush(0x10441);
+ break;
+ case 0x1041A:
+ bufpush(0x10442);
+ break;
+ case 0x1041B:
+ bufpush(0x10443);
+ break;
+ case 0x1041C:
+ bufpush(0x10444);
+ break;
+ case 0x1041D:
+ bufpush(0x10445);
+ break;
+ case 0x1041E:
+ bufpush(0x10446);
+ break;
+ case 0x1041F:
+ bufpush(0x10447);
+ break;
+ case 0x10420:
+ bufpush(0x10448);
+ break;
+ case 0x10421:
+ bufpush(0x10449);
+ break;
+ case 0x10422:
+ bufpush(0x1044A);
+ break;
+ case 0x10423:
+ bufpush(0x1044B);
+ break;
+ case 0x10424:
+ bufpush(0x1044C);
+ break;
+ case 0x10425:
+ bufpush(0x1044D);
+ break;
+ default:
+ bufpush(c);
+ }
diff --git a/src/casefold.c b/src/casefold.c
new file mode 100644
index 0000000..33f18aa
--- /dev/null
+++ b/src/casefold.c
@@ -0,0 +1,2699 @@
+#include <stdlib.h>
+#include <stdio.h>
+
+
+ switch c {
+ case 0x0041:
+ bufpush(0x0061);
+ break;
+ case 0x0042:
+ bufpush(0x0062);
+ break;
+ case 0x0043:
+ bufpush(0x0063);
+ break;
+ case 0x0044:
+ bufpush(0x0064);
+ break;
+ case 0x0045:
+ bufpush(0x0065);
+ break;
+ case 0x0046:
+ bufpush(0x0066);
+ break;
+ case 0x0047:
+ bufpush(0x0067);
+ break;
+ case 0x0048:
+ bufpush(0x0068);
+ break;
+ case 0x0049:
+ bufpush(0x0069);
+ break;
+ case 0x0049:
+ bufpush(0x0131);
+ break;
+ case 0x004A:
+ bufpush(0x006A);
+ break;
+ case 0x004B:
+ bufpush(0x006B);
+ break;
+ case 0x004C:
+ bufpush(0x006C);
+ break;
+ case 0x004D:
+ bufpush(0x006D);
+ break;
+ case 0x004E:
+ bufpush(0x006E);
+ break;
+ case 0x004F:
+ bufpush(0x006F);
+ break;
+ case 0x0050:
+ bufpush(0x0070);
+ break;
+ case 0x0051:
+ bufpush(0x0071);
+ break;
+ case 0x0052:
+ bufpush(0x0072);
+ break;
+ case 0x0053:
+ bufpush(0x0073);
+ break;
+ case 0x0054:
+ bufpush(0x0074);
+ break;
+ case 0x0055:
+ bufpush(0x0075);
+ break;
+ case 0x0056:
+ bufpush(0x0076);
+ break;
+ case 0x0057:
+ bufpush(0x0077);
+ break;
+ case 0x0058:
+ bufpush(0x0078);
+ break;
+ case 0x0059:
+ bufpush(0x0079);
+ break;
+ case 0x005A:
+ bufpush(0x007A);
+ break;
+ case 0x00B5:
+ bufpush(0x03BC);
+ break;
+ case 0x00C0:
+ bufpush(0x00E0);
+ break;
+ case 0x00C1:
+ bufpush(0x00E1);
+ break;
+ case 0x00C2:
+ bufpush(0x00E2);
+ break;
+ case 0x00C3:
+ bufpush(0x00E3);
+ break;
+ case 0x00C4:
+ bufpush(0x00E4);
+ break;
+ case 0x00C5:
+ bufpush(0x00E5);
+ break;
+ case 0x00C6:
+ bufpush(0x00E6);
+ break;
+ case 0x00C7:
+ bufpush(0x00E7);
+ break;
+ case 0x00C8:
+ bufpush(0x00E8);
+ break;
+ case 0x00C9:
+ bufpush(0x00E9);
+ break;
+ case 0x00CA:
+ bufpush(0x00EA);
+ break;
+ case 0x00CB:
+ bufpush(0x00EB);
+ break;
+ case 0x00CC:
+ bufpush(0x00EC);
+ break;
+ case 0x00CD:
+ bufpush(0x00ED);
+ break;
+ case 0x00CE:
+ bufpush(0x00EE);
+ break;
+ case 0x00CF:
+ bufpush(0x00EF);
+ break;
+ case 0x00D0:
+ bufpush(0x00F0);
+ break;
+ case 0x00D1:
+ bufpush(0x00F1);
+ break;
+ case 0x00D2:
+ bufpush(0x00F2);
+ break;
+ case 0x00D3:
+ bufpush(0x00F3);
+ break;
+ case 0x00D4:
+ bufpush(0x00F4);
+ break;
+ case 0x00D5:
+ bufpush(0x00F5);
+ break;
+ case 0x00D6:
+ bufpush(0x00F6);
+ break;
+ case 0x00D8:
+ bufpush(0x00F8);
+ break;
+ case 0x00D9:
+ bufpush(0x00F9);
+ break;
+ case 0x00DA:
+ bufpush(0x00FA);
+ break;
+ case 0x00DB:
+ bufpush(0x00FB);
+ break;
+ case 0x00DC:
+ bufpush(0x00FC);
+ break;
+ case 0x00DD:
+ bufpush(0x00FD);
+ break;
+ case 0x00DE:
+ bufpush(0x00FE);
+ break;
+ case 0x00DF:
+ bufpush(0x0073);
+ bufpush(0x0073);
+ break;
+ case 0x0100:
+ bufpush(0x0101);
+ break;
+ case 0x0102:
+ bufpush(0x0103);
+ break;
+ case 0x0104:
+ bufpush(0x0105);
+ break;
+ case 0x0106:
+ bufpush(0x0107);
+ break;
+ case 0x0108:
+ bufpush(0x0109);
+ break;
+ case 0x010A:
+ bufpush(0x010B);
+ break;
+ case 0x010C:
+ bufpush(0x010D);
+ break;
+ case 0x010E:
+ bufpush(0x010F);
+ break;
+ case 0x0110:
+ bufpush(0x0111);
+ break;
+ case 0x0112:
+ bufpush(0x0113);
+ break;
+ case 0x0114:
+ bufpush(0x0115);
+ break;
+ case 0x0116:
+ bufpush(0x0117);
+ break;
+ case 0x0118:
+ bufpush(0x0119);
+ break;
+ case 0x011A:
+ bufpush(0x011B);
+ break;
+ case 0x011C:
+ bufpush(0x011D);
+ break;
+ case 0x011E:
+ bufpush(0x011F);
+ break;
+ case 0x0120:
+ bufpush(0x0121);
+ break;
+ case 0x0122:
+ bufpush(0x0123);
+ break;
+ case 0x0124:
+ bufpush(0x0125);
+ break;
+ case 0x0126:
+ bufpush(0x0127);
+ break;
+ case 0x0128:
+ bufpush(0x0129);
+ break;
+ case 0x012A:
+ bufpush(0x012B);
+ break;
+ case 0x012C:
+ bufpush(0x012D);
+ break;
+ case 0x012E:
+ bufpush(0x012F);
+ break;
+ case 0x0130:
+ bufpush(0x0069);
+ bufpush(0x0307);
+ break;
+ case 0x0130:
+ bufpush(0x0069);
+ break;
+ case 0x0132:
+ bufpush(0x0133);
+ break;
+ case 0x0134:
+ bufpush(0x0135);
+ break;
+ case 0x0136:
+ bufpush(0x0137);
+ break;
+ case 0x0139:
+ bufpush(0x013A);
+ break;
+ case 0x013B:
+ bufpush(0x013C);
+ break;
+ case 0x013D:
+ bufpush(0x013E);
+ break;
+ case 0x013F:
+ bufpush(0x0140);
+ break;
+ case 0x0141:
+ bufpush(0x0142);
+ break;
+ case 0x0143:
+ bufpush(0x0144);
+ break;
+ case 0x0145:
+ bufpush(0x0146);
+ break;
+ case 0x0147:
+ bufpush(0x0148);
+ break;
+ case 0x0149:
+ bufpush(0x02BC);
+ bufpush(0x006E);
+ break;
+ case 0x014A:
+ bufpush(0x014B);
+ break;
+ case 0x014C:
+ bufpush(0x014D);
+ break;
+ case 0x014E:
+ bufpush(0x014F);
+ break;
+ case 0x0150:
+ bufpush(0x0151);
+ break;
+ case 0x0152:
+ bufpush(0x0153);
+ break;
+ case 0x0154:
+ bufpush(0x0155);
+ break;
+ case 0x0156:
+ bufpush(0x0157);
+ break;
+ case 0x0158:
+ bufpush(0x0159);
+ break;
+ case 0x015A:
+ bufpush(0x015B);
+ break;
+ case 0x015C:
+ bufpush(0x015D);
+ break;
+ case 0x015E:
+ bufpush(0x015F);
+ break;
+ case 0x0160:
+ bufpush(0x0161);
+ break;
+ case 0x0162:
+ bufpush(0x0163);
+ break;
+ case 0x0164:
+ bufpush(0x0165);
+ break;
+ case 0x0166:
+ bufpush(0x0167);
+ break;
+ case 0x0168:
+ bufpush(0x0169);
+ break;
+ case 0x016A:
+ bufpush(0x016B);
+ break;
+ case 0x016C:
+ bufpush(0x016D);
+ break;
+ case 0x016E:
+ bufpush(0x016F);
+ break;
+ case 0x0170:
+ bufpush(0x0171);
+ break;
+ case 0x0172:
+ bufpush(0x0173);
+ break;
+ case 0x0174:
+ bufpush(0x0175);
+ break;
+ case 0x0176:
+ bufpush(0x0177);
+ break;
+ case 0x0178:
+ bufpush(0x00FF);
+ break;
+ case 0x0179:
+ bufpush(0x017A);
+ break;
+ case 0x017B:
+ bufpush(0x017C);
+ break;
+ case 0x017D:
+ bufpush(0x017E);
+ break;
+ case 0x017F:
+ bufpush(0x0073);
+ break;
+ case 0x0181:
+ bufpush(0x0253);
+ break;
+ case 0x0182:
+ bufpush(0x0183);
+ break;
+ case 0x0184:
+ bufpush(0x0185);
+ break;
+ case 0x0186:
+ bufpush(0x0254);
+ break;
+ case 0x0187:
+ bufpush(0x0188);
+ break;
+ case 0x0189:
+ bufpush(0x0256);
+ break;
+ case 0x018A:
+ bufpush(0x0257);
+ break;
+ case 0x018B:
+ bufpush(0x018C);
+ break;
+ case 0x018E:
+ bufpush(0x01DD);
+ break;
+ case 0x018F:
+ bufpush(0x0259);
+ break;
+ case 0x0190:
+ bufpush(0x025B);
+ break;
+ case 0x0191:
+ bufpush(0x0192);
+ break;
+ case 0x0193:
+ bufpush(0x0260);
+ break;
+ case 0x0194:
+ bufpush(0x0263);
+ break;
+ case 0x0196:
+ bufpush(0x0269);
+ break;
+ case 0x0197:
+ bufpush(0x0268);
+ break;
+ case 0x0198:
+ bufpush(0x0199);
+ break;
+ case 0x019C:
+ bufpush(0x026F);
+ break;
+ case 0x019D:
+ bufpush(0x0272);
+ break;
+ case 0x019F:
+ bufpush(0x0275);
+ break;
+ case 0x01A0:
+ bufpush(0x01A1);
+ break;
+ case 0x01A2:
+ bufpush(0x01A3);
+ break;
+ case 0x01A4:
+ bufpush(0x01A5);
+ break;
+ case 0x01A6:
+ bufpush(0x0280);
+ break;
+ case 0x01A7:
+ bufpush(0x01A8);
+ break;
+ case 0x01A9:
+ bufpush(0x0283);
+ break;
+ case 0x01AC:
+ bufpush(0x01AD);
+ break;
+ case 0x01AE:
+ bufpush(0x0288);
+ break;
+ case 0x01AF:
+ bufpush(0x01B0);
+ break;
+ case 0x01B1:
+ bufpush(0x028A);
+ break;
+ case 0x01B2:
+ bufpush(0x028B);
+ break;
+ case 0x01B3:
+ bufpush(0x01B4);
+ break;
+ case 0x01B5:
+ bufpush(0x01B6);
+ break;
+ case 0x01B7:
+ bufpush(0x0292);
+ break;
+ case 0x01B8:
+ bufpush(0x01B9);
+ break;
+ case 0x01BC:
+ bufpush(0x01BD);
+ break;
+ case 0x01C4:
+ bufpush(0x01C6);
+ break;
+ case 0x01C5:
+ bufpush(0x01C6);
+ break;
+ case 0x01C7:
+ bufpush(0x01C9);
+ break;
+ case 0x01C8:
+ bufpush(0x01C9);
+ break;
+ case 0x01CA:
+ bufpush(0x01CC);
+ break;
+ case 0x01CB:
+ bufpush(0x01CC);
+ break;
+ case 0x01CD:
+ bufpush(0x01CE);
+ break;
+ case 0x01CF:
+ bufpush(0x01D0);
+ break;
+ case 0x01D1:
+ bufpush(0x01D2);
+ break;
+ case 0x01D3:
+ bufpush(0x01D4);
+ break;
+ case 0x01D5:
+ bufpush(0x01D6);
+ break;
+ case 0x01D7:
+ bufpush(0x01D8);
+ break;
+ case 0x01D9:
+ bufpush(0x01DA);
+ break;
+ case 0x01DB:
+ bufpush(0x01DC);
+ break;
+ case 0x01DE:
+ bufpush(0x01DF);
+ break;
+ case 0x01E0:
+ bufpush(0x01E1);
+ break;
+ case 0x01E2:
+ bufpush(0x01E3);
+ break;
+ case 0x01E4:
+ bufpush(0x01E5);
+ break;
+ case 0x01E6:
+ bufpush(0x01E7);
+ break;
+ case 0x01E8:
+ bufpush(0x01E9);
+ break;
+ case 0x01EA:
+ bufpush(0x01EB);
+ break;
+ case 0x01EC:
+ bufpush(0x01ED);
+ break;
+ case 0x01EE:
+ bufpush(0x01EF);
+ break;
+ case 0x01F0:
+ bufpush(0x006A);
+ bufpush(0x030C);
+ break;
+ case 0x01F1:
+ bufpush(0x01F3);
+ break;
+ case 0x01F2:
+ bufpush(0x01F3);
+ break;
+ case 0x01F4:
+ bufpush(0x01F5);
+ break;
+ case 0x01F6:
+ bufpush(0x0195);
+ break;
+ case 0x01F7:
+ bufpush(0x01BF);
+ break;
+ case 0x01F8:
+ bufpush(0x01F9);
+ break;
+ case 0x01FA:
+ bufpush(0x01FB);
+ break;
+ case 0x01FC:
+ bufpush(0x01FD);
+ break;
+ case 0x01FE:
+ bufpush(0x01FF);
+ break;
+ case 0x0200:
+ bufpush(0x0201);
+ break;
+ case 0x0202:
+ bufpush(0x0203);
+ break;
+ case 0x0204:
+ bufpush(0x0205);
+ break;
+ case 0x0206:
+ bufpush(0x0207);
+ break;
+ case 0x0208:
+ bufpush(0x0209);
+ break;
+ case 0x020A:
+ bufpush(0x020B);
+ break;
+ case 0x020C:
+ bufpush(0x020D);
+ break;
+ case 0x020E:
+ bufpush(0x020F);
+ break;
+ case 0x0210:
+ bufpush(0x0211);
+ break;
+ case 0x0212:
+ bufpush(0x0213);
+ break;
+ case 0x0214:
+ bufpush(0x0215);
+ break;
+ case 0x0216:
+ bufpush(0x0217);
+ break;
+ case 0x0218:
+ bufpush(0x0219);
+ break;
+ case 0x021A:
+ bufpush(0x021B);
+ break;
+ case 0x021C:
+ bufpush(0x021D);
+ break;
+ case 0x021E:
+ bufpush(0x021F);
+ break;
+ case 0x0220:
+ bufpush(0x019E);
+ break;
+ case 0x0222:
+ bufpush(0x0223);
+ break;
+ case 0x0224:
+ bufpush(0x0225);
+ break;
+ case 0x0226:
+ bufpush(0x0227);
+ break;
+ case 0x0228:
+ bufpush(0x0229);
+ break;
+ case 0x022A:
+ bufpush(0x022B);
+ break;
+ case 0x022C:
+ bufpush(0x022D);
+ break;
+ case 0x022E:
+ bufpush(0x022F);
+ break;
+ case 0x0230:
+ bufpush(0x0231);
+ break;
+ case 0x0232:
+ bufpush(0x0233);
+ break;
+ case 0x0345:
+ bufpush(0x03B9);
+ break;
+ case 0x0386:
+ bufpush(0x03AC);
+ break;
+ case 0x0388:
+ bufpush(0x03AD);
+ break;
+ case 0x0389:
+ bufpush(0x03AE);
+ break;
+ case 0x038A:
+ bufpush(0x03AF);
+ break;
+ case 0x038C:
+ bufpush(0x03CC);
+ break;
+ case 0x038E:
+ bufpush(0x03CD);
+ break;
+ case 0x038F:
+ bufpush(0x03CE);
+ break;
+ case 0x0390:
+ bufpush(0x03B9);
+ bufpush(0x0308);
+ bufpush(0x0301);
+ break;
+ case 0x0391:
+ bufpush(0x03B1);
+ break;
+ case 0x0392:
+ bufpush(0x03B2);
+ break;
+ case 0x0393:
+ bufpush(0x03B3);
+ break;
+ case 0x0394:
+ bufpush(0x03B4);
+ break;
+ case 0x0395:
+ bufpush(0x03B5);
+ break;
+ case 0x0396:
+ bufpush(0x03B6);
+ break;
+ case 0x0397:
+ bufpush(0x03B7);
+ break;
+ case 0x0398:
+ bufpush(0x03B8);
+ break;
+ case 0x0399:
+ bufpush(0x03B9);
+ break;
+ case 0x039A:
+ bufpush(0x03BA);
+ break;
+ case 0x039B:
+ bufpush(0x03BB);
+ break;
+ case 0x039C:
+ bufpush(0x03BC);
+ break;
+ case 0x039D:
+ bufpush(0x03BD);
+ break;
+ case 0x039E:
+ bufpush(0x03BE);
+ break;
+ case 0x039F:
+ bufpush(0x03BF);
+ break;
+ case 0x03A0:
+ bufpush(0x03C0);
+ break;
+ case 0x03A1:
+ bufpush(0x03C1);
+ break;
+ case 0x03A3:
+ bufpush(0x03C3);
+ break;
+ case 0x03A4:
+ bufpush(0x03C4);
+ break;
+ case 0x03A5:
+ bufpush(0x03C5);
+ break;
+ case 0x03A6:
+ bufpush(0x03C6);
+ break;
+ case 0x03A7:
+ bufpush(0x03C7);
+ break;
+ case 0x03A8:
+ bufpush(0x03C8);
+ break;
+ case 0x03A9:
+ bufpush(0x03C9);
+ break;
+ case 0x03AA:
+ bufpush(0x03CA);
+ break;
+ case 0x03AB:
+ bufpush(0x03CB);
+ break;
+ case 0x03B0:
+ bufpush(0x03C5);
+ bufpush(0x0308);
+ bufpush(0x0301);
+ break;
+ case 0x03C2:
+ bufpush(0x03C3);
+ break;
+ case 0x03D0:
+ bufpush(0x03B2);
+ break;
+ case 0x03D1:
+ bufpush(0x03B8);
+ break;
+ case 0x03D5:
+ bufpush(0x03C6);
+ break;
+ case 0x03D6:
+ bufpush(0x03C0);
+ break;
+ case 0x03D8:
+ bufpush(0x03D9);
+ break;
+ case 0x03DA:
+ bufpush(0x03DB);
+ break;
+ case 0x03DC:
+ bufpush(0x03DD);
+ break;
+ case 0x03DE:
+ bufpush(0x03DF);
+ break;
+ case 0x03E0:
+ bufpush(0x03E1);
+ break;
+ case 0x03E2:
+ bufpush(0x03E3);
+ break;
+ case 0x03E4:
+ bufpush(0x03E5);
+ break;
+ case 0x03E6:
+ bufpush(0x03E7);
+ break;
+ case 0x03E8:
+ bufpush(0x03E9);
+ break;
+ case 0x03EA:
+ bufpush(0x03EB);
+ break;
+ case 0x03EC:
+ bufpush(0x03ED);
+ break;
+ case 0x03EE:
+ bufpush(0x03EF);
+ break;
+ case 0x03F0:
+ bufpush(0x03BA);
+ break;
+ case 0x03F1:
+ bufpush(0x03C1);
+ break;
+ case 0x03F2:
+ bufpush(0x03C3);
+ break;
+ case 0x03F4:
+ bufpush(0x03B8);
+ break;
+ case 0x03F5:
+ bufpush(0x03B5);
+ break;
+ case 0x0400:
+ bufpush(0x0450);
+ break;
+ case 0x0401:
+ bufpush(0x0451);
+ break;
+ case 0x0402:
+ bufpush(0x0452);
+ break;
+ case 0x0403:
+ bufpush(0x0453);
+ break;
+ case 0x0404:
+ bufpush(0x0454);
+ break;
+ case 0x0405:
+ bufpush(0x0455);
+ break;
+ case 0x0406:
+ bufpush(0x0456);
+ break;
+ case 0x0407:
+ bufpush(0x0457);
+ break;
+ case 0x0408:
+ bufpush(0x0458);
+ break;
+ case 0x0409:
+ bufpush(0x0459);
+ break;
+ case 0x040A:
+ bufpush(0x045A);
+ break;
+ case 0x040B:
+ bufpush(0x045B);
+ break;
+ case 0x040C:
+ bufpush(0x045C);
+ break;
+ case 0x040D:
+ bufpush(0x045D);
+ break;
+ case 0x040E:
+ bufpush(0x045E);
+ break;
+ case 0x040F:
+ bufpush(0x045F);
+ break;
+ case 0x0410:
+ bufpush(0x0430);
+ break;
+ case 0x0411:
+ bufpush(0x0431);
+ break;
+ case 0x0412:
+ bufpush(0x0432);
+ break;
+ case 0x0413:
+ bufpush(0x0433);
+ break;
+ case 0x0414:
+ bufpush(0x0434);
+ break;
+ case 0x0415:
+ bufpush(0x0435);
+ break;
+ case 0x0416:
+ bufpush(0x0436);
+ break;
+ case 0x0417:
+ bufpush(0x0437);
+ break;
+ case 0x0418:
+ bufpush(0x0438);
+ break;
+ case 0x0419:
+ bufpush(0x0439);
+ break;
+ case 0x041A:
+ bufpush(0x043A);
+ break;
+ case 0x041B:
+ bufpush(0x043B);
+ break;
+ case 0x041C:
+ bufpush(0x043C);
+ break;
+ case 0x041D:
+ bufpush(0x043D);
+ break;
+ case 0x041E:
+ bufpush(0x043E);
+ break;
+ case 0x041F:
+ bufpush(0x043F);
+ break;
+ case 0x0420:
+ bufpush(0x0440);
+ break;
+ case 0x0421:
+ bufpush(0x0441);
+ break;
+ case 0x0422:
+ bufpush(0x0442);
+ break;
+ case 0x0423:
+ bufpush(0x0443);
+ break;
+ case 0x0424:
+ bufpush(0x0444);
+ break;
+ case 0x0425:
+ bufpush(0x0445);
+ break;
+ case 0x0426:
+ bufpush(0x0446);
+ break;
+ case 0x0427:
+ bufpush(0x0447);
+ break;
+ case 0x0428:
+ bufpush(0x0448);
+ break;
+ case 0x0429:
+ bufpush(0x0449);
+ break;
+ case 0x042A:
+ bufpush(0x044A);
+ break;
+ case 0x042B:
+ bufpush(0x044B);
+ break;
+ case 0x042C:
+ bufpush(0x044C);
+ break;
+ case 0x042D:
+ bufpush(0x044D);
+ break;
+ case 0x042E:
+ bufpush(0x044E);
+ break;
+ case 0x042F:
+ bufpush(0x044F);
+ break;
+ case 0x0460:
+ bufpush(0x0461);
+ break;
+ case 0x0462:
+ bufpush(0x0463);
+ break;
+ case 0x0464:
+ bufpush(0x0465);
+ break;
+ case 0x0466:
+ bufpush(0x0467);
+ break;
+ case 0x0468:
+ bufpush(0x0469);
+ break;
+ case 0x046A:
+ bufpush(0x046B);
+ break;
+ case 0x046C:
+ bufpush(0x046D);
+ break;
+ case 0x046E:
+ bufpush(0x046F);
+ break;
+ case 0x0470:
+ bufpush(0x0471);
+ break;
+ case 0x0472:
+ bufpush(0x0473);
+ break;
+ case 0x0474:
+ bufpush(0x0475);
+ break;
+ case 0x0476:
+ bufpush(0x0477);
+ break;
+ case 0x0478:
+ bufpush(0x0479);
+ break;
+ case 0x047A:
+ bufpush(0x047B);
+ break;
+ case 0x047C:
+ bufpush(0x047D);
+ break;
+ case 0x047E:
+ bufpush(0x047F);
+ break;
+ case 0x0480:
+ bufpush(0x0481);
+ break;
+ case 0x048A:
+ bufpush(0x048B);
+ break;
+ case 0x048C:
+ bufpush(0x048D);
+ break;
+ case 0x048E:
+ bufpush(0x048F);
+ break;
+ case 0x0490:
+ bufpush(0x0491);
+ break;
+ case 0x0492:
+ bufpush(0x0493);
+ break;
+ case 0x0494:
+ bufpush(0x0495);
+ break;
+ case 0x0496:
+ bufpush(0x0497);
+ break;
+ case 0x0498:
+ bufpush(0x0499);
+ break;
+ case 0x049A:
+ bufpush(0x049B);
+ break;
+ case 0x049C:
+ bufpush(0x049D);
+ break;
+ case 0x049E:
+ bufpush(0x049F);
+ break;
+ case 0x04A0:
+ bufpush(0x04A1);
+ break;
+ case 0x04A2:
+ bufpush(0x04A3);
+ break;
+ case 0x04A4:
+ bufpush(0x04A5);
+ break;
+ case 0x04A6:
+ bufpush(0x04A7);
+ break;
+ case 0x04A8:
+ bufpush(0x04A9);
+ break;
+ case 0x04AA:
+ bufpush(0x04AB);
+ break;
+ case 0x04AC:
+ bufpush(0x04AD);
+ break;
+ case 0x04AE:
+ bufpush(0x04AF);
+ break;
+ case 0x04B0:
+ bufpush(0x04B1);
+ break;
+ case 0x04B2:
+ bufpush(0x04B3);
+ break;
+ case 0x04B4:
+ bufpush(0x04B5);
+ break;
+ case 0x04B6:
+ bufpush(0x04B7);
+ break;
+ case 0x04B8:
+ bufpush(0x04B9);
+ break;
+ case 0x04BA:
+ bufpush(0x04BB);
+ break;
+ case 0x04BC:
+ bufpush(0x04BD);
+ break;
+ case 0x04BE:
+ bufpush(0x04BF);
+ break;
+ case 0x04C1:
+ bufpush(0x04C2);
+ break;
+ case 0x04C3:
+ bufpush(0x04C4);
+ break;
+ case 0x04C5:
+ bufpush(0x04C6);
+ break;
+ case 0x04C7:
+ bufpush(0x04C8);
+ break;
+ case 0x04C9:
+ bufpush(0x04CA);
+ break;
+ case 0x04CB:
+ bufpush(0x04CC);
+ break;
+ case 0x04CD:
+ bufpush(0x04CE);
+ break;
+ case 0x04D0:
+ bufpush(0x04D1);
+ break;
+ case 0x04D2:
+ bufpush(0x04D3);
+ break;
+ case 0x04D4:
+ bufpush(0x04D5);
+ break;
+ case 0x04D6:
+ bufpush(0x04D7);
+ break;
+ case 0x04D8:
+ bufpush(0x04D9);
+ break;
+ case 0x04DA:
+ bufpush(0x04DB);
+ break;
+ case 0x04DC:
+ bufpush(0x04DD);
+ break;
+ case 0x04DE:
+ bufpush(0x04DF);
+ break;
+ case 0x04E0:
+ bufpush(0x04E1);
+ break;
+ case 0x04E2:
+ bufpush(0x04E3);
+ break;
+ case 0x04E4:
+ bufpush(0x04E5);
+ break;
+ case 0x04E6:
+ bufpush(0x04E7);
+ break;
+ case 0x04E8:
+ bufpush(0x04E9);
+ break;
+ case 0x04EA:
+ bufpush(0x04EB);
+ break;
+ case 0x04EC:
+ bufpush(0x04ED);
+ break;
+ case 0x04EE:
+ bufpush(0x04EF);
+ break;
+ case 0x04F0:
+ bufpush(0x04F1);
+ break;
+ case 0x04F2:
+ bufpush(0x04F3);
+ break;
+ case 0x04F4:
+ bufpush(0x04F5);
+ break;
+ case 0x04F8:
+ bufpush(0x04F9);
+ break;
+ case 0x0500:
+ bufpush(0x0501);
+ break;
+ case 0x0502:
+ bufpush(0x0503);
+ break;
+ case 0x0504:
+ bufpush(0x0505);
+ break;
+ case 0x0506:
+ bufpush(0x0507);
+ break;
+ case 0x0508:
+ bufpush(0x0509);
+ break;
+ case 0x050A:
+ bufpush(0x050B);
+ break;
+ case 0x050C:
+ bufpush(0x050D);
+ break;
+ case 0x050E:
+ bufpush(0x050F);
+ break;
+ case 0x0531:
+ bufpush(0x0561);
+ break;
+ case 0x0532:
+ bufpush(0x0562);
+ break;
+ case 0x0533:
+ bufpush(0x0563);
+ break;
+ case 0x0534:
+ bufpush(0x0564);
+ break;
+ case 0x0535:
+ bufpush(0x0565);
+ break;
+ case 0x0536:
+ bufpush(0x0566);
+ break;
+ case 0x0537:
+ bufpush(0x0567);
+ break;
+ case 0x0538:
+ bufpush(0x0568);
+ break;
+ case 0x0539:
+ bufpush(0x0569);
+ break;
+ case 0x053A:
+ bufpush(0x056A);
+ break;
+ case 0x053B:
+ bufpush(0x056B);
+ break;
+ case 0x053C:
+ bufpush(0x056C);
+ break;
+ case 0x053D:
+ bufpush(0x056D);
+ break;
+ case 0x053E:
+ bufpush(0x056E);
+ break;
+ case 0x053F:
+ bufpush(0x056F);
+ break;
+ case 0x0540:
+ bufpush(0x0570);
+ break;
+ case 0x0541:
+ bufpush(0x0571);
+ break;
+ case 0x0542:
+ bufpush(0x0572);
+ break;
+ case 0x0543:
+ bufpush(0x0573);
+ break;
+ case 0x0544:
+ bufpush(0x0574);
+ break;
+ case 0x0545:
+ bufpush(0x0575);
+ break;
+ case 0x0546:
+ bufpush(0x0576);
+ break;
+ case 0x0547:
+ bufpush(0x0577);
+ break;
+ case 0x0548:
+ bufpush(0x0578);
+ break;
+ case 0x0549:
+ bufpush(0x0579);
+ break;
+ case 0x054A:
+ bufpush(0x057A);
+ break;
+ case 0x054B:
+ bufpush(0x057B);
+ break;
+ case 0x054C:
+ bufpush(0x057C);
+ break;
+ case 0x054D:
+ bufpush(0x057D);
+ break;
+ case 0x054E:
+ bufpush(0x057E);
+ break;
+ case 0x054F:
+ bufpush(0x057F);
+ break;
+ case 0x0550:
+ bufpush(0x0580);
+ break;
+ case 0x0551:
+ bufpush(0x0581);
+ break;
+ case 0x0552:
+ bufpush(0x0582);
+ break;
+ case 0x0553:
+ bufpush(0x0583);
+ break;
+ case 0x0554:
+ bufpush(0x0584);
+ break;
+ case 0x0555:
+ bufpush(0x0585);
+ break;
+ case 0x0556:
+ bufpush(0x0586);
+ break;
+ case 0x0587:
+ bufpush(0x0565);
+ bufpush(0x0582);
+ break;
+ case 0x1E00:
+ bufpush(0x1E01);
+ break;
+ case 0x1E02:
+ bufpush(0x1E03);
+ break;
+ case 0x1E04:
+ bufpush(0x1E05);
+ break;
+ case 0x1E06:
+ bufpush(0x1E07);
+ break;
+ case 0x1E08:
+ bufpush(0x1E09);
+ break;
+ case 0x1E0A:
+ bufpush(0x1E0B);
+ break;
+ case 0x1E0C:
+ bufpush(0x1E0D);
+ break;
+ case 0x1E0E:
+ bufpush(0x1E0F);
+ break;
+ case 0x1E10:
+ bufpush(0x1E11);
+ break;
+ case 0x1E12:
+ bufpush(0x1E13);
+ break;
+ case 0x1E14:
+ bufpush(0x1E15);
+ break;
+ case 0x1E16:
+ bufpush(0x1E17);
+ break;
+ case 0x1E18:
+ bufpush(0x1E19);
+ break;
+ case 0x1E1A:
+ bufpush(0x1E1B);
+ break;
+ case 0x1E1C:
+ bufpush(0x1E1D);
+ break;
+ case 0x1E1E:
+ bufpush(0x1E1F);
+ break;
+ case 0x1E20:
+ bufpush(0x1E21);
+ break;
+ case 0x1E22:
+ bufpush(0x1E23);
+ break;
+ case 0x1E24:
+ bufpush(0x1E25);
+ break;
+ case 0x1E26:
+ bufpush(0x1E27);
+ break;
+ case 0x1E28:
+ bufpush(0x1E29);
+ break;
+ case 0x1E2A:
+ bufpush(0x1E2B);
+ break;
+ case 0x1E2C:
+ bufpush(0x1E2D);
+ break;
+ case 0x1E2E:
+ bufpush(0x1E2F);
+ break;
+ case 0x1E30:
+ bufpush(0x1E31);
+ break;
+ case 0x1E32:
+ bufpush(0x1E33);
+ break;
+ case 0x1E34:
+ bufpush(0x1E35);
+ break;
+ case 0x1E36:
+ bufpush(0x1E37);
+ break;
+ case 0x1E38:
+ bufpush(0x1E39);
+ break;
+ case 0x1E3A:
+ bufpush(0x1E3B);
+ break;
+ case 0x1E3C:
+ bufpush(0x1E3D);
+ break;
+ case 0x1E3E:
+ bufpush(0x1E3F);
+ break;
+ case 0x1E40:
+ bufpush(0x1E41);
+ break;
+ case 0x1E42:
+ bufpush(0x1E43);
+ break;
+ case 0x1E44:
+ bufpush(0x1E45);
+ break;
+ case 0x1E46:
+ bufpush(0x1E47);
+ break;
+ case 0x1E48:
+ bufpush(0x1E49);
+ break;
+ case 0x1E4A:
+ bufpush(0x1E4B);
+ break;
+ case 0x1E4C:
+ bufpush(0x1E4D);
+ break;
+ case 0x1E4E:
+ bufpush(0x1E4F);
+ break;
+ case 0x1E50:
+ bufpush(0x1E51);
+ break;
+ case 0x1E52:
+ bufpush(0x1E53);
+ break;
+ case 0x1E54:
+ bufpush(0x1E55);
+ break;
+ case 0x1E56:
+ bufpush(0x1E57);
+ break;
+ case 0x1E58:
+ bufpush(0x1E59);
+ break;
+ case 0x1E5A:
+ bufpush(0x1E5B);
+ break;
+ case 0x1E5C:
+ bufpush(0x1E5D);
+ break;
+ case 0x1E5E:
+ bufpush(0x1E5F);
+ break;
+ case 0x1E60:
+ bufpush(0x1E61);
+ break;
+ case 0x1E62:
+ bufpush(0x1E63);
+ break;
+ case 0x1E64:
+ bufpush(0x1E65);
+ break;
+ case 0x1E66:
+ bufpush(0x1E67);
+ break;
+ case 0x1E68:
+ bufpush(0x1E69);
+ break;
+ case 0x1E6A:
+ bufpush(0x1E6B);
+ break;
+ case 0x1E6C:
+ bufpush(0x1E6D);
+ break;
+ case 0x1E6E:
+ bufpush(0x1E6F);
+ break;
+ case 0x1E70:
+ bufpush(0x1E71);
+ break;
+ case 0x1E72:
+ bufpush(0x1E73);
+ break;
+ case 0x1E74:
+ bufpush(0x1E75);
+ break;
+ case 0x1E76:
+ bufpush(0x1E77);
+ break;
+ case 0x1E78:
+ bufpush(0x1E79);
+ break;
+ case 0x1E7A:
+ bufpush(0x1E7B);
+ break;
+ case 0x1E7C:
+ bufpush(0x1E7D);
+ break;
+ case 0x1E7E:
+ bufpush(0x1E7F);
+ break;
+ case 0x1E80:
+ bufpush(0x1E81);
+ break;
+ case 0x1E82:
+ bufpush(0x1E83);
+ break;
+ case 0x1E84:
+ bufpush(0x1E85);
+ break;
+ case 0x1E86:
+ bufpush(0x1E87);
+ break;
+ case 0x1E88:
+ bufpush(0x1E89);
+ break;
+ case 0x1E8A:
+ bufpush(0x1E8B);
+ break;
+ case 0x1E8C:
+ bufpush(0x1E8D);
+ break;
+ case 0x1E8E:
+ bufpush(0x1E8F);
+ break;
+ case 0x1E90:
+ bufpush(0x1E91);
+ break;
+ case 0x1E92:
+ bufpush(0x1E93);
+ break;
+ case 0x1E94:
+ bufpush(0x1E95);
+ break;
+ case 0x1E96:
+ bufpush(0x0068);
+ bufpush(0x0331);
+ break;
+ case 0x1E97:
+ bufpush(0x0074);
+ bufpush(0x0308);
+ break;
+ case 0x1E98:
+ bufpush(0x0077);
+ bufpush(0x030A);
+ break;
+ case 0x1E99:
+ bufpush(0x0079);
+ bufpush(0x030A);
+ break;
+ case 0x1E9A:
+ bufpush(0x0061);
+ bufpush(0x02BE);
+ break;
+ case 0x1E9B:
+ bufpush(0x1E61);
+ break;
+ case 0x1EA0:
+ bufpush(0x1EA1);
+ break;
+ case 0x1EA2:
+ bufpush(0x1EA3);
+ break;
+ case 0x1EA4:
+ bufpush(0x1EA5);
+ break;
+ case 0x1EA6:
+ bufpush(0x1EA7);
+ break;
+ case 0x1EA8:
+ bufpush(0x1EA9);
+ break;
+ case 0x1EAA:
+ bufpush(0x1EAB);
+ break;
+ case 0x1EAC:
+ bufpush(0x1EAD);
+ break;
+ case 0x1EAE:
+ bufpush(0x1EAF);
+ break;
+ case 0x1EB0:
+ bufpush(0x1EB1);
+ break;
+ case 0x1EB2:
+ bufpush(0x1EB3);
+ break;
+ case 0x1EB4:
+ bufpush(0x1EB5);
+ break;
+ case 0x1EB6:
+ bufpush(0x1EB7);
+ break;
+ case 0x1EB8:
+ bufpush(0x1EB9);
+ break;
+ case 0x1EBA:
+ bufpush(0x1EBB);
+ break;
+ case 0x1EBC:
+ bufpush(0x1EBD);
+ break;
+ case 0x1EBE:
+ bufpush(0x1EBF);
+ break;
+ case 0x1EC0:
+ bufpush(0x1EC1);
+ break;
+ case 0x1EC2:
+ bufpush(0x1EC3);
+ break;
+ case 0x1EC4:
+ bufpush(0x1EC5);
+ break;
+ case 0x1EC6:
+ bufpush(0x1EC7);
+ break;
+ case 0x1EC8:
+ bufpush(0x1EC9);
+ break;
+ case 0x1ECA:
+ bufpush(0x1ECB);
+ break;
+ case 0x1ECC:
+ bufpush(0x1ECD);
+ break;
+ case 0x1ECE:
+ bufpush(0x1ECF);
+ break;
+ case 0x1ED0:
+ bufpush(0x1ED1);
+ break;
+ case 0x1ED2:
+ bufpush(0x1ED3);
+ break;
+ case 0x1ED4:
+ bufpush(0x1ED5);
+ break;
+ case 0x1ED6:
+ bufpush(0x1ED7);
+ break;
+ case 0x1ED8:
+ bufpush(0x1ED9);
+ break;
+ case 0x1EDA:
+ bufpush(0x1EDB);
+ break;
+ case 0x1EDC:
+ bufpush(0x1EDD);
+ break;
+ case 0x1EDE:
+ bufpush(0x1EDF);
+ break;
+ case 0x1EE0:
+ bufpush(0x1EE1);
+ break;
+ case 0x1EE2:
+ bufpush(0x1EE3);
+ break;
+ case 0x1EE4:
+ bufpush(0x1EE5);
+ break;
+ case 0x1EE6:
+ bufpush(0x1EE7);
+ break;
+ case 0x1EE8:
+ bufpush(0x1EE9);
+ break;
+ case 0x1EEA:
+ bufpush(0x1EEB);
+ break;
+ case 0x1EEC:
+ bufpush(0x1EED);
+ break;
+ case 0x1EEE:
+ bufpush(0x1EEF);
+ break;
+ case 0x1EF0:
+ bufpush(0x1EF1);
+ break;
+ case 0x1EF2:
+ bufpush(0x1EF3);
+ break;
+ case 0x1EF4:
+ bufpush(0x1EF5);
+ break;
+ case 0x1EF6:
+ bufpush(0x1EF7);
+ break;
+ case 0x1EF8:
+ bufpush(0x1EF9);
+ break;
+ case 0x1F08:
+ bufpush(0x1F00);
+ break;
+ case 0x1F09:
+ bufpush(0x1F01);
+ break;
+ case 0x1F0A:
+ bufpush(0x1F02);
+ break;
+ case 0x1F0B:
+ bufpush(0x1F03);
+ break;
+ case 0x1F0C:
+ bufpush(0x1F04);
+ break;
+ case 0x1F0D:
+ bufpush(0x1F05);
+ break;
+ case 0x1F0E:
+ bufpush(0x1F06);
+ break;
+ case 0x1F0F:
+ bufpush(0x1F07);
+ break;
+ case 0x1F18:
+ bufpush(0x1F10);
+ break;
+ case 0x1F19:
+ bufpush(0x1F11);
+ break;
+ case 0x1F1A:
+ bufpush(0x1F12);
+ break;
+ case 0x1F1B:
+ bufpush(0x1F13);
+ break;
+ case 0x1F1C:
+ bufpush(0x1F14);
+ break;
+ case 0x1F1D:
+ bufpush(0x1F15);
+ break;
+ case 0x1F28:
+ bufpush(0x1F20);
+ break;
+ case 0x1F29:
+ bufpush(0x1F21);
+ break;
+ case 0x1F2A:
+ bufpush(0x1F22);
+ break;
+ case 0x1F2B:
+ bufpush(0x1F23);
+ break;
+ case 0x1F2C:
+ bufpush(0x1F24);
+ break;
+ case 0x1F2D:
+ bufpush(0x1F25);
+ break;
+ case 0x1F2E:
+ bufpush(0x1F26);
+ break;
+ case 0x1F2F:
+ bufpush(0x1F27);
+ break;
+ case 0x1F38:
+ bufpush(0x1F30);
+ break;
+ case 0x1F39:
+ bufpush(0x1F31);
+ break;
+ case 0x1F3A:
+ bufpush(0x1F32);
+ break;
+ case 0x1F3B:
+ bufpush(0x1F33);
+ break;
+ case 0x1F3C:
+ bufpush(0x1F34);
+ break;
+ case 0x1F3D:
+ bufpush(0x1F35);
+ break;
+ case 0x1F3E:
+ bufpush(0x1F36);
+ break;
+ case 0x1F3F:
+ bufpush(0x1F37);
+ break;
+ case 0x1F48:
+ bufpush(0x1F40);
+ break;
+ case 0x1F49:
+ bufpush(0x1F41);
+ break;
+ case 0x1F4A:
+ bufpush(0x1F42);
+ break;
+ case 0x1F4B:
+ bufpush(0x1F43);
+ break;
+ case 0x1F4C:
+ bufpush(0x1F44);
+ break;
+ case 0x1F4D:
+ bufpush(0x1F45);
+ break;
+ case 0x1F50:
+ bufpush(0x03C5);
+ bufpush(0x0313);
+ break;
+ case 0x1F52:
+ bufpush(0x03C5);
+ bufpush(0x0313);
+ bufpush(0x0300);
+ break;
+ case 0x1F54:
+ bufpush(0x03C5);
+ bufpush(0x0313);
+ bufpush(0x0301);
+ break;
+ case 0x1F56:
+ bufpush(0x03C5);
+ bufpush(0x0313);
+ bufpush(0x0342);
+ break;
+ case 0x1F59:
+ bufpush(0x1F51);
+ break;
+ case 0x1F5B:
+ bufpush(0x1F53);
+ break;
+ case 0x1F5D:
+ bufpush(0x1F55);
+ break;
+ case 0x1F5F:
+ bufpush(0x1F57);
+ break;
+ case 0x1F68:
+ bufpush(0x1F60);
+ break;
+ case 0x1F69:
+ bufpush(0x1F61);
+ break;
+ case 0x1F6A:
+ bufpush(0x1F62);
+ break;
+ case 0x1F6B:
+ bufpush(0x1F63);
+ break;
+ case 0x1F6C:
+ bufpush(0x1F64);
+ break;
+ case 0x1F6D:
+ bufpush(0x1F65);
+ break;
+ case 0x1F6E:
+ bufpush(0x1F66);
+ break;
+ case 0x1F6F:
+ bufpush(0x1F67);
+ break;
+ case 0x1F80:
+ bufpush(0x1F00);
+ bufpush(0x03B9);
+ break;
+ case 0x1F81:
+ bufpush(0x1F01);
+ bufpush(0x03B9);
+ break;
+ case 0x1F82:
+ bufpush(0x1F02);
+ bufpush(0x03B9);
+ break;
+ case 0x1F83:
+ bufpush(0x1F03);
+ bufpush(0x03B9);
+ break;
+ case 0x1F84:
+ bufpush(0x1F04);
+ bufpush(0x03B9);
+ break;
+ case 0x1F85:
+ bufpush(0x1F05);
+ bufpush(0x03B9);
+ break;
+ case 0x1F86:
+ bufpush(0x1F06);
+ bufpush(0x03B9);
+ break;
+ case 0x1F87:
+ bufpush(0x1F07);
+ bufpush(0x03B9);
+ break;
+ case 0x1F88:
+ bufpush(0x1F00);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F89:
+ bufpush(0x1F01);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F8A:
+ bufpush(0x1F02);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F8B:
+ bufpush(0x1F03);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F8C:
+ bufpush(0x1F04);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F8D:
+ bufpush(0x1F05);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F8E:
+ bufpush(0x1F06);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F8F:
+ bufpush(0x1F07);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F90:
+ bufpush(0x1F20);
+ bufpush(0x03B9);
+ break;
+ case 0x1F91:
+ bufpush(0x1F21);
+ bufpush(0x03B9);
+ break;
+ case 0x1F92:
+ bufpush(0x1F22);
+ bufpush(0x03B9);
+ break;
+ case 0x1F93:
+ bufpush(0x1F23);
+ bufpush(0x03B9);
+ break;
+ case 0x1F94:
+ bufpush(0x1F24);
+ bufpush(0x03B9);
+ break;
+ case 0x1F95:
+ bufpush(0x1F25);
+ bufpush(0x03B9);
+ break;
+ case 0x1F96:
+ bufpush(0x1F26);
+ bufpush(0x03B9);
+ break;
+ case 0x1F97:
+ bufpush(0x1F27);
+ bufpush(0x03B9);
+ break;
+ case 0x1F98:
+ bufpush(0x1F20);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F99:
+ bufpush(0x1F21);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F9A:
+ bufpush(0x1F22);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F9B:
+ bufpush(0x1F23);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F9C:
+ bufpush(0x1F24);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F9D:
+ bufpush(0x1F25);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F9E:
+ bufpush(0x1F26);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1F9F:
+ bufpush(0x1F27);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FA0:
+ bufpush(0x1F60);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA1:
+ bufpush(0x1F61);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA2:
+ bufpush(0x1F62);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA3:
+ bufpush(0x1F63);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA4:
+ bufpush(0x1F64);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA5:
+ bufpush(0x1F65);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA6:
+ bufpush(0x1F66);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA7:
+ bufpush(0x1F67);
+ bufpush(0x03B9);
+ break;
+ case 0x1FA8:
+ bufpush(0x1F60);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FA9:
+ bufpush(0x1F61);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FAA:
+ bufpush(0x1F62);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FAB:
+ bufpush(0x1F63);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FAC:
+ bufpush(0x1F64);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FAD:
+ bufpush(0x1F65);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FAE:
+ bufpush(0x1F66);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FAF:
+ bufpush(0x1F67);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FB2:
+ bufpush(0x1F70);
+ bufpush(0x03B9);
+ break;
+ case 0x1FB3:
+ bufpush(0x03B1);
+ bufpush(0x03B9);
+ break;
+ case 0x1FB4:
+ bufpush(0x03AC);
+ bufpush(0x03B9);
+ break;
+ case 0x1FB6:
+ bufpush(0x03B1);
+ bufpush(0x0342);
+ break;
+ case 0x1FB7:
+ bufpush(0x03B1);
+ bufpush(0x0342);
+ bufpush(0x03B9);
+ break;
+ case 0x1FB8:
+ bufpush(0x1FB0);
+ break;
+ case 0x1FB9:
+ bufpush(0x1FB1);
+ break;
+ case 0x1FBA:
+ bufpush(0x1F70);
+ break;
+ case 0x1FBB:
+ bufpush(0x1F71);
+ break;
+ case 0x1FBC:
+ bufpush(0x03B1);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FBE:
+ bufpush(0x03B9);
+ break;
+ case 0x1FC2:
+ bufpush(0x1F74);
+ bufpush(0x03B9);
+ break;
+ case 0x1FC3:
+ bufpush(0x03B7);
+ bufpush(0x03B9);
+ break;
+ case 0x1FC4:
+ bufpush(0x03AE);
+ bufpush(0x03B9);
+ break;
+ case 0x1FC6:
+ bufpush(0x03B7);
+ bufpush(0x0342);
+ break;
+ case 0x1FC7:
+ bufpush(0x03B7);
+ bufpush(0x0342);
+ bufpush(0x03B9);
+ break;
+ case 0x1FC8:
+ bufpush(0x1F72);
+ break;
+ case 0x1FC9:
+ bufpush(0x1F73);
+ break;
+ case 0x1FCA:
+ bufpush(0x1F74);
+ break;
+ case 0x1FCB:
+ bufpush(0x1F75);
+ break;
+ case 0x1FCC:
+ bufpush(0x03B7);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x1FD2:
+ bufpush(0x03B9);
+ bufpush(0x0308);
+ bufpush(0x0300);
+ break;
+ case 0x1FD3:
+ bufpush(0x03B9);
+ bufpush(0x0308);
+ bufpush(0x0301);
+ break;
+ case 0x1FD6:
+ bufpush(0x03B9);
+ bufpush(0x0342);
+ break;
+ case 0x1FD7:
+ bufpush(0x03B9);
+ bufpush(0x0308);
+ bufpush(0x0342);
+ break;
+ case 0x1FD8:
+ bufpush(0x1FD0);
+ break;
+ case 0x1FD9:
+ bufpush(0x1FD1);
+ break;
+ case 0x1FDA:
+ bufpush(0x1F76);
+ break;
+ case 0x1FDB:
+ bufpush(0x1F77);
+ break;
+ case 0x1FE2:
+ bufpush(0x03C5);
+ bufpush(0x0308);
+ bufpush(0x0300);
+ break;
+ case 0x1FE3:
+ bufpush(0x03C5);
+ bufpush(0x0308);
+ bufpush(0x0301);
+ break;
+ case 0x1FE4:
+ bufpush(0x03C1);
+ bufpush(0x0313);
+ break;
+ case 0x1FE6:
+ bufpush(0x03C5);
+ bufpush(0x0342);
+ break;
+ case 0x1FE7:
+ bufpush(0x03C5);
+ bufpush(0x0308);
+ bufpush(0x0342);
+ break;
+ case 0x1FE8:
+ bufpush(0x1FE0);
+ break;
+ case 0x1FE9:
+ bufpush(0x1FE1);
+ break;
+ case 0x1FEA:
+ bufpush(0x1F7A);
+ break;
+ case 0x1FEB:
+ bufpush(0x1F7B);
+ break;
+ case 0x1FEC:
+ bufpush(0x1FE5);
+ break;
+ case 0x1FF2:
+ bufpush(0x1F7C);
+ bufpush(0x03B9);
+ break;
+ case 0x1FF3:
+ bufpush(0x03C9);
+ bufpush(0x03B9);
+ break;
+ case 0x1FF4:
+ bufpush(0x03CE);
+ bufpush(0x03B9);
+ break;
+ case 0x1FF6:
+ bufpush(0x03C9);
+ bufpush(0x0342);
+ break;
+ case 0x1FF7:
+ bufpush(0x03C9);
+ bufpush(0x0342);
+ bufpush(0x03B9);
+ break;
+ case 0x1FF8:
+ bufpush(0x1F78);
+ break;
+ case 0x1FF9:
+ bufpush(0x1F79);
+ break;
+ case 0x1FFA:
+ bufpush(0x1F7C);
+ break;
+ case 0x1FFB:
+ bufpush(0x1F7D);
+ break;
+ case 0x1FFC:
+ bufpush(0x03C9);
+ bufpush(0x03B9);
+ break;
+ case 0x:
+ break;
+ case 0x2126:
+ bufpush(0x03C9);
+ break;
+ case 0x212A:
+ bufpush(0x006B);
+ break;
+ case 0x212B:
+ bufpush(0x00E5);
+ break;
+ case 0x2160:
+ bufpush(0x2170);
+ break;
+ case 0x2161:
+ bufpush(0x2171);
+ break;
+ case 0x2162:
+ bufpush(0x2172);
+ break;
+ case 0x2163:
+ bufpush(0x2173);
+ break;
+ case 0x2164:
+ bufpush(0x2174);
+ break;
+ case 0x2165:
+ bufpush(0x2175);
+ break;
+ case 0x2166:
+ bufpush(0x2176);
+ break;
+ case 0x2167:
+ bufpush(0x2177);
+ break;
+ case 0x2168:
+ bufpush(0x2178);
+ break;
+ case 0x2169:
+ bufpush(0x2179);
+ break;
+ case 0x216A:
+ bufpush(0x217A);
+ break;
+ case 0x216B:
+ bufpush(0x217B);
+ break;
+ case 0x216C:
+ bufpush(0x217C);
+ break;
+ case 0x216D:
+ bufpush(0x217D);
+ break;
+ case 0x216E:
+ bufpush(0x217E);
+ break;
+ case 0x216F:
+ bufpush(0x217F);
+ break;
+ case 0x24B6:
+ bufpush(0x24D0);
+ break;
+ case 0x24B7:
+ bufpush(0x24D1);
+ break;
+ case 0x24B8:
+ bufpush(0x24D2);
+ break;
+ case 0x24B9:
+ bufpush(0x24D3);
+ break;
+ case 0x24BA:
+ bufpush(0x24D4);
+ break;
+ case 0x24BB:
+ bufpush(0x24D5);
+ break;
+ case 0x24BC:
+ bufpush(0x24D6);
+ break;
+ case 0x24BD:
+ bufpush(0x24D7);
+ break;
+ case 0x24BE:
+ bufpush(0x24D8);
+ break;
+ case 0x24BF:
+ bufpush(0x24D9);
+ break;
+ case 0x24C0:
+ bufpush(0x24DA);
+ break;
+ case 0x24C1:
+ bufpush(0x24DB);
+ break;
+ case 0x24C2:
+ bufpush(0x24DC);
+ break;
+ case 0x24C3:
+ bufpush(0x24DD);
+ break;
+ case 0x24C4:
+ bufpush(0x24DE);
+ break;
+ case 0x24C5:
+ bufpush(0x24DF);
+ break;
+ case 0x24C6:
+ bufpush(0x24E0);
+ break;
+ case 0x24C7:
+ bufpush(0x24E1);
+ break;
+ case 0x24C8:
+ bufpush(0x24E2);
+ break;
+ case 0x24C9:
+ bufpush(0x24E3);
+ break;
+ case 0x24CA:
+ bufpush(0x24E4);
+ break;
+ case 0x24CB:
+ bufpush(0x24E5);
+ break;
+ case 0x24CC:
+ bufpush(0x24E6);
+ break;
+ case 0x24CD:
+ bufpush(0x24E7);
+ break;
+ case 0x24CE:
+ bufpush(0x24E8);
+ break;
+ case 0x24CF:
+ bufpush(0x24E9);
+ break;
+ case 0xFB00:
+ bufpush(0x0066);
+ bufpush(0x0066);
+ break;
+ case 0xFB01:
+ bufpush(0x0066);
+ bufpush(0x0069);
+ break;
+ case 0xFB02:
+ bufpush(0x0066);
+ bufpush(0x006C);
+ break;
+ case 0xFB03:
+ bufpush(0x0066);
+ bufpush(0x0066);
+ bufpush(0x0069);
+ break;
+ case 0xFB04:
+ bufpush(0x0066);
+ bufpush(0x0066);
+ bufpush(0x006C);
+ break;
+ case 0xFB05:
+ bufpush(0x0073);
+ bufpush(0x0074);
+ break;
+ case 0xFB06:
+ bufpush(0x0073);
+ bufpush(0x0074);
+ break;
+ case 0xFB13:
+ bufpush(0x0574);
+ bufpush(0x0576);
+ break;
+ case 0xFB14:
+ bufpush(0x0574);
+ bufpush(0x0565);
+ break;
+ case 0xFB15:
+ bufpush(0x0574);
+ bufpush(0x056B);
+ break;
+ case 0xFB16:
+ bufpush(0x057E);
+ bufpush(0x0576);
+ break;
+ case 0xFB17:
+ bufpush(0x0574);
+ bufpush(0x056D);
+ break;
+ case 0xFF21:
+ bufpush(0xFF41);
+ break;
+ case 0xFF22:
+ bufpush(0xFF42);
+ break;
+ case 0xFF23:
+ bufpush(0xFF43);
+ break;
+ case 0xFF24:
+ bufpush(0xFF44);
+ break;
+ case 0xFF25:
+ bufpush(0xFF45);
+ break;
+ case 0xFF26:
+ bufpush(0xFF46);
+ break;
+ case 0xFF27:
+ bufpush(0xFF47);
+ break;
+ case 0xFF28:
+ bufpush(0xFF48);
+ break;
+ case 0xFF29:
+ bufpush(0xFF49);
+ break;
+ case 0xFF2A:
+ bufpush(0xFF4A);
+ break;
+ case 0xFF2B:
+ bufpush(0xFF4B);
+ break;
+ case 0xFF2C:
+ bufpush(0xFF4C);
+ break;
+ case 0xFF2D:
+ bufpush(0xFF4D);
+ break;
+ case 0xFF2E:
+ bufpush(0xFF4E);
+ break;
+ case 0xFF2F:
+ bufpush(0xFF4F);
+ break;
+ case 0xFF30:
+ bufpush(0xFF50);
+ break;
+ case 0xFF31:
+ bufpush(0xFF51);
+ break;
+ case 0xFF32:
+ bufpush(0xFF52);
+ break;
+ case 0xFF33:
+ bufpush(0xFF53);
+ break;
+ case 0xFF34:
+ bufpush(0xFF54);
+ break;
+ case 0xFF35:
+ bufpush(0xFF55);
+ break;
+ case 0xFF36:
+ bufpush(0xFF56);
+ break;
+ case 0xFF37:
+ bufpush(0xFF57);
+ break;
+ case 0xFF38:
+ bufpush(0xFF58);
+ break;
+ case 0xFF39:
+ bufpush(0xFF59);
+ break;
+ case 0xFF3A:
+ bufpush(0xFF5A);
+ break;
+ case 0x10400:
+ bufpush(0x10428);
+ break;
+ case 0x10401:
+ bufpush(0x10429);
+ break;
+ case 0x10402:
+ bufpush(0x1042A);
+ break;
+ case 0x10403:
+ bufpush(0x1042B);
+ break;
+ case 0x10404:
+ bufpush(0x1042C);
+ break;
+ case 0x10405:
+ bufpush(0x1042D);
+ break;
+ case 0x10406:
+ bufpush(0x1042E);
+ break;
+ case 0x10407:
+ bufpush(0x1042F);
+ break;
+ case 0x10408:
+ bufpush(0x10430);
+ break;
+ case 0x10409:
+ bufpush(0x10431);
+ break;
+ case 0x1040A:
+ bufpush(0x10432);
+ break;
+ case 0x1040B:
+ bufpush(0x10433);
+ break;
+ case 0x1040C:
+ bufpush(0x10434);
+ break;
+ case 0x1040D:
+ bufpush(0x10435);
+ break;
+ case 0x1040E:
+ bufpush(0x10436);
+ break;
+ case 0x1040F:
+ bufpush(0x10437);
+ break;
+ case 0x10410:
+ bufpush(0x10438);
+ break;
+ case 0x10411:
+ bufpush(0x10439);
+ break;
+ case 0x10412:
+ bufpush(0x1043A);
+ break;
+ case 0x10413:
+ bufpush(0x1043B);
+ break;
+ case 0x10414:
+ bufpush(0x1043C);
+ break;
+ case 0x10415:
+ bufpush(0x1043D);
+ break;
+ case 0x10416:
+ bufpush(0x1043E);
+ break;
+ case 0x10417:
+ bufpush(0x1043F);
+ break;
+ case 0x10418:
+ bufpush(0x10440);
+ break;
+ case 0x10419:
+ bufpush(0x10441);
+ break;
+ case 0x1041A:
+ bufpush(0x10442);
+ break;
+ case 0x1041B:
+ bufpush(0x10443);
+ break;
+ case 0x1041C:
+ bufpush(0x10444);
+ break;
+ case 0x1041D:
+ bufpush(0x10445);
+ break;
+ case 0x1041E:
+ bufpush(0x10446);
+ break;
+ case 0x1041F:
+ bufpush(0x10447);
+ break;
+ case 0x10420:
+ bufpush(0x10448);
+ break;
+ case 0x10421:
+ bufpush(0x10449);
+ break;
+ case 0x10422:
+ bufpush(0x1044A);
+ break;
+ case 0x10423:
+ bufpush(0x1044B);
+ break;
+ case 0x10424:
+ bufpush(0x1044C);
+ break;
+ case 0x10425:
+ bufpush(0x1044D);
+ break;
+ }
diff --git a/src/debug.h b/src/debug.h
new file mode 100644
index 0000000..af1d017
--- /dev/null
+++ b/src/debug.h
@@ -0,0 +1,36 @@
+#ifndef __debug_h__
+#define __debug_h__
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+/*
+ * Small logging and error-handling macros.
+ *
+ * In every macro M must be a string literal: it is pasted between other
+ * literals to build the fprintf format.  `##__VA_ARGS__` is the GNU
+ * extension that swallows the preceding comma when no extra arguments
+ * are supplied.
+ *
+ * check, sentinel and check_debug jump to a label named `error`, which
+ * the calling function has to provide.  They are wrapped in
+ * do { ... } while (0) so that each use is exactly one statement and can
+ * be placed in an unbraced if/else without changing which `if` an `else`
+ * binds to.
+ */
+
+/* debug: trace message on stderr; expands to nothing when NDEBUG is set. */
+#ifdef NDEBUG
+#define debug(M, ...)
+#else
+#define debug(M, ...) \
+    fprintf(stderr, "DEBUG %s:%d: " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
+#endif
+
+/* clean_errno: text for the current errno, or "None" when errno is 0. */
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+
+/* log_err / log_warn: message on stderr with file, line and errno text. */
+#define log_err(M, ...) \
+    fprintf(stderr, "[ERROR] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
+            clean_errno(), ##__VA_ARGS__)
+
+#define log_warn(M, ...) \
+    fprintf(stderr, "[WARN] (%s:%d: errno: %s) " M "\n", __FILE__, __LINE__, \
+            clean_errno(), ##__VA_ARGS__)
+
+/* log_info: message on stderr with file and line only. */
+#define log_info(M, ...) fprintf(stderr, "[INFO] (%s:%d) " M "\n", __FILE__, \
+                                 __LINE__, ##__VA_ARGS__)
+
+/* check: if A is false, log an error, clear errno and jump to `error`. */
+#define check(A, M, ...) \
+    do { \
+        if (!(A)) { log_err(M, ##__VA_ARGS__); errno = 0; goto error; } \
+    } while (0)
+
+/* sentinel: unconditionally log an error, clear errno and jump to `error`. */
+#define sentinel(M, ...) \
+    do { log_err(M, ##__VA_ARGS__); errno = 0; goto error; } while (0)
+
+/* check_debug: like check, but reports through debug() instead of log_err(). */
+#define check_debug(A, M, ...) \
+    do { \
+        if (!(A)) { debug(M, ##__VA_ARGS__); errno = 0; goto error; } \
+    } while (0)
+
+#endif
diff --git a/src/detab.c b/src/detab.c
new file mode 100644
index 0000000..e03fcf7
--- /dev/null
+++ b/src/detab.c
@@ -0,0 +1,48 @@
+#include "bstrlib.h"
+
+// UTF-8 aware detab: replaces every tab in s (in place) with the spaces
+// needed to reach the next multiple-of-4 column.  Assumes s has no
+// newlines, or only a final newline.
+//
+// s     - string to modify.
+// utf8  - if nonzero, columns are counted in characters and the byte
+//         sequence is checked for well-formed lead/continuation bytes;
+//         if zero, every byte counts as one column.
+//
+// Returns 0 on success, BSTR_ERR on invalid UTF-8 (including a multibyte
+// sequence cut off by the end of the string) or if bstrlib fails to edit s.
+extern int bdetab(bstring s, int utf8)
+{
+    unsigned char c;
+    int pos = 0;   // a count of characters
+    int byte = 0;  // a count of bytes
+    int high_chars_to_skip = 0;  // continuation bytes still expected
+    int numspaces = 0;
+    // Iterate by length rather than stopping at the first NUL byte.
+    while (byte < blength(s)) {
+        c = bchar(s, byte);
+        if (utf8 && high_chars_to_skip > 0) {
+            // Only 10xxxxxx is a continuation byte; a lead byte here means
+            // the previous sequence was cut short.
+            if ((c & 0xC0) == 0x80) {
+                high_chars_to_skip--;
+                byte++;
+            } else {
+                return BSTR_ERR;  // invalid utf-8
+            }
+        } else if (c == '\t') {
+            numspaces = 4 - (pos % 4);
+            if (bdelete(s, byte, 1) != BSTR_OK ||
+                binsertch(s, byte, numspaces, ' ') != BSTR_OK) {
+                return BSTR_ERR;
+            }
+            byte += numspaces;
+            pos += numspaces;
+        } else if (c < 0x80 || !utf8) {
+            // ASCII (0x80 itself is a continuation byte, not ASCII).
+            byte++;
+            pos++;
+        } else {  // lead byte of a multibyte utf8 sequence
+            if (c >> 1 == 0176) {         // 1111110x (legacy 6-byte form)
+                high_chars_to_skip = 5;
+            } else if (c >> 2 == 076) {   // 111110xx (legacy 5-byte form)
+                high_chars_to_skip = 4;
+            } else if (c >> 3 == 036) {   // 11110xxx
+                high_chars_to_skip = 3;
+            } else if (c >> 4 == 016) {   // 1110xxxx
+                high_chars_to_skip = 2;
+            } else if (c >> 5 == 06) {    // 110xxxxx
+                high_chars_to_skip = 1;
+            } else {
+                return BSTR_ERR;  // stray continuation byte or 0xFE/0xFF
+            }
+            pos++;
+            byte++;
+        }
+    }
+    // A sequence still open at end of input is truncated UTF-8.
+    return high_chars_to_skip > 0 ? BSTR_ERR : 0;
+}
diff --git a/src/getopt.c b/src/getopt.c
new file mode 100644
index 0000000..321dd9f
--- /dev/null
+++ b/src/getopt.c
@@ -0,0 +1,199 @@
+/* $Id: getopt.c 4022 2008-03-31 06:11:07Z rra $
+ *
+ * Replacement implementation of getopt.
+ *
+ * This is a replacement implementation for getopt based on the my_getopt
+ * distribution by Benjamin Sittler. Only the getopt interface is included,
+ * since remctl doesn't use GNU long options, and the code has been rearranged
+ * and reworked somewhat to fit with the remctl coding style.
+ *
+ * Copyright 1997, 2000, 2001, 2002 Benjamin Sittler
+ * Copyright 2008 Russ Allbery <rra@stanford.edu>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <config.h>
+#include <portable/system.h>
+#include <portable/getopt.h>
+
+/*
+ * If we're running the test suite, rename getopt and the global variables to
+ * avoid conflicts with the system version.
+ */
+#if TESTING
+# define getopt test_getopt
+int test_getopt(int, char **, const char *);
+# define optind test_optind
+# define opterr test_opterr
+# define optopt test_optopt
+# define optarg test_optarg
+#endif
+
+/*
+ * Initialize global interface variables.
+ *
+ * optind - index in argv of the next element getopt will look at.
+ * opterr - when nonzero, getopt prints its own diagnostics to stderr.
+ * optopt - the option character getopt examined most recently.
+ * optarg - argument of the option just returned, or NULL if it has none.
+ */
+int optind = 1;
+int opterr = 1;
+int optopt = 0;
+char *optarg = NULL;
+
+/*
+ * This is the plain old UNIX getopt, with GNU-style extensions.  If you're
+ * porting some piece of UNIX software, this is all you need.  It supports
+ * GNU-style permutation and optional arguments, but does not support the GNU
+ * -W extension.
+ *
+ * Returns the option character found, 1 for a non-option argument when
+ * optstring starts with '-', '?' for an unknown option, ':' or '?' for a
+ * missing argument (':' only when optstring asks for it), and -1 when there
+ * are no more options.  optind, optopt and optarg are updated as a side
+ * effect; diagnostics go to stderr when opterr is nonzero.
+ *
+ * This function is not re-entrant or thread-safe, has static variables, and
+ * generally isn't a great interface, but normally you only call it once.
+ */
+int
+getopt(int argc, char *argv[], const char *optstring)
+{
+    const char *p;
+    size_t offset = 0;
+    char mode = '\0';
+    int colon_mode = 0;
+    int option = -1;
+
+    /* Holds the current position in the parameter being parsed. */
+    static int charind = 0;
+
+    /*
+     * By default, getopt permutes argv as it scans and leaves all non-options
+     * at the end.  This can be changed with the first character of optstring
+     * or the environment variable POSIXLY_CORRECT.  With a first character of
+     * '+' or when POSIXLY_CORRECT is set, option processing stops at the
+     * first non-option.  If the first character is '-', each non-option argv
+     * element is handled as if it were the argument of an option with
+     * character code 1.  mode holds this character.
+     *
+     * After the optional leading '+' and '-', optstring may contain ':'.  If
+     * present, missing arguments return ':' instead of '?'.  colon_mode holds
+     * this setting.
+     *
+     * The prefix is always parsed, so that it is skipped when matching option
+     * characters and so that colon_mode reflects optstring rather than the
+     * environment; POSIXLY_CORRECT then only forces the '+' scanning mode.
+     */
+    if (optstring[offset] == '+' || optstring[offset] == '-') {
+        mode = optstring[offset];
+        offset++;
+    }
+    if (optstring[offset] == ':') {
+        colon_mode = 1;
+        offset++;
+    }
+    if (getenv("POSIXLY_CORRECT") != NULL)
+        mode = '+';
+
+    /*
+     * charind holds where we left off.  If it's set, we were in the middle
+     * of an argv element; if not, we pick up with the next element of
+     * optind.
+     */
+    optarg = NULL;
+    if (charind == 0) {
+        if (optind >= argc)
+            option = -1;
+        else if (strcmp(argv[optind], "--") == 0) {
+            optind++;
+            option = -1;
+        } else if (argv[optind][0] != '-' || argv[optind][1] == '\0') {
+            char *tmp;
+            int i, j, k, end;
+
+            if (mode == '+')
+                option = -1;
+            else if (mode == '-') {
+                optarg = argv[optind];
+                optind++;
+                option = 1;
+            } else {
+                /*
+                 * Permutation: find the next option, parse it recursively,
+                 * then rotate the skipped non-options behind it.
+                 */
+                for (i = optind + 1, j = optind; i < argc; i++)
+                    if ((argv[i][0] == '-') && (argv[i][1] != '\0')) {
+                        optind = i;
+                        option = getopt(argc, argv, optstring);
+                        while (i > j) {
+                            --i;
+                            tmp = argv[i];
+                            end = (charind == 0) ? optind - 1 : optind;
+                            for (k = i; k + 1 <= end; k++) {
+                                argv[k] = argv[k + 1];
+                            }
+                            argv[end] = tmp;
+                            --optind;
+                        }
+                        break;
+                    }
+                if (i == argc)
+                    option = -1;
+            }
+            return option;
+        } else {
+            charind = 1;
+        }
+    }
+    if (charind != 0) {
+        optopt = argv[optind][charind];
+        for (p = optstring + offset; *p != '\0'; p++)
+            if (optopt == *p) {
+                p++;
+                if (*p == ':') {
+                    if (argv[optind][charind + 1] != '\0') {
+                        /* Argument attached to the option: -ovalue. */
+                        optarg = &argv[optind][charind + 1];
+                        optind++;
+                        charind = 0;
+                    } else {
+                        p++;
+                        /* "::" marks an optional argument; none is taken. */
+                        if (*p != ':') {
+                            charind = 0;
+                            optind++;
+                            if (optind >= argc) {
+                                if (opterr)
+                                    fprintf(stderr, "%s: option requires"
+                                            " an argument -- %c\n", argv[0],
+                                            optopt);
+                                option = colon_mode ? ':' : '?';
+                                goto done;
+                            } else {
+                                optarg = argv[optind];
+                                optind++;
+                            }
+                        }
+                    }
+                }
+                option = optopt;
+                /*
+                 * Stop scanning: p may already sit on the terminating NUL,
+                 * and the loop increment would step past the end of
+                 * optstring.
+                 */
+                break;
+            }
+        if (option == -1) {
+            if (opterr)
+                fprintf(stderr, "%s: illegal option -- %c\n", argv[0], optopt);
+            option = '?';
+        }
+    }
+
+done:
+    if (charind != 0) {
+        charind++;
+        if (argv[optind][charind] == '\0') {
+            optind++;
+            charind = 0;
+        }
+    }
+    if (optind > argc)
+        optind = argc;
+    return option;
+}
diff --git a/src/html.c b/src/html.c
new file mode 100644
index 0000000..56d5dbb
--- /dev/null
+++ b/src/html.c
@@ -0,0 +1,276 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include "bstrlib.h"
+#include "stmd.h"
+#include "debug.h"
+#include "scanners.h"
+
+// Functions to convert block and inline lists to HTML strings.
+
+// Return a newly allocated copy of inp in which '<', '>', '"' and '&' are
+// replaced by the corresponding HTML entities.  When preserve_entities is
+// set, an '&' that scan_entity recognises as the start of an entity is
+// skipped over unchanged.  The caller owns the returned string.
+static bstring escape_html(bstring inp, bool preserve_entities)
+{
+    bstring specials = blk2bstr("&<>\"", 4);
+    bstring out = bstrcpy(inp);
+    bstring replacement;
+    const char *entity_text;
+    int entity_len;
+    int matched;
+    int at = 0;
+    char c;
+
+    for (at = binchr(out, at, specials); at != BSTR_ERR;
+         at = binchr(out, at, specials)) {
+        c = bchar(out, at);
+        if (c == '<') {
+            entity_text = "&lt;";
+            entity_len = 4;
+        } else if (c == '>') {
+            entity_text = "&gt;";
+            entity_len = 4;
+        } else if (c == '"') {
+            entity_text = "&quot;";
+            entity_len = 6;
+        } else if (c == '&') {
+            if (preserve_entities && (matched = scan_entity(out, at))) {
+                at += matched;  // leave the existing entity alone
+                continue;
+            }
+            entity_text = "&amp;";
+            entity_len = 5;
+        } else {
+            // binchr only stops on the four characters above.
+            bdelete(out, at, 1);
+            log_err("unexpected character %02x", c);
+            continue;
+        }
+        // Swap the single character for its entity and step past it.
+        bdelete(out, at, 1);
+        replacement = blk2bstr(entity_text, entity_len);
+        binsert(out, at, replacement, ' ');
+        bdestroy(replacement);
+        at += entity_len;
+    }
+    bdestroy(specials);
+    return out;
+}
+
+// Append a newline to buffer unless it is empty or already ends with one
+// (a trailing NUL byte is treated like an empty buffer).
+static inline void cr(bstring buffer)
+{
+    int last = bchar(buffer, blength(buffer) - 1);
+    if (last == '\0' || last == '\n') {
+        return;
+    }
+    bconchar(buffer, '\n');
+}
+
+// Convert a block list to HTML.
+//
+// b      - head of the sibling list to render (may be NULL: empty output).
+// result - on success receives a newly allocated bstring owned by the caller.
+// tight  - if true, paragraphs are emitted without <p> wrappers
+//          (used for the items of tight lists).
+//
+// Returns 0 on success.  Returns -1 if rendering any nested content fails;
+// in that case *result is left untouched and the partial output is freed.
+extern int blocks_to_html(block* b, bstring* result, bool tight)
+{
+    bstring contents = NULL;
+    bstring escaped, escaped2;
+    struct bstrList * info_words;
+    struct ListData * data;
+    bstring mbstart;
+    bstring html = blk2bstr("", 0);
+
+    while(b != NULL) {
+        switch(b->tag) {
+        case document:
+            check(blocks_to_html(b->children, &contents, false) == 0,
+                  "error converting blocks to html");
+            bformata(html, "%s", contents->data);
+            bdestroy(contents);
+            break;
+        case paragraph:
+            check(inlines_to_html(b->inline_content, &contents) == 0,
+                  "error converting inlines to html");
+            if (tight) {
+                bformata(html, "%s", contents->data);
+            } else {
+                cr(html);
+                bformata(html, "<p>%s</p>", contents->data);
+                cr(html);
+            }
+            bdestroy(contents);
+            break;
+        case block_quote:
+            check(blocks_to_html(b->children, &contents, false) == 0,
+                  "error converting blocks to html");
+            cr(html);
+            bformata(html, "<blockquote>\n%s</blockquote>", contents->data);
+            cr(html);
+            bdestroy(contents);
+            break;
+        case list_item:
+            check(blocks_to_html(b->children, &contents, tight) == 0,
+                  "error converting blocks to html");
+            brtrimws(contents);
+            cr(html);
+            bformata(html, "<li>%s</li>", contents->data);
+            cr(html);
+            bdestroy(contents);
+            break;
+        case list:
+            // make sure a list starts at the beginning of the line:
+            cr(html);
+            data = &(b->attributes.list_data);
+            check(blocks_to_html(b->children, &contents, data->tight) == 0,
+                  "error converting blocks to html");
+            // the start attribute is only emitted when it differs from 1
+            mbstart = bformat(" start=\"%d\"", data->start);
+            bformata(html, "<%s%s>\n%s</%s>",
+                     data->list_type == bullet ? "ul" : "ol",
+                     data->start == 1 ? "" : (char*) mbstart->data,
+                     contents->data,
+                     data->list_type == bullet ? "ul" : "ol");
+            cr(html);
+            bdestroy(contents);
+            bdestroy(mbstart);
+            break;
+        case atx_header:
+        case setext_header:
+            check(inlines_to_html(b->inline_content, &contents) == 0,
+                  "error converting inlines to html");
+            cr(html);
+            bformata(html, "<h%d>%s</h%d>",
+                     b->attributes.header_level,
+                     contents->data,
+                     b->attributes.header_level);
+            cr(html);
+            bdestroy(contents);
+            break;
+        case indented_code:
+            escaped = escape_html(b->string_content, false);
+            cr(html);
+            bformata(html, "<pre><code>%s</code></pre>", escaped->data);
+            cr(html);
+            bdestroy(escaped);
+            break;
+        case fenced_code:
+            escaped = escape_html(b->string_content, false);
+            cr(html);
+            bformata(html, "<pre");
+            if (blength(b->attributes.fenced_code_data.info) > 0) {
+                // the first word of the info string becomes the class
+                escaped2 = escape_html(b->attributes.fenced_code_data.info, true);
+                info_words = bsplit(escaped2, ' ');
+                bformata(html, " class=\"%s\"", info_words->entry[0]->data);
+                bdestroy(escaped2);
+                bstrListDestroy(info_words);
+            }
+            bformata(html, "><code>%s</code></pre>", escaped->data);
+            cr(html);
+            bdestroy(escaped);
+            break;
+        case html_block:
+            bformata(html, "%s", b->string_content->data);
+            break;
+        case hrule:
+            bformata(html, "<hr />");
+            cr(html);
+            break;
+        case reference_def:
+            break;
+        default:
+            log_warn("block type %d not implemented\n", b->tag);
+            break;
+        }
+        b = b->next;
+    }
+    *result = html;
+    return 0;
+ error:
+    // Release the partial output.  contents is not freed here: the failing
+    // callee never stored a result, so it is either NULL or already
+    // destroyed by an earlier iteration.
+    bdestroy(html);
+    return -1;
+}
+
+// Convert an inline list to HTML.
+//
+// ils    - head of the inline list to render (may be NULL: empty output).
+// result - on success receives a newly allocated bstring owned by the caller.
+//
+// Returns 0 on success.  Returns -1 if rendering nested inlines fails; in
+// that case *result is left untouched and the partial output is freed.
+extern int inlines_to_html(inl* ils, bstring* result)
+{
+    bstring contents = NULL;
+    bstring html = blk2bstr("", 0);
+    bstring mbtitle, escaped, escaped2;
+
+    while(ils != NULL) {
+        switch(ils->tag) {
+        case str:
+            escaped = escape_html(ils->content.literal, false);
+            bformata(html, "%s", escaped->data);
+            bdestroy(escaped);
+            break;
+        case linebreak:
+            bformata(html, "<br />\n");
+            break;
+        case softbreak:
+            bformata(html, "\n");
+            break;
+        case code:
+            escaped = escape_html(ils->content.literal, false);
+            bformata(html, "<code>%s</code>", escaped->data);
+            bdestroy(escaped);
+            break;
+        case raw_html:
+        case entity:
+            // passed through verbatim
+            bformata(html, "%s", ils->content.literal->data);
+            break;
+        case link:
+            check(inlines_to_html(ils->content.inlines, &contents) == 0,
+                  "error converting inlines to html");
+            if (blength(ils->content.linkable.title) > 0) {
+                escaped = escape_html(ils->content.linkable.title, true);
+                mbtitle = bformat(" title=\"%s\"", escaped->data);
+                bdestroy(escaped);
+            } else {
+                mbtitle = blk2bstr("",0);
+            }
+            escaped = escape_html(ils->content.linkable.url, true);
+            bformata(html, "<a href=\"%s\"%s>%s</a>",
+                     escaped->data,
+                     mbtitle->data,
+                     contents->data);
+            bdestroy(escaped);
+            bdestroy(mbtitle);
+            bdestroy(contents);
+            break;
+        case image:
+            check(inlines_to_html(ils->content.inlines, &contents) == 0,
+                  "error converting inlines to html");
+            escaped = escape_html(ils->content.linkable.url, true);
+            // the rendered description is escaped again for use as alt text
+            escaped2 = escape_html(contents, false);
+            bdestroy(contents);
+            bformata(html, "<img src=\"%s\" alt=\"%s\"",
+                     escaped->data, escaped2->data);
+            bdestroy(escaped);
+            bdestroy(escaped2);
+            if (blength(ils->content.linkable.title) > 0) {
+                escaped = escape_html(ils->content.linkable.title, true);
+                bformata(html, " title=\"%s\"", escaped->data);
+                bdestroy(escaped);
+            }
+            bformata(html, " />");
+            break;
+        case strong:
+            check(inlines_to_html(ils->content.inlines, &contents) == 0,
+                  "error converting inlines to html");
+            bformata(html, "<strong>%s</strong>", contents->data);
+            bdestroy(contents);
+            break;
+        case emph:
+            check(inlines_to_html(ils->content.inlines, &contents) == 0,
+                  "error converting inlines to html");
+            bformata(html, "<em>%s</em>", contents->data);
+            bdestroy(contents);
+            break;
+        }
+        ils = ils->next;
+    }
+    *result = html;
+    return 0;
+ error:
+    // Release the partial output.  contents is not freed here: the failing
+    // callee never stored a result, so it is either NULL or already
+    // destroyed by an earlier iteration.
+    bdestroy(html);
+    return -1;
+}
diff --git a/src/inlines.c b/src/inlines.c
new file mode 100644
index 0000000..9e35178
--- /dev/null
+++ b/src/inlines.c
@@ -0,0 +1,998 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <ctype.h>
+#include "bstrlib.h"
+#include "stmd.h"
+#include "uthash.h"
+#include "debug.h"
+#include "scanners.h"
+#include "utf8.h"
+
+// Free a reference together with the label, url and title strings it owns.
+// Passing NULL is a no-op, mirroring free().
+extern void free_reference(reference *ref) {
+    if (ref == NULL) {
+        return;
+    }
+    bdestroy(ref->label);
+    bdestroy(ref->url);
+    bdestroy(ref->title);
+    free(ref);
+}
+
+// Remove and free every reference stored in the hash table *refmap, then
+// free the refmap pointer itself.  Does nothing when refmap is NULL.
+extern void free_reference_map(reference **refmap) {
+    reference *entry;
+    reference *next_entry;
+
+    if (refmap == NULL) {
+        return;
+    }
+    /* HASH_ITER keeps next_entry so deleting the current entry is safe */
+    HASH_ITER(hh, *refmap, entry, next_entry) {
+        HASH_DEL(*refmap, entry);
+        free_reference(entry);
+    }
+    free(refmap);
+}
+
+// normalize reference: collapse internal whitespace to single space,
+// remove leading/trailing whitespace, case fold.
+// Returns a new string (the result of case_fold, edited in place); s itself
+// is not modified by this function.
+static bstring normalize_reference(bstring s)
+{
+    bstring normalized = case_fold(s);
+    int pos = 0;
+    int endpos;
+    // unsigned so that bytes >= 0x80 are valid arguments for isspace
+    unsigned char c;
+    while ((c = bchar(normalized, pos))) {
+        if (isspace(c)) {
+            // Find the end of the whitespace run in the string being edited;
+            // offsets in s may differ because case folding can change byte
+            // lengths.
+            endpos = pos + 1;
+            while (isspace(bchar(normalized, endpos))) {
+                endpos++;
+            }
+            // replace the whole run with one space
+            bdelete(normalized, pos, endpos - pos);
+            binsertch(normalized, pos, 1, ' ');
+        }
+        pos++;
+    }
+    btrimws(normalized);
+    return normalized;
+}
+
+// Returns reference if refmap contains a reference with matching
+// label, otherwise NULL.  The label is normalized before the lookup and
+// the key length is taken from blength, the same way add_reference
+// computes it when inserting.
+extern reference* lookup_reference(reference** refmap, bstring lab)
+{
+    reference * ref = NULL;
+    bstring label;
+    if (refmap == NULL) {
+        return NULL;
+    }
+    label = normalize_reference(lab);
+    if (label == NULL) {
+        return NULL;
+    }
+    HASH_FIND(hh, *refmap, (char*) label->data,
+              (unsigned) blength(label), ref);
+    bdestroy(label);
+    return ref;
+}
+
+// Allocate a reference holding a normalized copy of label and copies of
+// url and title.  The arguments remain owned by the caller.
+// Returns NULL if the reference cannot be allocated.
+extern reference* make_reference(bstring label, bstring url, bstring title)
+{
+    reference * ref;
+    ref = malloc(sizeof(reference));
+    if (ref == NULL) {
+        return NULL;
+    }
+    ref->label = normalize_reference(label);
+    ref->url = bstrcpy(url);
+    ref->title = bstrcpy(title);
+    return ref;
+}
+
+// Insert ref into *refmap, keyed by the bytes of its label.  If an entry
+// with the same label already exists, the map is left unchanged and ref
+// is freed instead.
+extern void add_reference(reference** refmap, reference* ref)
+{
+    reference * existing = NULL;
+    char * key = (char*) ref->label->data;
+    unsigned keylen = (unsigned) blength(ref->label);
+
+    HASH_FIND(hh, *refmap, key, keylen, existing);
+    if (existing != NULL) {
+        free_reference(ref);  // first definition wins; ref is not stored
+        return;
+    }
+    HASH_ADD_KEYPTR(hh, *refmap, key, keylen, ref);
+}
+
+// Build a link-like inline of tag t.  The node stores the given label
+// list, url and title pointers directly (no copies are made).
+inline static inl* make_linkable(int t, inl* label, bstring url, bstring title)
+{
+    inl* node = (inl*) malloc(sizeof(inl));
+    node->next = NULL;
+    node->tag = t;
+    node->content.linkable.title = title;
+    node->content.linkable.url = url;
+    node->content.linkable.label = label;
+    return node;
+}
+
+// Build an inline of tag t whose content is the inline list `contents`
+// (stored by pointer, not copied).
+inline static inl* make_inlines(int t, inl* contents)
+{
+    inl* node = (inl*) malloc(sizeof(inl));
+    node->next = NULL;
+    node->content.inlines = contents;
+    node->tag = t;
+    return node;
+}
+
+// Build an inline of tag t carrying the string s (stored by pointer, not
+// copied).
+inline static inl* make_literal(int t, bstring s)
+{
+    inl* node = (inl*) malloc(sizeof(inl));
+    node->next = NULL;
+    node->content.literal = s;
+    node->tag = t;
+    return node;
+}
+
+// Build an inline of tag t that carries no content; the content member is
+// left unset.
+inline static inl* make_simple(int t)
+{
+    inl* node = (inl*) malloc(sizeof(inl));
+    node->next = NULL;
+    node->tag = t;
+    return node;
+}
+
+// Macros for creating various kinds of inlines.
+// Each one forwards to a make_* constructor with a fixed tag; the tag
+// names (str, code, link, ...) are not defined in this file and are
+// presumably enum constants from stmd.h.
+
+// Inlines holding a literal string.
+#define make_str(s) make_literal(str, s)
+#define make_code(s) make_literal(code, s)
+#define make_raw_html(s) make_literal(raw_html, s)
+#define make_entity(s) make_literal(entity, s)
+// Inlines with no content.
+#define make_linebreak() make_simple(linebreak)
+#define make_softbreak() make_simple(softbreak)
+// Inlines with a label/alt inline list, a url and a title.
+#define make_link(label, url, title) make_linkable(link, label, url, title)
+#define make_image(alt, url, title) make_linkable(image, alt, url, title)
+// Inlines wrapping a nested inline list.
+#define make_emph(contents) make_inlines(emph, contents)
+#define make_strong(contents) make_inlines(strong, contents)
+
+// Release every node of the inline list starting at e, along with the
+// strings and nested inline lists each node owns.  NULL is an empty list.
+extern void free_inlines(inl* e)
+{
+    inl * following;
+    for (; e != NULL; e = following) {
+        // remember the successor before the node is released
+        following = e->next;
+        switch (e->tag){
+        case str:
+        case raw_html:
+        case code:
+        case entity:
+            bdestroy(e->content.literal);
+            break;
+        case link:
+        case image:
+            bdestroy(e->content.linkable.url);
+            bdestroy(e->content.linkable.title);
+            free_inlines(e->content.linkable.label);
+            break;
+        case emph:
+        case strong:
+            free_inlines(e->content.inlines);
+            break;
+        case linebreak:
+        case softbreak:
+        default:
+            // nothing owned beyond the node itself
+            break;
+        }
+        free(e);
+    }
+}
+
+// Link inline list b after the last node of inline list a and return the
+// head of the combined list.  A NULL a is an empty list, so b is returned.
+inline static inl* append_inlines(inl* a, inl* b)
+{
+    inl* tail;
+    if (a == NULL) {
+        return b;
+    }
+    for (tail = a; tail->next != NULL; tail = tail->next) {
+        // walk to the last node
+    }
+    tail->next = b;
+    return a;
+}
+
+// Allocate a 'subject' (parser state) positioned at the start of s.
+// Note that s is right-trimmed in place and stored by pointer, not copied.
+static subject* make_subject(bstring s, reference** refmap)
+{
+    subject* subj = (subject*) malloc(sizeof(subject));
+    brtrimws(s);  // drop trailing whitespace from the caller's string
+    subj->reference_map = refmap;
+    subj->label_nestlevel = 0;
+    subj->pos = 0;
+    subj->buffer = s;
+    return subj;
+}
+
+// Predicate for take_while: 1 if c is a backtick, 0 otherwise.
+inline static int isbacktick(int c)
+{
+    return c == '`' ? 1 : 0;
+}
+
+// Return the next character in the subject, without advancing.
+// Return 0 if at the end of the subject.
+// The argument is parenthesized so any pointer expression can be passed.
+#define peek_char(subj) bchar((subj)->buffer, (subj)->pos)
+
+// Return true if the subject is exhausted, i.e. its position is at or
+// beyond the end of the buffer and no characters remain.
+inline static int is_eof(subject* subj)
+{
+    return !(subj->pos < blength(subj->buffer));
+}
+
+// Advance the subject by one byte.  Doesn't check for eof.
+// Fully parenthesized so it behaves as a single expression wherever used.
+#define advance(subj) ((subj)->pos += 1)
+
+// Consume characters for as long as predicate f accepts them (stopping at
+// a NUL byte or the end of the subject) and return them as a new string.
+inline static bstring take_while(subject* subj, int (*f)(int))
+{
+    int begin = subj->pos;
+    unsigned char ch;
+    for (;;) {
+        ch = peek_char(subj);
+        if (ch == 0 || !(*f)(ch)) {
+            break;
+        }
+        advance(subj);
+    }
+    // every accepted character moved pos by one
+    return bmidstr(subj->buffer, begin, subj->pos - begin);
+}
+
+// Consume a single character and return it as a new one-byte string;
+// return NULL without advancing if the subject is at eof.
+inline static bstring take_one(subject* subj)
+{
+    int begin;
+    if (is_eof(subj)) {
+        return NULL;
+    }
+    begin = subj->pos;
+    advance(subj);
+    return bmidstr(subj->buffer, begin, 1);
+}
+
+// Try to process a backtick code span that began with a
+// span of ticks of length openticklength (already parsed).
+// Return 0 if you don't find matching closing backticks,
+// otherwise return the position in the subject after the
+// closing backticks.  The subject position is advanced while
+// scanning; callers rewind it themselves on failure.
+//
+// Implemented as a loop: runs of the wrong length are skipped without
+// recursion, and the scan is bounded by is_eof rather than by a NUL byte,
+// so an embedded NUL cannot stall it.
+static int scan_to_closing_backticks(subject* subj, int openticklength)
+{
+    int numticks;
+    while (!is_eof(subj)) {
+        // read non backticks
+        while (!is_eof(subj) && peek_char(subj) != '`') {
+            advance(subj);
+        }
+        if (is_eof(subj)) {
+            break;  // did not find closing ticks
+        }
+        // measure this run of backticks
+        numticks = 0;
+        while (peek_char(subj) == '`') {
+            advance(subj);
+            numticks++;
+        }
+        if (numticks == openticklength) {
+            return (subj->pos);
+        }
+        // wrong length: keep looking after this run
+    }
+    return 0;
+}
+
+// Edit s in place so that every run of spaces and newlines becomes one
+// space character.  Scanning stops at the first NUL byte.  Always
+// returns 0.
+static int normalize_whitespace(bstring s)
+{
+    bool prev_was_space = false;
+    int i = 0;
+    char ch;
+    for (ch = bchar(s, i); ch != '\0'; ch = bchar(s, i)) {
+        if (ch != ' ' && ch != '\n') {
+            i++;
+            prev_was_space = false;
+            continue;
+        }
+        if (prev_was_space) {
+            // already emitted a space for this run: drop the character
+            bdelete(s, i, 1);
+        } else {
+            if (ch == '\n') {
+                // first character of a run is a newline: turn it into a space
+                bdelete(s, i, 1);
+                binsertch(s, i, 1, ' ');
+            }
+            i++;
+        }
+        prev_was_space = true;
+    }
+    return 0;
+}
+
+// Parse a run of backticks at the current position.  If a closing run of
+// the same length exists, return a code inline holding the trimmed,
+// whitespace-normalized text between them; otherwise rewind to just after
+// the opening run and return the backticks themselves as a str inline.
+static inl* handle_backticks(subject *subj)
+{
+    bstring ticks = take_while(subj, isbacktick);
+    int nticks = blength(ticks);
+    int content_start = subj->pos;
+    int after_close = scan_to_closing_backticks(subj, nticks);
+    bstring body;
+
+    if (after_close != 0) {
+        bdestroy(ticks);
+        body = bmidstr(subj->buffer, content_start,
+                       after_close - content_start - nticks);
+        btrimws(body);
+        normalize_whitespace(body);
+        return make_code(body);
+    }
+    // no closing run: the opening ticks are literal text
+    subj->pos = content_start;
+    return make_str(ticks);
+}
+
+// Count the run of delimiter characters c ('*' or '_') at the current
+// position and return its length (0 if there is none).
+// Doesn't advance position.
+//
+// *can_open  - set if the run has 1..3 delimiters and is not followed by
+//              whitespace.
+// *can_close - set if the run has 1..3 delimiters and is not preceded by
+//              whitespace.
+// For '_' the run additionally must not be preceded (to open) or followed
+// (to close) by an alphanumeric character.  The start of the subject counts
+// as being preceded by a newline.
+static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close)
+{
+    int numdelims = 0;
+    // unsigned so bytes >= 0x80 are valid arguments for isspace/isalnum
+    unsigned char char_before, char_after;
+    int startpos = subj->pos;
+
+    char_before = subj->pos == 0 ? '\n' : bchar(subj->buffer, subj->pos - 1);
+    while (peek_char(subj) == c) {
+        numdelims++;
+        advance(subj);
+    }
+    char_after = peek_char(subj);
+    *can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after);
+    *can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before);
+    if (c == '_') {
+        *can_open = *can_open && !isalnum(char_before);
+        *can_close = *can_close && !isalnum(char_after);
+    }
+    subj->pos = startpos;  // restore: this function only peeks
+    return numdelims;
+}
+
+// Parse strong/emph or a fallback.
+// Assumes the subject has '_' or '*' at the current position.
+//
+// The opening delimiter run is first emitted as a plain str node and the
+// following inlines are appended after it. If a matching closer is found,
+// that str node is retagged in place as emph/strong and the inlines that
+// followed it become its children. If the input runs out first, the
+// delimiters simply stay in the result as literal text.
+// Returns the head of the resulting inline list.
+static inl* handle_strong_emph(subject* subj, char c)
+{
+    bool can_open, can_close;
+    inl * result = NULL;
+    // parse_inline() wants a pointer to a "last inline" cell. A local slot
+    // is enough for that; the cell used to be malloc'ed without a NULL
+    // check and written straight away.
+    inl * tail = NULL;
+    inl ** last = &tail;
+    inl * new;
+    inl * il;
+    inl * first_head = NULL;
+    inl * first_close = NULL;
+    int first_close_delims = 0;
+    int numdelims;
+
+    numdelims = scan_delims(subj, c, &can_open, &can_close);
+    subj->pos += numdelims;
+
+    // Fallback node: the delimiter run as literal text.
+    new = make_str(bmidstr(subj->buffer, subj->pos - numdelims, numdelims));
+    *last = new;
+    first_head = new;
+    result = new;
+
+    if (!can_open || numdelims == 0) {
+        goto done;
+    }
+
+    switch (numdelims) {
+    case 1:
+        while (true) {
+            numdelims = scan_delims(subj, c, &can_open, &can_close);
+            if (numdelims >= 1 && can_close) {
+                // Consume one closing delimiter and turn the head into emph.
+                subj->pos += 1;
+                first_head->tag = emph;
+                bdestroy(first_head->content.literal);
+                first_head->content.inlines = first_head->next;
+                first_head->next = NULL;
+                goto done;
+            } else {
+                if (!parse_inline(subj, last)) {
+                    goto done;
+                }
+            }
+        }
+        break;
+    case 2:
+        while (true) {
+            numdelims = scan_delims(subj, c, &can_open, &can_close);
+            if (numdelims >= 2 && can_close) {
+                // Consume two closing delimiters and turn the head into strong.
+                subj->pos += 2;
+                first_head->tag = strong;
+                bdestroy(first_head->content.literal);
+                first_head->content.inlines = first_head->next;
+                first_head->next = NULL;
+                goto done;
+            } else {
+                if (!parse_inline(subj, last)) {
+                    goto done;
+                }
+            }
+        }
+        break;
+    case 3:
+        // A run of three opens both an emph and a strong. Two closers are
+        // needed; the first is remembered in first_close and the second must
+        // use a different delimiter count.
+        while (true) {
+            numdelims = scan_delims(subj, c, &can_open, &can_close);
+            if (can_close && numdelims >= 1 && numdelims <= 3 &&
+                numdelims != first_close_delims) {
+                // Record the closer as a str node for now (placeholder).
+                new = make_str(bmidstr(subj->buffer, subj->pos, numdelims));
+                append_inlines(*last, new);
+                *last = new;
+                if (numdelims == 3) {
+                    // Of a closing run of three, only one is consumed here.
+                    numdelims = 1;
+                }
+                subj->pos += numdelims;
+                if (first_close) {
+                    // Second closer: build outer(inner(...), ...) in place.
+                    first_head->tag = first_close_delims == 1 ? strong : emph;
+                    bdestroy(first_head->content.literal);
+                    first_head->content.inlines =
+                        make_inlines(first_close_delims == 1 ? emph : strong,
+                                     first_head->next);
+
+                    // Cut the inner element's children off before first_close.
+                    il = first_head->next;
+                    while (il->next && il->next != first_close) {
+                        il = il->next;
+                    }
+                    il->next = NULL;
+
+                    // What followed first_close becomes the inner element's
+                    // siblings inside the outer element.
+                    first_head->content.inlines->next = first_close->next;
+
+                    // Cut that sibling chain off before the second closer
+                    // placeholder and discard the placeholder.
+                    il = first_head->content.inlines;
+                    while (il->next && il->next != *last) {
+                        il = il->next;
+                    }
+                    il->next = NULL;
+                    free_inlines(*last);
+
+                    // Discard the first closer placeholder as well.
+                    first_close->next = NULL;
+                    free_inlines(first_close);
+                    first_head->next = NULL;
+                    goto done;
+                } else {
+                    first_close = *last;
+                    first_close_delims = numdelims;
+                }
+            } else {
+                if (!parse_inline(subj, last)) {
+                    goto done;
+                }
+            }
+        }
+        break;
+    default:
+        goto done;
+    }
+
+ done:
+    return result;
+}
+
+// Parse a backslash escape (or a lone backslash), returning an inline.
+// Assumes the subject has a backslash at the current position.
+static inl* handle_backslash(subject *subj)
+{
+    unsigned char escaped;
+
+    advance(subj);  // step over the backslash itself
+    escaped = peek_char(subj);
+
+    // Only ASCII punctuation and newline can be escaped.
+    if (ispunct(escaped)) {
+        advance(subj);
+        return make_str(bformat("%c", escaped));
+    }
+    if (escaped == '\n') {
+        advance(subj);
+        return make_linebreak();
+    }
+    // Anything else: the backslash is literal and the next char is untouched.
+    return make_str(bfromcstr("\\"));
+}
+
+// Parse an entity, or a plain "&" string when no entity matches.
+// Assumes the subject has an '&' character at the current position.
+static inl* handle_entity(subject* subj)
+{
+    inl * node;
+    int len = scan_entity(subj->buffer, subj->pos);
+
+    if (len == 0) {
+        // Not an entity: consume just the ampersand.
+        advance(subj);
+        return make_str(bfromcstr("&"));
+    }
+    node = make_entity(bmidstr(subj->buffer, subj->pos, len));
+    subj->pos += len;
+    return node;
+}
+
+// Like make_str, but parses entities.
+// Returns an inline sequence consisting of str and entity elements
+// (NULL for an empty string). The input string s is only read.
+static inl * make_str_with_entities(bstring s)
+{
+    subject * subj = make_subject(s, NULL);
+    inl * head = NULL;
+    inl * piece;
+    int amp;
+
+    while (peek_char(subj)) {
+        if (peek_char(subj) == '&') {
+            piece = handle_entity(subj);
+        } else {
+            // Take everything up to the next '&', or to the end of the buffer.
+            amp = bstrchrp(subj->buffer, '&', subj->pos);
+            if (amp == BSTR_ERR) {
+                amp = blength(subj->buffer);
+            }
+            piece = make_str(bmidstr(subj->buffer, subj->pos, amp - subj->pos));
+            subj->pos = amp;
+        }
+        head = append_inlines(head, piece);
+    }
+    free(subj);
+    return head;
+}
+
+// Destructively unescape a string: remove backslashes before punctuation chars.
+// A backslash that is not followed by punctuation is kept as is.
+// Always returns 0.
+extern int unescape(bstring url)
+{
+    int searchpos = 0;
+    while ((searchpos = bstrchrp(url, '\\', searchpos)) != BSTR_ERR) {
+        if (ispunct(bchar(url, searchpos + 1))) {
+            // Drop the backslash; the escaped character slides into searchpos.
+            bdelete(url, searchpos, 1);
+        }
+        // Step past the character now at searchpos. After a deletion that is
+        // the escaped character, which must not be examined again: otherwise
+        // an escaped backslash would itself be taken as the start of another
+        // escape (e.g. \\* would lose both backslashes).
+        searchpos++;
+    }
+    return 0;
+}
+
+// Clean a URL: remove surrounding whitespace and surrounding <>,
+// and remove \ that escape punctuation.
+// Modifies url in place. Always returns 0.
+static int clean_url(bstring url)
+{
+    int urllength;
+
+    btrimws(url);
+    // Measure only after trimming: a length taken before the trim would
+    // point past the end, and the closing '>' would never be found.
+    urllength = blength(url);
+    // remove surrounding <> if any:
+    if (bchar(url, 0) == '<' && bchar(url, urllength - 1) == '>') {
+        bdelete(url, 0, 1);
+        bdelete(url, urllength - 2, 1);  // the '>' has shifted left by one
+    }
+    unescape(url);
+    return 0;
+}
+
+// Clean a title: strip one pair of surrounding delimiters ('...', "..." or
+// (...)) if present, then remove \ that escape punctuation.
+// Modifies title in place. Always returns 0.
+static int clean_title(bstring title)
+{
+    int len = blength(title);
+    int first = bchar(title, 0);
+    int final = bchar(title, len - 1);
+    int delimited;
+
+    delimited = (first == '\'' && final == '\'') ||
+                (first == '(' && final == ')') ||
+                (first == '"' && final == '"');
+    if (delimited) {
+        bdelete(title, 0, 1);
+        bdelete(title, len - 2, 1);  // closing delimiter, shifted left by one
+    }
+    unescape(title);
+    return 0;
+}
+
+// Parse an autolink or HTML tag.
+// Assumes the subject has a '<' character at the current position.
+// Tries, in order: URI autolink, email autolink, raw HTML tag. If none of
+// them matches, only the '<' is consumed and returned as a literal string.
+static inl* handle_pointy_brace(subject* subj)
+{
+    int len;
+    bstring text;
+    inl* node;
+
+    advance(subj);  // step over the opening <
+
+    // 1. URI autolink; the last matched character is left out of the text.
+    len = scan_autolink_uri(subj->buffer, subj->pos);
+    if (len > 0) {
+        text = bmidstr(subj->buffer, subj->pos, len - 1);
+        subj->pos += len;
+        node = make_link(make_str_with_entities(text),
+                         bstrcpy(text), bfromcstr(""));
+        bdestroy(text);
+        return node;
+    }
+
+    // 2. Email autolink; the destination gets a mailto: prefix.
+    len = scan_autolink_email(subj->buffer, subj->pos);
+    if (len > 0) {
+        text = bmidstr(subj->buffer, subj->pos, len - 1);
+        subj->pos += len;
+        node = make_link(make_str_with_entities(text),
+                         bformat("mailto:%s", text->data),
+                         bfromcstr(""));
+        bdestroy(text);
+        return node;
+    }
+
+    // 3. Raw HTML tag; the '<' already consumed is put back in front.
+    len = scan_html_tag(subj->buffer, subj->pos);
+    if (len <= 0) {
+        // Nothing matched: just return the opening <.
+        return make_str(bfromcstr("<"));
+    }
+    text = bmidstr(subj->buffer, subj->pos, len);
+    binsertch(text, 0, 1, '<');
+    subj->pos += len;
+    return make_raw_html(text);
+}
+
+// Parse a link label. Returns 1 if successful.
+// Unless raw_label is null, it is set to point to the raw contents of the [].
+// Assumes the subject has a '[' character at the current position.
+// Returns 0 and does not advance if no matching ] is found.
+// Note the precedence: code backticks have precedence over label bracket
+// markers, which have precedence over *, _, and other inline formatting
+// markers. So, 2 below contains a link while 1 does not:
+// 1. [a link `with a ](/url)` character
+// 2. [a link *with emphasized ](/url) text*
+//
+// On success *raw_label is a new bstring (made with bmidstr) holding the
+// text between the brackets; the callers in this file bdestroy it.
+// On failure *raw_label is not written.
+// subj->label_nestlevel is a memo of earlier failed scans: it is set when a
+// scan hits the end of the subject, consumed one unit per later call, and
+// reset to 0 on success.
+static int link_label(subject* subj, bstring* raw_label)
+{
+ int nestlevel = 0;
+ inl* tmp = NULL;
+ bstring raw;
+ int startpos = subj->pos;
+ if (subj->label_nestlevel) {
+ // if we've already checked to the end of the subject
+ // for a label, even with a different starting [, we
+ // know we won't find one here and we can just return.
+ // Note: nestlevel 1 would be: [foo [bar]
+ // nestlevel 2 would be: [foo [bar [baz]
+ subj->label_nestlevel--;
+ return 0;
+ }
+ advance(subj); // advance past [
+ char c;
+ // Scan until the end of the subject or an unnested ']'.
+ while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) {
+ switch (c) {
+ case '`':
+ // Parse the code span only to skip over it; the result is discarded.
+ tmp = handle_backticks(subj);
+ free_inlines(tmp);
+ break;
+ case '<':
+ // Likewise skip over an autolink or HTML tag.
+ tmp = handle_pointy_brace(subj);
+ free_inlines(tmp);
+ break;
+ case '[': // nested []
+ nestlevel++;
+ advance(subj);
+ break;
+ case ']': // nested []
+ nestlevel--;
+ advance(subj);
+ break;
+ case '\\':
+ // A backslash followed by punctuation skips both characters, so an
+ // escaped bracket does not count.
+ advance(subj);
+ if (ispunct(peek_char(subj))) {
+ advance(subj);
+ }
+ break;
+ default:
+ advance(subj);
+ }
+ }
+ if (c == ']') {
+ if (raw_label != NULL) {
+ // Copy the text strictly between the opening [ and this ].
+ raw = bmidstr(subj->buffer, startpos + 1, subj->pos - (startpos + 1));
+ *raw_label = raw;
+ }
+ subj->label_nestlevel = 0;
+ advance(subj); // advance past ]
+ return 1;
+ } else {
+ if (c == 0) {
+ // Reached the end of the subject: remember how many nested brackets
+ // were still open so later calls can return early.
+ subj->label_nestlevel = nestlevel;
+ }
+ subj->pos = startpos; // rewind
+ return 0;
+ }
+}
+
+// Parse a link or the link portion of an image, or return a fallback.
+// Assumes the subject is at a '[' character (link_label requires it).
+// Three outcomes are possible:
+//  - an inline (explicit) link [label](url "title");
+//  - a reference link [label], [label][] or [label][ref], resolved through
+//    subj->reference_map;
+//  - a fallback: "[" label "]" as ordinary inlines when a label was found
+//    but no link could be built, or just "[" when there is no label at all.
+static inl* handle_left_bracket(subject* subj)
+{
+ inl* lab = NULL;
+ inl* result = NULL;
+ reference* ref;
+ int n;
+ int sps;
+ int found_label;
+ int endlabel, starturl, endurl, starttitle, endtitle, endall;
+ bstring url, title, rawlabel, reflabel;
+ bstring rawlabel2 = NULL;
+ // rawlabel is only assigned when link_label succeeds; it is not used below
+ // unless found_label is nonzero.
+ found_label = link_label(subj, &rawlabel);
+ endlabel = subj->pos;
+ if (found_label) {
+ // Both scanners return a count that is never negative, so the "> -1"
+ // comparisons below always hold; they serve to capture sps and n.
+ if (peek_char(subj) == '(' &&
+ ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) &&
+ ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) {
+ // try to parse an explicit link:
+ starturl = subj->pos + 1 + sps; // after (
+ endurl = starturl + n;
+ starttitle = endurl + scan_spacechars(subj->buffer, endurl);
+ // ensure there are spaces btw url and title
+ endtitle = (starttitle == endurl) ? starttitle :
+ starttitle + scan_link_title(subj->buffer, starttitle);
+ endall = endtitle + scan_spacechars(subj->buffer, endtitle);
+ if (bchar(subj->buffer, endall) == ')') {
+ subj->pos = endall + 1;
+ url = bmidstr(subj->buffer, starturl, endurl - starturl);
+ clean_url(url);
+ title = bmidstr(subj->buffer, starttitle, endtitle - starttitle);
+ clean_title(title);
+ // The label is parsed without a reference map.
+ lab = parse_inlines(rawlabel, NULL);
+ bdestroy(rawlabel);
+ return make_link(lab, url, title);
+ } else {
+ // if we get here, we matched a label but didn't get further:
+ subj->pos = endlabel;
+ lab = parse_inlines(rawlabel, subj->reference_map);
+ bdestroy(rawlabel);
+ result = append_inlines(make_str(bfromcstr("[")),
+ append_inlines(lab,
+ make_str(bfromcstr("]"))));
+ return result;
+ }
+ } else {
+ // Check for reference link.
+ // First, see if there's another label:
+ subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel);
+ reflabel = rawlabel;
+ // if followed by a nonempty link label, we change reflabel to it:
+ if (peek_char(subj) == '[' &&
+ link_label(subj, &rawlabel2)) {
+ if (blength(rawlabel2) > 0) {
+ reflabel = rawlabel2;
+ }
+ } else {
+ // No second label: undo the whitespace skip.
+ subj->pos = endlabel;
+ }
+ // lookup rawlabel in subject->reference_map:
+ ref = lookup_reference(subj->reference_map, reflabel);
+ if (ref != NULL) { // found
+ lab = parse_inlines(rawlabel, NULL);
+ // The link gets its own copies of the reference's url and title.
+ result = make_link(lab, bstrcpy(ref->url), bstrcpy(ref->title));
+ } else {
+ // Unknown reference: rewind to just after the first label and emit
+ // the brackets literally around the parsed label.
+ subj->pos = endlabel;
+ lab = parse_inlines(rawlabel, subj->reference_map);
+ result = append_inlines(make_str(bfromcstr("[")),
+ append_inlines(lab, make_str(bfromcstr("]"))));
+ }
+ bdestroy(rawlabel);
+ // rawlabel2 is still NULL here when no second label was parsed.
+ bdestroy(rawlabel2);
+ return result;
+ }
+ }
+ // If we fall through to here, it means we didn't match a link:
+ advance(subj); // advance past [
+ return make_str(bfromcstr("["));
+}
+
+// Parse a hard or soft linebreak, returning an inline.
+// Assumes the subject has a newline at the current position.
+// The break is hard when the newline is directly preceded by two spaces.
+static inl* handle_newline(subject *subj)
+{
+    int nl = subj->pos;
+    int is_hard = nl > 1 &&
+                  bchar(subj->buffer, nl - 1) == ' ' &&
+                  bchar(subj->buffer, nl - 2) == ' ';
+
+    advance(subj);  // the newline itself
+    // Leading spaces of the next line are swallowed.
+    while (peek_char(subj) == ' ') {
+        advance(subj);
+    }
+    return is_hard ? make_linebreak() : make_softbreak();
+}
+
+// Predicate for parse_inlines_while: 1 while the subject is not at its end,
+// 0 once is_eof() reports the end.
+inline static int not_eof(subject* subj)
+{
+    return is_eof(subj) ? 0 : 1;
+}
+
+// Parse inlines for as long as the predicate f holds for the subject and
+// another inline can be parsed. Returns the parsed inlines (NULL if none).
+extern inl* parse_inlines_while(subject* subj, int (*f)(subject*))
+{
+    inl* head = NULL;
+
+    for (;;) {
+        if (!f(subj)) {
+            break;
+        }
+        if (!parse_inline(subj, &head)) {
+            break;
+        }
+    }
+    return head;
+}
+
+// Parse an inline, advancing subject, and add it to last element.
+// Adjust tail to point to new last element of list.
+// Return 0 if no inline can be parsed, 1 otherwise.
+//
+// Dispatches on the character at the current position to one of the
+// handle_* functions; any other character starts a plain string that runs
+// up to the next special character.
+// NOTE(review): despite the sentence about adjusting the tail, the code below
+// writes *last only when it is NULL; otherwise it calls append_inlines and
+// leaves *last as it was. Confirm callers do not rely on *last being the tail.
+extern int parse_inline(subject* subj, inl ** last)
+{
+ inl* new = NULL;
+ bstring contents;
+ bstring special_chars;
+ unsigned char c;
+ int endpos;
+ c = peek_char(subj);
+ if (c == 0) {
+ // Nothing left to parse.
+ return 0;
+ }
+ switch(c){
+ case '\n':
+ new = handle_newline(subj);
+ break;
+ case '`':
+ new = handle_backticks(subj);
+ break;
+ case '\\':
+ new = handle_backslash(subj);
+ break;
+ case '&':
+ new = handle_entity(subj);
+ break;
+ case '<':
+ new = handle_pointy_brace(subj);
+ break;
+ case '_':
+ // An underscore directly after an alphanumeric character or another
+ // underscore is taken as a literal character, not as emphasis.
+ if (subj->pos > 0 && (isalnum(bchar(subj->buffer, subj->pos - 1)) ||
+ bchar(subj->buffer, subj->pos - 1) == '_')) {
+ new = make_str(take_one(subj));
+ } else {
+ new = handle_strong_emph(subj, '_');
+ }
+ break;
+ case '*':
+ new = handle_strong_emph(subj, '*');
+ break;
+ case '[':
+ new = handle_left_bracket(subj);
+ break;
+ case '!':
+ advance(subj);
+ if (peek_char(subj) == '[') {
+ // "![" : parse as a link, then retag the link as an image. If no link
+ // came back, the '!' is put in front of the result as literal text.
+ new = handle_left_bracket(subj);
+ if (new != NULL && new->tag == link) {
+ new->tag = image;
+ } else {
+ new = append_inlines(make_str(bfromcstr("!")), new);
+ }
+ } else {
+ new = make_str(bfromcstr("!"));
+ }
+ break;
+ default:
+ // we read until we hit a special character
+ special_chars = bfromcstr("\n\\`&_*[]<!");
+ endpos = binchr(subj->buffer, subj->pos, special_chars);
+ bdestroy(special_chars);
+ if (endpos == subj->pos) {
+ // current char is special: read a 1-character str
+ // (of the special characters, only ']' has no case above)
+ contents = take_one(subj);
+ } else if (endpos == BSTR_ERR) {
+ // special char not found, take whole rest of buffer:
+ endpos = subj->buffer->slen;
+ contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos);
+ subj->pos = endpos;
+ } else {
+ // take buffer from subj->pos to endpos to str.
+ contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos);
+ subj->pos = endpos;
+ // if we're at a newline, strip trailing spaces.
+ if (peek_char(subj) == '\n') {
+ brtrimws(contents);
+ }
+ }
+ new = make_str(contents);
+ }
+ // Link the new inline into the caller's list.
+ if (*last == NULL) {
+ *last = new;
+ } else {
+ append_inlines(*last, new);
+ }
+ return 1;
+}
+
+// Parse the whole of input into a list of inlines, using refmap as the
+// reference map of the temporary subject. Returns the parsed inlines.
+extern inl* parse_inlines(bstring input, reference** refmap)
+{
+    inl * parsed;
+    subject * subj = make_subject(input, refmap);
+
+    parsed = parse_inlines_while(subj, not_eof);
+    free(subj);
+    return parsed;
+}
+
+// Parse zero or more space characters, including at most one newline.
+// Stops at the first character that is neither a space nor the first
+// newline encountered.
+void spnl(subject* subj)
+{
+    bool newline_used = false;
+
+    for (;;) {
+        if (peek_char(subj) == ' ') {
+            advance(subj);
+        } else if (!newline_used && peek_char(subj) == '\n') {
+            newline_used = true;
+            advance(subj);
+        } else {
+            return;
+        }
+    }
+}
+
+// Parse reference. Assumes string begins with '[' character.
+// Modify refmap if a reference is encountered.
+// Return 0 if no reference found, otherwise position of subject
+// after reference is parsed.
+// Expected shape: [label]: url optional-title, then only spaces up to the
+// end of the line (or of the input).
+extern int parse_reference(bstring input, reference** refmap)
+{
+    subject * subj = make_subject(input, NULL);
+    bstring lab = NULL;
+    bstring url = NULL;
+    bstring title = NULL;
+    int matchlen;
+    int beforetitle;
+    int consumed = 0;   // stays 0 on every failure path
+
+    // label, immediately followed by a colon:
+    if (!link_label(subj, &lab)) {
+        goto cleanup;
+    }
+    if (peek_char(subj) != ':') {
+        goto cleanup;
+    }
+    advance(subj);
+
+    // link url (required):
+    spnl(subj);
+    matchlen = scan_link_url(subj->buffer, subj->pos);
+    if (!matchlen) {
+        goto cleanup;
+    }
+    url = bmidstr(subj->buffer, subj->pos, matchlen);
+    clean_url(url);
+    subj->pos += matchlen;
+
+    // optional link title; without one, rewind over the skipped whitespace:
+    beforetitle = subj->pos;
+    spnl(subj);
+    matchlen = scan_link_title(subj->buffer, subj->pos);
+    if (matchlen) {
+        title = bmidstr(subj->buffer, subj->pos, matchlen);
+        clean_title(title);
+        subj->pos += matchlen;
+    } else {
+        subj->pos = beforetitle;
+        title = bfromcstr("");
+    }
+
+    // final spaces, then a newline or the end of the input:
+    while (peek_char(subj) == ' ') {
+        advance(subj);
+    }
+    if (peek_char(subj) == '\n') {
+        advance(subj);
+    } else if (peek_char(subj) != 0) {
+        goto cleanup;
+    }
+
+    // insert reference into refmap
+    add_reference(refmap, make_reference(lab, url, title));
+    consumed = subj->pos;
+
+ cleanup:
+    // The local strings are released on every path; the ones that were
+    // never created are still NULL here.
+    free(subj);
+    bdestroy(lab);
+    bdestroy(url);
+    bdestroy(title);
+    return consumed;
+}
+
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..40a63bc
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,102 @@
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "bstrlib.h"
+#include "stmd.h"
+#include "debug.h"
+
+// Print the command-line usage summary to standard output.
+void print_usage()
+{
+    fputs("Usage:   stmd [FILE*]\n"
+          "Options: --help, -h    Print usage information\n"
+          "         --ast         Print AST instead of HTML\n"
+          "         --version     Print version\n",
+          stdout);
+}
+
+// Entry point: parse options, feed the input line by line to the block
+// parser, then print the document as HTML (default) or as an AST (--ast).
+// Input comes from the files named on the command line, read in order into
+// a single document, or from stdin when no file is given.
+// Returns 0 on success and -1 when a check() fails; option errors and
+// unreadable files exit with status 1.
+int main(int argc, char *argv[]) {
+    int i;
+    bool ast = false;
+    int g = 0;
+    int numfps = 0;
+    int files[argc];  // argv indices of the file arguments
+
+    for (i=1; i < argc; i++) {
+        if (strcmp(argv[i], "--version") == 0) {
+            printf("stmd %s", VERSION);
+            printf(" - standard markdown converter (c) 2014 John MacFarlane\n");
+            exit(0);
+        } else if ((strcmp(argv[i], "--help") == 0) ||
+                   (strcmp(argv[i], "-h") == 0)) {
+            print_usage();
+            exit(0);
+        } else if (strcmp(argv[i], "--ast") == 0) {
+            ast = true;
+        } else if (*argv[i] == '-') {
+            // unknown option
+            print_usage();
+            exit(1);
+        } else { // treat as file argument
+            files[g] = i;
+            g++;
+        }
+    }
+
+    numfps = g;
+    bstring s = NULL;
+    bstring html;
+    block * cur = make_document();
+    int linenum = 1;
+    FILE * fp = NULL;
+
+    if (numfps == 0) {
+        // read from stdin; bgets hands back a fresh string for every line
+        while ((s = bgets((bNgetc) fgetc, stdin, '\n'))) {
+            check(incorporate_line(s, linenum, &cur) == 0,
+                  "error incorporating line %d", linenum);
+            bdestroy(s);
+            linenum++;
+        }
+    } else {
+        // bsreadln fills an existing string and reports an error when given
+        // NULL, so one reusable line buffer is created up front.
+        s = bfromcstr("");
+        if (s == NULL) {
+            fprintf(stderr, "Error allocating line buffer\n");
+            exit(1);
+        }
+        // iterate over input files
+        for (g=0; g < numfps; g++) {
+
+            fp = fopen(argv[files[g]], "r");
+            if (fp == NULL) {
+                fprintf(stderr, "Error opening file %s: %s\n",
+                        argv[files[g]], strerror(errno));
+                exit(1);
+            }
+
+            struct bStream *stream = bsopen((bNread)fread, fp);
+            if (stream == NULL) {
+                // Without a stream nothing can be read from this file.
+                fprintf(stderr, "Error opening stream\n");
+                fclose(fp);
+                exit(1);
+            }
+            while (bsreadln(s, stream, '\n') != BSTR_ERR) {
+                check(incorporate_line(s, linenum, &cur) == 0,
+                      "error incorporating line %d of %s", linenum, argv[files[g]]);
+                linenum++;
+            }
+            // bsclose releases the stream only; the FILE is closed separately.
+            bsclose(stream);
+            fclose(fp);
+        }
+        bdestroy(s);
+    }
+
+    // Close every container that is still open, innermost first.
+    while (cur != cur->top) {
+        finalize(cur, linenum);
+        cur = cur->parent;
+    }
+    check(cur == cur->top, "problems finalizing open containers");
+    finalize(cur, linenum);
+    process_inlines(cur, cur->attributes.refmap);
+    if (ast) {
+        print_blocks(cur, 0);
+    } else {
+        check(blocks_to_html(cur, &html, false) == 0, "could not format as HTML");
+        printf("%s", html->data);
+        bdestroy(html);
+    }
+    free_blocks(cur);
+    return 0;
+error:
+    return -1;
+}
+
diff --git a/src/print.c b/src/print.c
new file mode 100644
index 0000000..a924870
--- /dev/null
+++ b/src/print.c
@@ -0,0 +1,168 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "bstrlib.h"
+#include "stmd.h"
+#include "debug.h"
+
+// Return a new bstring holding s in double quotes, with newline, double
+// quote and backslash written as \n, \" and \\ respectively.
+// The caller owns the returned string.
+static bstring format_str(bstring s)
+{
+    bstring out = bfromcstr("");
+    int n = blength(s);
+    int i;
+    char ch;
+
+    bformata(out, "\"");
+    for (i = 0; i < n; i++) {
+        ch = bchar(s, i);
+        if (ch == '\n') {
+            bformata(out, "\\n");
+        } else if (ch == '"') {
+            bformata(out, "\\\"");
+        } else if (ch == '\\') {
+            bformata(out, "\\\\");
+        } else {
+            bformata(out, "%c", ch);
+        }
+    }
+    bformata(out, "\"");
+    return out;
+}
+
+// Functions to pretty-print inline and block lists, for debugging.
+// Prettyprint a list of blocks (b and its ->next siblings), for debugging.
+// Each block is printed on its own line after `indent` spaces; children of
+// containers and inline content are printed two columns deeper.
+extern void print_blocks(block* b, int indent)
+{
+    struct ListData * data;
+    // Escaped copies made by format_str(); each one is destroyed right
+    // after it has been printed.
+    bstring content;
+    bstring info;
+
+    while(b != NULL) {
+        // printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line);
+        for (int i=0; i < indent; i++) {
+            putchar(' ');
+        }
+        switch(b->tag) {
+        case document:
+            printf("document\n");
+            print_blocks(b->children, indent + 2);
+            break;
+        case block_quote:
+            printf("block_quote\n");
+            print_blocks(b->children, indent + 2);
+            break;
+        case list_item:
+            printf("list_item\n");
+            print_blocks(b->children, indent + 2);
+            break;
+        case list:
+            data = &(b->attributes.list_data);
+            if (data->list_type == ordered) {
+                printf("list (type=ordered tight=%s start=%d delim=%s)\n",
+                       (data->tight ? "true" : "false"),
+                       data->start,
+                       (data->delimiter == parens ? "parens" : "period"));
+            } else {
+                printf("list (type=bullet tight=%s bullet_char=%c)\n",
+                       (data->tight ? "true" : "false"),
+                       data->bullet_char);
+            }
+            print_blocks(b->children, indent + 2);
+            break;
+        case atx_header:
+            printf("atx_header (level=%d)\n", b->attributes.header_level);
+            print_inlines(b->inline_content, indent + 2);
+            break;
+        case setext_header:
+            printf("setext_header (level=%d)\n", b->attributes.header_level);
+            print_inlines(b->inline_content, indent + 2);
+            break;
+        case paragraph:
+            printf("paragraph\n");
+            print_inlines(b->inline_content, indent + 2);
+            break;
+        case hrule:
+            printf("hrule\n");
+            break;
+        case indented_code:
+            content = format_str(b->string_content);
+            printf("indented_code %s\n", content->data);
+            bdestroy(content);
+            break;
+        case fenced_code:
+            info = format_str(b->attributes.fenced_code_data.info);
+            content = format_str(b->string_content);
+            printf("fenced_code length=%d info=%s %s\n",
+                   b->attributes.fenced_code_data.fence_length,
+                   info->data,
+                   content->data);
+            bdestroy(info);
+            bdestroy(content);
+            break;
+        case html_block:
+            content = format_str(b->string_content);
+            printf("html_block %s\n", content->data);
+            bdestroy(content);
+            break;
+        case reference_def:
+            printf("reference_def\n");
+            break;
+        default:
+            log_warn("block type %d not implemented\n", b->tag);
+            break;
+        }
+        b = b->next;
+    }
+}
+
+// Print one line of the form: <name> "<escaped literal>".
+// The escaped copy made by format_str() is destroyed after printing.
+static void print_inline_literal(const char * name, bstring literal)
+{
+    bstring escaped = format_str(literal);
+    printf("%s %s\n", name, escaped->data);
+    bdestroy(escaped);
+}
+
+// Print one line of the form: <name> url="..." title="...".
+// The escaped copies made by format_str() are destroyed after printing.
+static void print_inline_linkable(const char * name, bstring url, bstring title)
+{
+    bstring escaped_url = format_str(url);
+    bstring escaped_title = format_str(title);
+    printf("%s url=%s title=%s\n", name, escaped_url->data, escaped_title->data);
+    bdestroy(escaped_url);
+    bdestroy(escaped_title);
+}
+
+// Prettyprint an inline list (ils and its ->next siblings), for debugging.
+// Each inline is printed on its own line after `indent` spaces; the label
+// of a link or image and the children of emph/strong are printed two
+// columns deeper.
+extern void print_inlines(inl* ils, int indent)
+{
+    while(ils != NULL) {
+        /*
+        // we add 11 extra spaces for the line/column info
+        for (int i=0; i < 11; i++) {
+          putchar(' ');
+        }
+        putchar('|');
+        putchar(' ');
+        */
+        for (int i=0; i < indent; i++) {
+            putchar(' ');
+        }
+        switch(ils->tag) {
+        case str:
+            print_inline_literal("str", ils->content.literal);
+            break;
+        case linebreak:
+            printf("linebreak\n");
+            break;
+        case softbreak:
+            printf("softbreak\n");
+            break;
+        case code:
+            print_inline_literal("code", ils->content.literal);
+            break;
+        case raw_html:
+            print_inline_literal("html", ils->content.literal);
+            break;
+        case entity:
+            print_inline_literal("entity", ils->content.literal);
+            break;
+        case link:
+            print_inline_linkable("link", ils->content.linkable.url,
+                                  ils->content.linkable.title);
+            print_inlines(ils->content.linkable.label, indent + 2);
+            break;
+        case image:
+            print_inline_linkable("image", ils->content.linkable.url,
+                                  ils->content.linkable.title);
+            print_inlines(ils->content.linkable.label, indent + 2);
+            break;
+        case strong:
+            printf("strong\n");
+            // emph/strong keep their children in content.inlines.
+            print_inlines(ils->content.inlines, indent + 2);
+            break;
+        case emph:
+            printf("emph\n");
+            print_inlines(ils->content.inlines, indent + 2);
+            break;
+        }
+        ils = ils->next;
+    }
+}
diff --git a/src/scanners.h b/src/scanners.h
new file mode 100644
index 0000000..71e0520
--- /dev/null
+++ b/src/scanners.h
@@ -0,0 +1,15 @@
+#include "bstrlib.h"
+
+// Scanners implemented in scanners.re. Each one examines the bytes of s
+// starting at offset pos. Unless noted otherwise the result is the number
+// of characters matched, or 0 when there is no match.
+// URI autolink, scanned after the opening '<'.
+int scan_autolink_uri(bstring s, int pos);
+// Email autolink, scanned after the opening '<'.
+int scan_autolink_email(bstring s, int pos);
+// Inline HTML tag, scanned after the opening '<'.
+int scan_html_tag(bstring s, int pos);
+// Start of an HTML block, scanned including the opening '<'.
+int scan_html_block_tag(bstring s, int pos);
+// URL of a link or reference, optionally enclosed in <...>.
+int scan_link_url(bstring s, int pos);
+// Link title in single quotes, double quotes or parentheses.
+int scan_link_title(bstring s, int pos);
+// Run of spaces, tabs and newlines.
+int scan_spacechars(bstring s, int pos);
+// Start of an ATX header.
+int scan_atx_header_start(bstring s, int pos);
+// Setext header underline: returns 1 for level 1, 2 for level 2, 0 for none.
+int scan_setext_header_line(bstring s, int pos);
+// Horizontal rule line.
+int scan_hrule(bstring s, int pos);
+// Opening code fence.
+int scan_open_code_fence(bstring s, int pos);
+// Closing code fence, checked against the length len.
+int scan_close_code_fence(bstring s, int pos, int len);
+// Entity beginning with '&'.
+int scan_entity(bstring s, int pos);
diff --git a/src/scanners.re b/src/scanners.re
new file mode 100644
index 0000000..f90238d
--- /dev/null
+++ b/src/scanners.re
@@ -0,0 +1,238 @@
+#include "bstrlib.h"
+
+// Shared re2c setup and named patterns for the scanner functions below.
+// Input characters are treated as unsigned char, the cursor is the local
+// variable p of each scanner, and buffer refilling (yyfill) is disabled.
+// NOTE(review): YYMARKER and YYCTXMARKER are both mapped to the same local
+// variable `marker`; confirm this is safe for the rules that use trailing
+// context ("/").
+/*!re2c
+ re2c:define:YYCTYPE = "unsigned char";
+ re2c:define:YYCURSOR = p;
+ re2c:define:YYMARKER = marker;
+ re2c:define:YYCTXMARKER = marker;
+ re2c:yyfill:enable = 0;
+
+ wordchar = [^\x00-\x20];
+
+ spacechar = [ \t\n];
+
+ reg_char = [^\\()\x00-\x20];
+
+ escaped_char = [\\][!"#$%&'()*+,./:;<=>?@[\\\]^_`{|}~-];
+
+ tagname = [A-Za-z][A-Za-z0-9]*;
+
+ blocktagname = 'article'|'header'|'aside'|'hgroup'|'blockquote'|'hr'|'body'|'li'|'br'|'map'|'button'|'object'|'canvas'|'ol'|'caption'|'output'|'col'|'p'|'colgroup'|'pre'|'dd'|'progress'|'div'|'section'|'dl'|'table'|'td'|'dt'|'tbody'|'embed'|'textarea'|'fieldset'|'tfoot'|'figcaption'|'th'|'figure'|'thead'|'footer'|'footer'|'tr'|'form'|'ul'|'h1'|'h2'|'h3'|'h4'|'h5'|'h6'|'video'|'script'|'style';
+
+ attributename = [a-zA-Z_:][a-zA-Z0-9:._-]*;
+
+ unquotedvalue = [^\"'=<>`\x00]+;
+ singlequotedvalue = ['][^'\x00]*['];
+ doublequotedvalue = [\"][^\"\x00]*[\"];
+
+ attributevalue = unquotedvalue | singlequotedvalue | doublequotedvalue;
+
+ attributevaluespec = spacechar* [=] spacechar* attributevalue;
+
+ attribute = spacechar+ attributename attributevaluespec?;
+
+ opentag = tagname attribute* spacechar* [/]? [>];
+ closetag = [/] tagname spacechar* [>];
+
+ htmlcomment = "!--" ([^-\x00]+ | [-][^-\x00]+)* "-->";
+
+ processinginstruction = "?" ([^?>\x00]+ | [?][^>\x00])* "?>";
+
+ declaration = "!" [A-Z]+ spacechar+ [^>\x00]* ">";
+
+ cdata = "![CDATA[" ([^\]\x00]+ | "]" [^\]\x00] | "]]" [^>\x00])* "]]>";
+
+ htmltag = opentag | closetag | htmlcomment | processinginstruction |
+ declaration | cdata;
+
+ in_parens_nosp = [(] (reg_char|escaped_char)* [)];
+
+ in_double_quotes = ["] (escaped_char|[^"\x00])* ["];
+ in_single_quotes = ['] (escaped_char|[^'\x00])* ['];
+ in_parens = [(] (escaped_char|[^)\x00])* [)];
+
+ scheme = 'coap'|'doi'|'javascript'|'aaa'|'aaas'|'about'|'acap'|'cap'|'cid'|'crid'|'data'|'dav'|'dict'|'dns'|'file'|'ftp'|'geo'|'go'|'gopher'|'h323'|'http'|'https'|'iax'|'icap'|'im'|'imap'|'info'|'ipp'|'iris'|'iris.beep'|'iris.xpc'|'iris.xpcs'|'iris.lwz'|'ldap'|'mailto'|'mid'|'msrp'|'msrps'|'mtqp'|'mupdate'|'news'|'nfs'|'ni'|'nih'|'nntp'|'opaquelocktoken'|'pop'|'pres'|'rtsp'|'service'|'session'|'shttp'|'sieve'|'sip'|'sips'|'sms'|'snmp'|'soap.beep'|'soap.beeps'|'tag'|'tel'|'telnet'|'tftp'|'thismessage'|'tn3270'|'tip'|'tv'|'urn'|'vemmi'|'ws'|'wss'|'xcon'|'xcon-userid'|'xmlrpc.beep'|'xmlrpc.beeps'|'xmpp'|'z39.50r'|'z39.50s'|'adiumxtra'|'afp'|'afs'|'aim'|'apt'|'attachment'|'aw'|'beshare'|'bitcoin'|'bolo'|'callto'|'chrome'|'chrome-extension'|'com-eventbrite-attendee'|'content'|'cvs'|'dlna-playsingle'|'dlna-playcontainer'|'dtn'|'dvb'|'ed2k'|'facetime'|'feed'|'finger'|'fish'|'gg'|'git'|'gizmoproject'|'gtalk'|'hcp'|'icon'|'ipn'|'irc'|'irc6'|'ircs'|'itms'|'jar'|'jms'|'keyparc'|'lastfm'|'ldaps'|'magnet'|'maps'|'market'|'message'|'mms'|'ms-help'|'msnim'|'mumble'|'mvn'|'notes'|'oid'|'palm'|'paparazzi'|'platform'|'proxy'|'psyc'|'query'|'res'|'resource'|'rmi'|'rsync'|'rtmp'|'secondlife'|'sftp'|'sgn'|'skype'|'smb'|'soldat'|'spotify'|'ssh'|'steam'|'svn'|'teamspeak'|'things'|'udp'|'unreal'|'ut2004'|'ventrilo'|'view-source'|'webcal'|'wtai'|'wyciwyg'|'xfire'|'xri'|'ymsgr';
+*/
+
+// Try to match URI autolink after first <, returning number of chars matched.
+// s, pos: buffer and offset of the first character after the '<'.
+// A match is one of the known schemes, a ':', the URI characters, and the
+// closing '>'; the returned count includes that '>'.
+// Returns 0 if there is no match.
+extern int scan_autolink_uri(bstring s, int pos)
+{
+ unsigned char * marker = NULL;
+ unsigned char * p = &(s->data[pos]);
+ unsigned char * start = p;
+/*!re2c
+ scheme [:]([^\x00-\x20<>\\]|escaped_char)*[>] { return (p - start); }
+ .? { return 0; }
+*/
+}
+
+// Try to match email autolink after first <, returning num of chars matched.
+// s, pos: buffer and offset of the first character after the '<'.
+// A match is a local part, '@', one or more dot-separated domain labels,
+// and the closing '>'; the returned count includes that '>'.
+// Returns 0 if there is no match.
+extern int scan_autolink_email(bstring s, int pos)
+{
+ unsigned char * marker = NULL;
+ unsigned char * p = &(s->data[pos]);
+ unsigned char * start = p;
+/*!re2c
+ [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
+ [@]
+ [a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
+ ([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*
+ [>] { return (p - start); }
+ .? { return 0; }
+*/
+}
+
+// Try to match an HTML tag after first <, returning num of chars matched.
+// s, pos: buffer and offset of the first character after the '<'.
+// Accepts anything covered by the htmltag pattern: open tag, closing tag,
+// comment, processing instruction, declaration or CDATA section.
+// Returns 0 if there is no match.
+extern int scan_html_tag(bstring s, int pos)
+{
+ unsigned char * marker = NULL;
+ unsigned char * p = &(s->data[pos]);
+ unsigned char * start = p;
+/*!re2c
+ htmltag { return (p - start); }
+ .? { return 0; }
+*/
+}
+
+// Try to match an HTML block tag including first <,
+// returning num of chars matched.
+// s, pos: buffer and offset of the '<'.
+// Matches "</" or "<" followed by a block tag name and then a space
+// character or '>' (also '/' for an opening tag), or just "<!" or "<?".
+// Returns 0 if there is no match.
+extern int scan_html_block_tag(bstring s, int pos)
+{
+ unsigned char * marker = NULL;
+ unsigned char * p = &(s->data[pos]);
+ unsigned char * start = p;
+/*!re2c
+ [<] [/] blocktagname (spacechar | [>]) { return (p - start); }
+ [<] blocktagname (spacechar | [/>]) { return (p - start); }
+ [<] [!?] { return (p - start); }
+ .? { return 0; }
+*/
+}
+
+// Try to match a URL in a link or reference, return number of chars matched.
+// This may optionally be contained in <..>; otherwise
+// whitespace and unbalanced right parentheses aren't allowed.
+// Newlines aren't ever allowed.
+// s, pos: buffer and offset at which the URL is expected to start.
+// The returned count includes any leading characters matched by [ \n]*.
+// The result is 0 both when nothing matches and when the match is empty.
+// NOTE(review): both rules start with [ \n]*, so newlines before the URL are
+// accepted; confirm this agrees with "Newlines aren't ever allowed" above.
+extern int scan_link_url(bstring s, int pos)
+{
+ unsigned char * marker = NULL;
+ unsigned char * p = &(s->data[pos]);
+ unsigned char * start = p;
+/*!re2c
+ [ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
+ [ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }
+ .? { return 0; }
+*/
+}
+
+// Try to match a link title (in single quotes, in double quotes, or
+// in parentheses), returning number of chars matched. Allow one
+// level of internal nesting (quotes within quotes).
+// s, pos: buffer and offset of the opening delimiter.
+// The returned count includes both delimiters. A backslash-escaped
+// character inside the title does not end it.
+// Returns 0 if there is no match.
+extern int scan_link_title(bstring s, int pos)
+{
+ unsigned char * marker = NULL;
+ unsigned char * p = &(s->data[pos]);
+ unsigned char * start = p;
+/*!re2c
+ ["] (escaped_char|[^"\x00])* ["] { return (p - start); }
+ ['] (escaped_char|[^'\x00])* ['] { return (p - start); }
+ [(] (escaped_char|[^)\x00])* [)] { return (p - start); }
+ .? { return 0; }
+*/
+}
+
+// Match space characters, including newlines.
+// s, pos: buffer and offset at which to start.
+// Returns the length of the run of spaces, tabs and newlines starting at
+// pos, which is 0 when the character at pos is none of these.
+extern int scan_spacechars(bstring s, int pos)
+{
+ unsigned char * p = &(s->data[pos]);
+ unsigned char * start = p;
+/*!re2c
+ [ \t\n]* { return (p - start); }
+ . { return 0; }
+*/
+}
+
+// Match ATX header start.
+// s, pos: buffer and offset of the first '#'.
+// A match is one to six '#' characters followed by one or more spaces or
+// by a newline; the returned count includes those trailing characters.
+// Returns 0 if there is no match.
+extern int scan_atx_header_start(bstring s, int pos)
+{
+ unsigned char * marker = NULL;
+ unsigned char * p = &(s->data[pos]);
+ unsigned char * start = p;
+/*!re2c
+ [#]{1,6} ([ ]+|[\n]) { return (p - start); }
+ .? { return 0; }
+*/
+}
+
+// Match setext header line. Return 1 for level-1 header,
+// 2 for level-2, 0 for no match.
+// s, pos: buffer and offset of the first underline character.
+// A level-1 line is one or more '=', a level-2 line one or more '-';
+// either may be followed by spaces and must end with a newline.
+// Unlike the other scanners, the result is the level, not a length.
+extern int scan_setext_header_line(bstring s, int pos)
+{
+ unsigned char * marker = NULL;
+ unsigned char * p = &(s->data[pos]);
+/*!re2c
+ [=]+ [ ]* [\n] { return 1; }
+ [-]+ [ ]* [\n] { return 2; }
+ .? { return 0; }
+*/
+}
+
+// Scan a horizontal rule line: "...three or more hyphens, asterisks,
+// or underscores on a line by themselves. If you wish, you may use
+// spaces between the hyphens or asterisks."
+// s, pos: buffer and offset of the first rule character.
+// All rule characters on the line must be the same ('*', '_' or '-').
+// Returns the number of chars matched, including the terminating newline,
+// or 0 if there is no match.
+extern int scan_hrule(bstring s, int pos)
+{
+ unsigned char * marker = NULL;
+ unsigned char * p = &(s->data[pos]);
+ unsigned char * start = p;
+/*!re2c
+ ([*][ ]*){3,} [ \t]* [\n] { return (p - start); }
+ ([_][ ]*){3,} [ \t]* [\n] { return (p - start); }
+ ([-][ ]*){3,} [ \t]* [\n] { return (p - start); }
+ .? { return 0; }
+*/
+}
+
+// Scan an opening code fence.
+// s, pos: buffer and offset of the first fence character.
+// A fence is three or more backticks, or three or more tildes. The rest of
+// the line (the part after "/" in the rules, i.e. re2c trailing context)
+// must not contain the fence character and must end with a newline.
+// Returns p - start on a match, 0 otherwise.
+extern int scan_open_code_fence(bstring s, int pos)
+{
+ unsigned char * marker = NULL;
+ unsigned char * p = &(s->data[pos]);
+ unsigned char * start = p;
+/*!re2c
+ [`]{3,} / [^`\n\x00]*[\n] { return (p - start); }
+ [~]{3,} / [^~\n\x00]*[\n] { return (p - start); }
+ .? { return 0; }
+*/
+}
+
+// Scan a closing code fence with length at least len.
+// s, pos: buffer and offset of the first fence character.
+// len: the length the match is compared against.
+// A candidate is three or more backticks or tildes followed only by space
+// characters up to a newline (trailing context). Returns p - start when
+// that value is greater than len, and 0 otherwise.
+// NOTE(review): the test below is "p - start > len", while the first line of
+// this comment says "at least len" (which would be >=). Confirm which is
+// intended, and what p - start measures here given that YYCTXMARKER shares
+// the variable `marker` with YYMARKER.
+extern int scan_close_code_fence(bstring s, int pos, int len)
+{
+ unsigned char * marker = NULL;
+ unsigned char * p = &(s->data[pos]);
+ unsigned char * start = p;
+/*!re2c
+ ([`]{3,} | [~]{3,}) / spacechar* [\n]
+ { if (p - start > len) {
+ return (p - start);
+ } else {
+ return 0;
+ } }
+ .? { return 0; }
+*/
+}
+
+// Scans an entity.
+// Returns number of chars matched.
+// s, pos: buffer and offset of the '&'.
+// A match is '&', then either '#' with 1-8 decimal digits, '#x'/'#X' with
+// 1-8 hex digits, or a letter followed by 1-31 letters or digits, then ';'.
+// The returned count includes the '&' and the ';'.
+// Returns 0 if there is no match.
+extern int scan_entity(bstring s, int pos)
+{
+ unsigned char * marker = NULL;
+ unsigned char * p = &(s->data[pos]);
+ unsigned char * start = p;
+/*!re2c
+ [&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;]
+ { return (p - start); }
+ .? { return 0; }
+*/
+}
diff --git a/src/stmd.h b/src/stmd.h
new file mode 100644
index 0000000..5e34399
--- /dev/null
+++ b/src/stmd.h
@@ -0,0 +1,121 @@
+// Shared declarations for the stmd parser: inline and block node types,
+// the link reference map, and the parsing/rendering entry points.
+// The include guard makes it safe to include this header more than once
+// in a translation unit (it defines struct and typedef names).
+#ifndef STMD_H
+#define STMD_H
+
+#include <stdbool.h>
+#include "bstrlib.h"
+#include "uthash.h"
+
+#define VERSION "0.1"
+#define CODE_INDENT 4
+
+// Inline-level node. Nodes are chained through `next`; `tag` selects
+// which member of `content` is meaningful.
+// NOTE(review): the tag-to-member mapping is not visible in this header;
+// presumably the literal-like tags use `literal`, emph/strong use
+// `inlines` and link/image use `linkable` -- confirm against the parser.
+typedef struct Inline {
+  enum { str, softbreak, linebreak, code, raw_html, entity,
+         emph, strong, link, image } tag;
+  union {
+    bstring literal;
+    struct Inline* inlines;
+    struct { struct Inline* label;
+             bstring url;
+             bstring title;
+    } linkable;
+  } content;
+  struct Inline* next;
+} inl;
+
+// Entry of the link reference map (a uthash table, see `hh`).
+// NOTE(review): presumably keyed by `label` -- confirm in add_reference.
+typedef struct Reference {
+  bstring label;
+  bstring url;
+  bstring title;
+  UT_hash_handle hh;      // used by uthash
+} reference;
+
+// State passed to the inline parsing functions declared below: the text
+// being parsed, an offset into it and the reference map to consult.
+// NOTE(review): label_nestlevel presumably tracks bracket nesting while
+// a link label is parsed -- confirm.
+typedef struct Subject {
+  bstring buffer;
+  int pos;
+  reference** reference_map;
+  int label_nestlevel;
+} subject;
+
+// Types for blocks
+
+// List attributes, stored in block.attributes.list_data.
+struct ListData {
+  enum { bullet,
+         ordered } list_type;
+  int marker_offset;
+  int padding;
+  int start;
+  enum { period,
+         parens } delimiter;
+  unsigned char bullet_char;
+  bool tight;
+};
+
+// Fenced code attributes, stored in block.attributes.fenced_code_data.
+struct FencedCodeData {
+  int fence_length;
+  int fence_offset;
+  char fence_char;
+  bstring info;
+};
+
+// Block-level node. Blocks form a tree through `children`, `last_child`
+// and `parent`, and siblings are doubly linked through `next`/`prev`.
+// `tag` gives the block type; `attributes` holds the type-specific data.
+// NOTE(review): which union member belongs to which tag, and the exact
+// role of `top`, are not visible in this header -- confirm in blocks.c.
+typedef struct Block {
+  enum { document,
+         block_quote,
+         list,
+         list_item,
+         fenced_code,
+         indented_code,
+         html_block,
+         paragraph,
+         atx_header,
+         setext_header,
+         hrule,
+         reference_def
+  } tag;
+  int start_line;
+  int start_column;
+  int end_line;
+  bool open;
+  bool last_line_blank;
+  struct Block* children;
+  struct Block* last_child;
+  struct Block* parent;
+  struct Block* top;
+  bstring string_content;
+  inl* inline_content;
+  union {
+    struct ListData list_data;
+    struct FencedCodeData fenced_code_data;
+    int header_level;
+    reference** refmap;
+  } attributes;
+  struct Block * next;
+  struct Block * prev;
+} block;
+
+// Inline parsing and reference map handling.
+int parse_inline(subject* subj, inl ** last);
+inl* parse_inlines(bstring input, reference** refmap);
+inl* parse_inlines_while(subject* subj, int (*f)(subject*));
+void free_inlines(inl* e);
+int parse_reference(bstring input, reference** refmap);
+void free_reference(reference *ref);
+void free_reference_map(reference **refmap);
+reference* make_reference(bstring label, bstring url, bstring title);
+reference* lookup_reference(reference** refmap, bstring label);
+void add_reference(reference** refmap, reference* ref);
+int unescape(bstring s);
+
+// Block tree construction. make_document is declared with (void) so the
+// compiler rejects calls that pass arguments.
+extern block* make_document(void);
+extern block* add_child(block* parent,
+                        int block_type, int start_line, int start_column);
+void free_blocks(block* e);
+
+// FOR NOW:
+int process_inlines(block* cur, reference** refmap);
+int incorporate_line(bstring ln, int line_number, block** curptr);
+int finalize(block* b, int line_number);
+
+// Debug printing of the parsed structures.
+void print_inlines(inl* ils, int indent);
+void print_blocks(block* blk, int indent);
+
+// HTML rendering; the output is delivered through *result.
+int blocks_to_html(block* b, bstring* result, bool tight);
+int inlines_to_html(inl* b, bstring* result);
+
+int bdetab(bstring s, int utf8);
+
+#endif /* STMD_H */
+
diff --git a/src/utf8.c b/src/utf8.c
new file mode 100644
index 0000000..4bb3b35
--- /dev/null
+++ b/src/utf8.c
@@ -0,0 +1,106 @@
+#include <stdlib.h>
+#include "bstrlib.h"
+#include "debug.h"
+
+// Steps s to the next byte and verifies that it is a UTF-8 continuation
+// byte (top two bits 10). Expands to two statements, so it must only be
+// used where a statement sequence is valid.
+// NOTE(review): check() comes from debug.h (not visible here); it
+// presumably logs the message and jumps to the caller's `error` label.
+#define advance(s) \
+ s++; \
+ check(*s >> 6 == 0x02, "UTF-8 decode error on byte %x", *s);
+
+// Reads one Unicode code point from the NUL-terminated, UTF-8 encoded
+// string s and stores it in *n.
+//
+// Returns a pointer to the position following the decoded sequence, or
+// NULL (leaving *n untouched) when s points at the terminating NUL.
+// On malformed input *n is set to U+FFFD and the returned pointer
+// always lies beyond s, so a caller looping until NULL terminates:
+//  - an invalid lead byte (a stray continuation byte, or 0xFC..0xFF) is
+//    skipped;
+//  - a sequence cut short by a non-continuation byte returns a pointer
+//    to that byte, so that it is decoded on the next call.
+// Lead bytes of the form 111110xx (five-byte sequences) are accepted,
+// and overlong encodings and surrogates are not rejected.
+// NOTE(review): advance() relies on check() from debug.h, which is
+// presumed to jump to the `error` label below on failure.
+extern unsigned char * from_utf8(unsigned char * s, unsigned int *n)
+{
+  int x = 0;
+
+  if (*s == 0) {
+    return NULL;
+  } else if (*s < 0x80) {
+    // single byte (ASCII)
+    x = *s;
+  } else if (*s >> 5 == 0x06) {
+    // 110xxxxx: one continuation byte follows
+    x = *s & 0x1F;
+    advance(s);
+    x = (x << 6) + (*s & 0x3F);
+  } else if (*s >> 4 == 0x0E) {
+    // 1110xxxx: two continuation bytes follow
+    x = *s & 0x0F;
+    advance(s);
+    x = (x << 6) + (*s & 0x3F);
+    advance(s);
+    x = (x << 6) + (*s & 0x3F);
+  } else if (*s >> 3 == 0x1E) {
+    // 11110xxx: three continuation bytes follow
+    x = *s & 0x07;
+    advance(s);
+    x = (x << 6) + (*s & 0x3F);
+    advance(s);
+    x = (x << 6) + (*s & 0x3F);
+    advance(s);
+    x = (x << 6) + (*s & 0x3F);
+  } else if (*s >> 2 == 0x3E) {
+    // 111110xx: four continuation bytes follow
+    x = *s & 0x03;
+    advance(s);
+    x = (x << 6) + (*s & 0x3F);
+    advance(s);
+    x = (x << 6) + (*s & 0x3F);
+    advance(s);
+    x = (x << 6) + (*s & 0x3F);
+    advance(s);
+    x = (x << 6) + (*s & 0x3F);
+  } else {
+    log_err("UTF-8 decode error on byte %x", *s);
+    // Step over the offending byte. Returning the unchanged pointer
+    // would make callers that loop until NULL spin forever on it.
+    s++;
+    goto error;
+  }
+  *n = x;
+  s++;
+  return s;
+ error:
+  *n = 0xFFFD;
+  return s;
+}
+
+// Converts the unicode code point c to UTF-8, appending the encoded
+// bytes to dest. Returns 0 on success, -1 on error.
+// Errors are: c in the surrogate range 0xD800..0xDFFF, c >= 0x110000,
+// or a failed append to dest (bconchar not returning BSTR_OK). After a
+// failed append dest may already hold the first bytes of the sequence.
+extern int to_utf8(unsigned int c, bstring dest)
+{
+  if (c < 0x80) {
+    if (bconchar(dest, c) != BSTR_OK) goto error;
+  } else if (c < 0x800) {
+    if (bconchar(dest, 192 + c/64) != BSTR_OK ||
+        bconchar(dest, 128 + c%64) != BSTR_OK) goto error;
+  } else if (c - 0xd800u < 0x800) {
+    // unsigned wrap-around makes this true exactly for 0xD800..0xDFFF
+    goto error;
+  } else if (c < 0x10000) {
+    if (bconchar(dest, 224 + c / 4096) != BSTR_OK ||
+        bconchar(dest, 128 + c /64%64) != BSTR_OK ||
+        bconchar(dest, 128 + c%64) != BSTR_OK) goto error;
+  } else if (c < 0x110000) {
+    if (bconchar(dest, 240 + c/262144) != BSTR_OK ||
+        bconchar(dest, 128 + c/4096%64) != BSTR_OK ||
+        bconchar(dest, 128 + c/64%64) != BSTR_OK ||
+        bconchar(dest, 128 + c%64) != BSTR_OK) goto error;
+  } else {
+    goto error;
+  }
+  return 0;
+error:
+  return -1;
+}
+
+// Appends the UTF-8 encoding of code point x to the local bstring `buf`
+// of the enclosing function.
+// NOTE(review): relies on check() from debug.h, which presumably jumps
+// to the enclosing function's `error` label when to_utf8 fails.
+#define bufpush(x) \
+ check(to_utf8(x, buf) == 0, "UTF-8 encode error on code point %04x", x)
+
+// Returns the case-folded version of the source string, or NULL on error.
+// The result is a newly allocated bstring. source is decoded one code
+// point at a time with from_utf8; each code point is handled by the code
+// in case_fold_switch.c, which is included into the loop body.
+// NOTE(review): case_fold_switch.c is not visible here; it presumably
+// appends the folded code point(s) to buf via bufpush(), whose failure
+// path is the `error` label below.
+extern bstring case_fold(bstring source)
+{
+  unsigned char * s = source->data;
+  unsigned int c = 0;
+  bstring buf = bfromcstr("");
+  while ((s = from_utf8(s, &c))) {
+#include "case_fold_switch.c"
+  }
+  return buf;
+error:
+  // The partial result is unreachable once NULL is returned, so release
+  // it here instead of leaking it.
+  bdestroy(buf);
+  return NULL;
+}
+
diff --git a/src/utf8.h b/src/utf8.h
new file mode 100644
index 0000000..fe59a90
--- /dev/null
+++ b/src/utf8.h
@@ -0,0 +1,6 @@
+// UTF-8 decoding, encoding and case folding helpers (see utf8.c).
+#ifndef UTF8_H
+#define UTF8_H
+
+#include <stdlib.h>
+#include "bstrlib.h"
+
+// Decodes one code point from the NUL-terminated string s into *n.
+// Returns the position after it, or NULL at the terminating NUL; on
+// malformed input *n is set to 0xFFFD.
+extern unsigned char * from_utf8(unsigned char * s, unsigned int *n);
+
+// Appends the UTF-8 encoding of code point c to dest.
+// Returns 0 on success, -1 on error.
+extern int to_utf8(unsigned int c, bstring dest);
+
+// Returns a newly allocated case-folded copy of source, or NULL on error.
+extern bstring case_fold(bstring source);
+
+#endif /* UTF8_H */
diff --git a/src/uthash.h b/src/uthash.h
new file mode 100644
index 0000000..b9bc7e9
--- /dev/null
+++ b/src/uthash.h
@@ -0,0 +1,948 @@
+/*
+Copyright (c) 2003-2013, Troy D. Hanson http://troydhanson.github.com/uthash/
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef UTHASH_H
+#define UTHASH_H
+
+#include <string.h> /* memcmp,strlen */
+#include <stddef.h> /* ptrdiff_t */
+#include <stdlib.h> /* exit() */
+
+/* These macros use decltype or the earlier __typeof GNU extension.
+ As decltype is only available in newer compilers (VS2010 or gcc 4.3+
+ when compiling c++ source) this code uses whatever method is needed
+ or, for VS2008 where neither is available, uses casting workarounds. */
+#ifdef _MSC_VER /* MS compiler */
+#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */
+#define DECLTYPE(x) (decltype(x))
+#else /* VS2008 or older (or VS2010 in C mode) */
+#define NO_DECLTYPE
+#define DECLTYPE(x)
+#endif
+#else /* GNU, Sun and other compilers */
+#define DECLTYPE(x) (__typeof(x))
+#endif
+
+#ifdef NO_DECLTYPE
+#define DECLTYPE_ASSIGN(dst,src) \
+do { \
+ char **_da_dst = (char**)(&(dst)); \
+ *_da_dst = (char*)(src); \
+} while(0)
+#else
+#define DECLTYPE_ASSIGN(dst,src) \
+do { \
+ (dst) = DECLTYPE(dst)(src); \
+} while(0)
+#endif
+
+/* a number of the hash functions use uint32_t, which isn't defined on win32 */
+#ifdef _MSC_VER
+typedef unsigned int uint32_t;
+typedef unsigned char uint8_t;
+#else
+#include <inttypes.h> /* uint32_t */
+#endif
+
+#define UTHASH_VERSION 1.9.8
+
+#ifndef uthash_fatal
+#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */
+#endif
+#ifndef uthash_malloc
+#define uthash_malloc(sz) malloc(sz) /* malloc fcn */
+#endif
+#ifndef uthash_free
+#define uthash_free(ptr,sz) free(ptr) /* free fcn */
+#endif
+
+#ifndef uthash_noexpand_fyi
+#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */
+#endif
+#ifndef uthash_expand_fyi
+#define uthash_expand_fyi(tbl) /* can be defined to log expands */
+#endif
+
+/* initial number of buckets */
+#define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */
+#define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */
+#define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */
+
+/* calculate the element whose hash handle address is hhp */
+#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))
+
+/* Look up the item whose key equals the keylen bytes at keyptr.
+ *   hh     - name of the UT_hash_handle field in the item structure
+ *   head   - pointer to the hash (may be NULL, meaning an empty hash)
+ *   keyptr - address of the key to search for
+ *   keylen - length of the key in bytes
+ *   out    - receives the matching item, or NULL if there is none
+ * The key is hashed with HASH_FCN to pick a bucket; the bucket is only
+ * scanned (HASH_FIND_IN_BKT) when HASH_BLOOM_TEST passes, which is always
+ * the case when HASH_BLOOM is not defined. */
+#define HASH_FIND(hh,head,keyptr,keylen,out) \
+do { \
+ unsigned _hf_bkt,_hf_hashv; \
+ out=NULL; \
+ if (head) { \
+ HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \
+ if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \
+ HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \
+ keyptr,keylen,out); \
+ } \
+ } \
+} while (0)
+
+#ifdef HASH_BLOOM
+#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM)
+#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0)
+#define HASH_BLOOM_MAKE(tbl) \
+do { \
+ (tbl)->bloom_nbits = HASH_BLOOM; \
+ (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \
+ if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \
+ memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \
+ (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \
+} while (0)
+
+#define HASH_BLOOM_FREE(tbl) \
+do { \
+ uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \
+} while (0)
+
+#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8)))
+#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8)))
+
+#define HASH_BLOOM_ADD(tbl,hashv) \
+ HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
+
+#define HASH_BLOOM_TEST(tbl,hashv) \
+ HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
+
+#else
+#define HASH_BLOOM_MAKE(tbl)
+#define HASH_BLOOM_FREE(tbl)
+#define HASH_BLOOM_ADD(tbl,hashv)
+#define HASH_BLOOM_TEST(tbl,hashv) (1)
+#define HASH_BLOOM_BYTELEN 0
+#endif
+
+#define HASH_MAKE_TABLE(hh,head) \
+do { \
+ (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \
+ sizeof(UT_hash_table)); \
+ if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \
+ memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \
+ (head)->hh.tbl->tail = &((head)->hh); \
+ (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \
+ (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \
+ (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \
+ (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \
+ HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
+ if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \
+ memset((head)->hh.tbl->buckets, 0, \
+ HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
+ HASH_BLOOM_MAKE((head)->hh.tbl); \
+ (head)->hh.tbl->signature = HASH_SIGNATURE; \
+} while(0)
+
+#define HASH_ADD(hh,head,fieldname,keylen_in,add) \
+ HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)
+
+/* Add `add` to the hash, first removing any item that has the same key.
+ * The key is the field `fieldname` of `add`, keylen_in bytes long.
+ * `replaced` receives the item that was removed, or NULL if the key was
+ * not present. The removed item is only unlinked by HASH_DELETE; it is
+ * not freed here. */
+#define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \
+do { \
+ replaced=NULL; \
+ HASH_FIND(hh,head,&((add)->fieldname),keylen_in,replaced); \
+ if (replaced!=NULL) { \
+ HASH_DELETE(hh,head,replaced); \
+ }; \
+ HASH_ADD(hh,head,fieldname,keylen_in,add); \
+} while(0)
+
+#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
+do { \
+ unsigned _ha_bkt; \
+ (add)->hh.next = NULL; \
+ (add)->hh.key = (char*)(keyptr); \
+ (add)->hh.keylen = (unsigned)(keylen_in); \
+ if (!(head)) { \
+ head = (add); \
+ (head)->hh.prev = NULL; \
+ HASH_MAKE_TABLE(hh,head); \
+ } else { \
+ (head)->hh.tbl->tail->next = (add); \
+ (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \
+ (head)->hh.tbl->tail = &((add)->hh); \
+ } \
+ (head)->hh.tbl->num_items++; \
+ (add)->hh.tbl = (head)->hh.tbl; \
+ HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \
+ (add)->hh.hashv, _ha_bkt); \
+ HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \
+ HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \
+ HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \
+ HASH_FSCK(hh,head); \
+} while(0)
+
+/* Map a hash value to a bucket index by masking it with num_bkts-1.
+ * This equals hashv modulo num_bkts only when num_bkts is a power of
+ * two; the table starts with HASH_INITIAL_NUM_BUCKETS (32) buckets and
+ * HASH_EXPAND_BUCKETS doubles that count. */
+#define HASH_TO_BKT( hashv, num_bkts, bkt ) \
+do { \
+ bkt = ((hashv) & ((num_bkts) - 1)); \
+} while(0)
+
+/* delete "delptr" from the hash table.
+ * "the usual" patch-up process for the app-order doubly-linked-list.
+ * The use of _hd_hh_del below deserves special explanation.
+ * These used to be expressed using (delptr) but that led to a bug
+ * if someone used the same symbol for the head and deletee, like
+ * HASH_DELETE(hh,users,users);
+ * We want that to work, but by changing the head (users) below
+ * we were forfeiting our ability to further refer to the deletee (users)
+ * in the patch-up process. Solution: use scratch space to
+ * copy the deletee pointer, then the latter references are via that
+ * scratch pointer rather than through the repointed (users) symbol.
+ */
+#define HASH_DELETE(hh,head,delptr) \
+do { \
+ unsigned _hd_bkt; \
+ struct UT_hash_handle *_hd_hh_del; \
+ if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \
+ uthash_free((head)->hh.tbl->buckets, \
+ (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
+ HASH_BLOOM_FREE((head)->hh.tbl); \
+ uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
+ head = NULL; \
+ } else { \
+ _hd_hh_del = &((delptr)->hh); \
+ if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \
+ (head)->hh.tbl->tail = \
+ (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \
+ (head)->hh.tbl->hho); \
+ } \
+ if ((delptr)->hh.prev) { \
+ ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \
+ (head)->hh.tbl->hho))->next = (delptr)->hh.next; \
+ } else { \
+ DECLTYPE_ASSIGN(head,(delptr)->hh.next); \
+ } \
+ if (_hd_hh_del->next) { \
+ ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \
+ (head)->hh.tbl->hho))->prev = \
+ _hd_hh_del->prev; \
+ } \
+ HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \
+ HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \
+ (head)->hh.tbl->num_items--; \
+ } \
+ HASH_FSCK(hh,head); \
+} while (0)
+
+
+/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
+#define HASH_FIND_STR(head,findstr,out) \
+ HASH_FIND(hh,head,findstr,strlen(findstr),out)
+#define HASH_ADD_STR(head,strfield,add) \
+ HASH_ADD(hh,head,strfield,strlen(add->strfield),add)
+#define HASH_REPLACE_STR(head,strfield,add,replaced) \
+ HASH_REPLACE(hh,head,strfield,strlen(add->strfield),add,replaced)
+#define HASH_FIND_INT(head,findint,out) \
+ HASH_FIND(hh,head,findint,sizeof(int),out)
+#define HASH_ADD_INT(head,intfield,add) \
+ HASH_ADD(hh,head,intfield,sizeof(int),add)
+#define HASH_REPLACE_INT(head,intfield,add,replaced) \
+ HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced)
+#define HASH_FIND_PTR(head,findptr,out) \
+ HASH_FIND(hh,head,findptr,sizeof(void *),out)
+#define HASH_ADD_PTR(head,ptrfield,add) \
+ HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
+#define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \
+ HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced)
+#define HASH_DEL(head,delptr) \
+ HASH_DELETE(hh,head,delptr)
+
+/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
+ * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
+ */
+#ifdef HASH_DEBUG
+#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
+#define HASH_FSCK(hh,head) \
+do { \
+ unsigned _bkt_i; \
+ unsigned _count, _bkt_count; \
+ char *_prev; \
+ struct UT_hash_handle *_thh; \
+ if (head) { \
+ _count = 0; \
+ for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \
+ _bkt_count = 0; \
+ _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \
+ _prev = NULL; \
+ while (_thh) { \
+ if (_prev != (char*)(_thh->hh_prev)) { \
+ HASH_OOPS("invalid hh_prev %p, actual %p\n", \
+ _thh->hh_prev, _prev ); \
+ } \
+ _bkt_count++; \
+ _prev = (char*)(_thh); \
+ _thh = _thh->hh_next; \
+ } \
+ _count += _bkt_count; \
+ if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \
+ HASH_OOPS("invalid bucket count %d, actual %d\n", \
+ (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \
+ } \
+ } \
+ if (_count != (head)->hh.tbl->num_items) { \
+ HASH_OOPS("invalid hh item count %d, actual %d\n", \
+ (head)->hh.tbl->num_items, _count ); \
+ } \
+ /* traverse hh in app order; check next/prev integrity, count */ \
+ _count = 0; \
+ _prev = NULL; \
+ _thh = &(head)->hh; \
+ while (_thh) { \
+ _count++; \
+ if (_prev !=(char*)(_thh->prev)) { \
+ HASH_OOPS("invalid prev %p, actual %p\n", \
+ _thh->prev, _prev ); \
+ } \
+ _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \
+ _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \
+ (head)->hh.tbl->hho) : NULL ); \
+ } \
+ if (_count != (head)->hh.tbl->num_items) { \
+ HASH_OOPS("invalid app item count %d, actual %d\n", \
+ (head)->hh.tbl->num_items, _count ); \
+ } \
+ } \
+} while (0)
+#else
+#define HASH_FSCK(hh,head)
+#endif
+
+/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
+ * the descriptor to which this macro is defined for tuning the hash function.
+ * The app can #include <unistd.h> to get the prototype for write(2). */
+#ifdef HASH_EMIT_KEYS
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \
+do { \
+ unsigned _klen = fieldlen; \
+ write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
+ write(HASH_EMIT_KEYS, keyptr, fieldlen); \
+} while (0)
+#else
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
+#endif
+
+/* default to Jenkins' hash unless overridden, e.g. -DHASH_FUNCTION=HASH_SAX */
+#ifdef HASH_FUNCTION
+#define HASH_FCN HASH_FUNCTION
+#else
+#define HASH_FCN HASH_JEN
+#endif
+
+/* The Bernstein hash function, used in Perl prior to v5.6 */
+#define HASH_BER(key,keylen,num_bkts,hashv,bkt) \
+do { \
+ unsigned _hb_keylen=keylen; \
+ char *_hb_key=(char*)(key); \
+ (hashv) = 0; \
+ while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \
+ bkt = (hashv) & (num_bkts-1); \
+} while (0)
+
+
+/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
+ * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
+#define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \
+do { \
+ unsigned _sx_i; \
+ char *_hs_key=(char*)(key); \
+ hashv = 0; \
+ for(_sx_i=0; _sx_i < keylen; _sx_i++) \
+ hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \
+ bkt = hashv & (num_bkts-1); \
+} while (0)
+
+#define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \
+do { \
+ unsigned _fn_i; \
+ char *_hf_key=(char*)(key); \
+ hashv = 2166136261UL; \
+ for(_fn_i=0; _fn_i < keylen; _fn_i++) \
+ hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \
+ bkt = hashv & (num_bkts-1); \
+} while(0)
+
+#define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \
+do { \
+ unsigned _ho_i; \
+ char *_ho_key=(char*)(key); \
+ hashv = 0; \
+ for(_ho_i=0; _ho_i < keylen; _ho_i++) { \
+ hashv += _ho_key[_ho_i]; \
+ hashv += (hashv << 10); \
+ hashv ^= (hashv >> 6); \
+ } \
+ hashv += (hashv << 3); \
+ hashv ^= (hashv >> 11); \
+ hashv += (hashv << 15); \
+ bkt = hashv & (num_bkts-1); \
+} while(0)
+
+#define HASH_JEN_MIX(a,b,c) \
+do { \
+ a -= b; a -= c; a ^= ( c >> 13 ); \
+ b -= c; b -= a; b ^= ( a << 8 ); \
+ c -= a; c -= b; c ^= ( b >> 13 ); \
+ a -= b; a -= c; a ^= ( c >> 12 ); \
+ b -= c; b -= a; b ^= ( a << 16 ); \
+ c -= a; c -= b; c ^= ( b >> 5 ); \
+ a -= b; a -= c; a ^= ( c >> 3 ); \
+ b -= c; b -= a; b ^= ( a << 10 ); \
+ c -= a; c -= b; c ^= ( b >> 15 ); \
+} while (0)
+
+#define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \
+do { \
+ unsigned _hj_i,_hj_j,_hj_k; \
+ unsigned char *_hj_key=(unsigned char*)(key); \
+ hashv = 0xfeedbeef; \
+ _hj_i = _hj_j = 0x9e3779b9; \
+ _hj_k = (unsigned)(keylen); \
+ while (_hj_k >= 12) { \
+ _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \
+ + ( (unsigned)_hj_key[2] << 16 ) \
+ + ( (unsigned)_hj_key[3] << 24 ) ); \
+ _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \
+ + ( (unsigned)_hj_key[6] << 16 ) \
+ + ( (unsigned)_hj_key[7] << 24 ) ); \
+ hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \
+ + ( (unsigned)_hj_key[10] << 16 ) \
+ + ( (unsigned)_hj_key[11] << 24 ) ); \
+ \
+ HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
+ \
+ _hj_key += 12; \
+ _hj_k -= 12; \
+ } \
+ hashv += keylen; \
+ switch ( _hj_k ) { \
+ case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \
+ case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \
+ case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \
+ case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \
+ case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \
+ case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \
+ case 5: _hj_j += _hj_key[4]; \
+ case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \
+ case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \
+ case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \
+ case 1: _hj_i += _hj_key[0]; \
+ } \
+ HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
+ bkt = hashv & (num_bkts-1); \
+} while(0)
+
+/* The Paul Hsieh hash function */
+#undef get16bits
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
+ || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const uint16_t *) (d)))
+#endif
+
+#if !defined (get16bits)
+#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
+ +(uint32_t)(((const uint8_t *)(d))[0]) )
+#endif
+#define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \
+do { \
+ unsigned char *_sfh_key=(unsigned char*)(key); \
+ uint32_t _sfh_tmp, _sfh_len = keylen; \
+ \
+ int _sfh_rem = _sfh_len & 3; \
+ _sfh_len >>= 2; \
+ hashv = 0xcafebabe; \
+ \
+ /* Main loop */ \
+ for (;_sfh_len > 0; _sfh_len--) { \
+ hashv += get16bits (_sfh_key); \
+ _sfh_tmp = (uint32_t)(get16bits (_sfh_key+2)) << 11 ^ hashv; \
+ hashv = (hashv << 16) ^ _sfh_tmp; \
+ _sfh_key += 2*sizeof (uint16_t); \
+ hashv += hashv >> 11; \
+ } \
+ \
+ /* Handle end cases */ \
+ switch (_sfh_rem) { \
+ case 3: hashv += get16bits (_sfh_key); \
+ hashv ^= hashv << 16; \
+ hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)] << 18); \
+ hashv += hashv >> 11; \
+ break; \
+ case 2: hashv += get16bits (_sfh_key); \
+ hashv ^= hashv << 11; \
+ hashv += hashv >> 17; \
+ break; \
+ case 1: hashv += *_sfh_key; \
+ hashv ^= hashv << 10; \
+ hashv += hashv >> 1; \
+ } \
+ \
+ /* Force "avalanching" of final 127 bits */ \
+ hashv ^= hashv << 3; \
+ hashv += hashv >> 5; \
+ hashv ^= hashv << 4; \
+ hashv += hashv >> 17; \
+ hashv ^= hashv << 25; \
+ hashv += hashv >> 6; \
+ bkt = hashv & (num_bkts-1); \
+} while(0)
+
+#ifdef HASH_USING_NO_STRICT_ALIASING
+/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads.
+ * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
+ * MurmurHash uses the faster approach only on CPU's where we know it's safe.
+ *
+ * Note the preprocessor built-in defines can be emitted using:
+ *
+ * gcc -m64 -dM -E - < /dev/null (on gcc)
+ * cc -## a.c (where a.c is a simple test file) (Sun Studio)
+ */
+#if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86))
+#define MUR_GETBLOCK(p,i) p[i]
+#else /* non intel */
+#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0)
+#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 0x3) == 1)
+#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 0x3) == 2)
+#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 0x3) == 3)
+#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL))
+#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
+#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
+#define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
+#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8))
+#else /* assume little endian non-intel */
+#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
+#define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
+#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8))
+#endif
+#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \
+ (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \
+ (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \
+ MUR_ONE_THREE(p))))
+#endif
+#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+#define MUR_FMIX(_h) \
+do { \
+ _h ^= _h >> 16; \
+ _h *= 0x85ebca6b; \
+ _h ^= _h >> 13; \
+ _h *= 0xc2b2ae35l; \
+ _h ^= _h >> 16; \
+} while(0)
+
+#define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \
+do { \
+ const uint8_t *_mur_data = (const uint8_t*)(key); \
+ const int _mur_nblocks = (keylen) / 4; \
+ uint32_t _mur_h1 = 0xf88D5353; \
+ uint32_t _mur_c1 = 0xcc9e2d51; \
+ uint32_t _mur_c2 = 0x1b873593; \
+ uint32_t _mur_k1 = 0; \
+ const uint8_t *_mur_tail; \
+ const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4); \
+ int _mur_i; \
+ for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \
+ _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \
+ _mur_k1 *= _mur_c1; \
+ _mur_k1 = MUR_ROTL32(_mur_k1,15); \
+ _mur_k1 *= _mur_c2; \
+ \
+ _mur_h1 ^= _mur_k1; \
+ _mur_h1 = MUR_ROTL32(_mur_h1,13); \
+ _mur_h1 = _mur_h1*5+0xe6546b64; \
+ } \
+ _mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4); \
+ _mur_k1=0; \
+ switch((keylen) & 3) { \
+ case 3: _mur_k1 ^= _mur_tail[2] << 16; \
+ case 2: _mur_k1 ^= _mur_tail[1] << 8; \
+ case 1: _mur_k1 ^= _mur_tail[0]; \
+ _mur_k1 *= _mur_c1; \
+ _mur_k1 = MUR_ROTL32(_mur_k1,15); \
+ _mur_k1 *= _mur_c2; \
+ _mur_h1 ^= _mur_k1; \
+ } \
+ _mur_h1 ^= (keylen); \
+ MUR_FMIX(_mur_h1); \
+ hashv = _mur_h1; \
+ bkt = hashv & (num_bkts-1); \
+} while(0)
+#endif /* HASH_USING_NO_STRICT_ALIASING */
+
+/* key comparison function; return 0 if keys equal */
+#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
+
+/* iterate over items in a known bucket to find desired item
+ *   tbl       - the hash table (used to convert handles back to items)
+ *   hh        - name of the UT_hash_handle field in the item structure
+ *   head      - the bucket to scan
+ *   keyptr    - address of the key to search for
+ *   keylen_in - length of the key in bytes
+ *   out       - receives the matching item, or NULL if there is none
+ * The bucket chain is walked through hh_next; an item matches when its
+ * key length equals keylen_in and HASH_KEYCMP reports the keys equal. */
+#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
+do { \
+ if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \
+ else out=NULL; \
+ while (out) { \
+ if ((out)->hh.keylen == keylen_in) { \
+ if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) break; \
+ } \
+ if ((out)->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); \
+ else out = NULL; \
+ } \
+} while(0)
+
+/* add an item to a bucket
+ *   head  - the bucket (a UT_hash_bucket lvalue)
+ *   addhh - pointer to the hash handle of the item being added
+ * The handle is pushed onto the front of the bucket's chain. Once the
+ * bucket's count reaches (expand_mult+1) * HASH_BKT_CAPACITY_THRESH, and
+ * the table's noexpand flag is not 1, the table's buckets are expanded
+ * with HASH_EXPAND_BUCKETS. */
+#define HASH_ADD_TO_BKT(head,addhh) \
+do { \
+ head.count++; \
+ (addhh)->hh_next = head.hh_head; \
+ (addhh)->hh_prev = NULL; \
+ if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \
+ (head).hh_head=addhh; \
+ if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \
+ && (addhh)->tbl->noexpand != 1) { \
+ HASH_EXPAND_BUCKETS((addhh)->tbl); \
+ } \
+} while(0)
+
+/* remove an item from a given bucket
+ *   hh     - name of the hash handle field (unused, kept for interface)
+ *   head   - the bucket (a UT_hash_bucket lvalue)
+ *   hh_del - pointer to the hash handle to unlink from the bucket chain
+ * Decrements the bucket count and patches hh_head / hh_prev / hh_next
+ * around hh_del. Wrapped in do { } while (0) so that the macro behaves
+ * as a single statement (e.g. under an unbraced if/else), and hh_del is
+ * parenthesized so that an expression argument such as &x is safe. */
+#define HASH_DEL_IN_BKT(hh,head,hh_del) \
+do { \
+  (head).count--; \
+  if ((head).hh_head == (hh_del)) { \
+    (head).hh_head = (hh_del)->hh_next; \
+  } \
+  if ((hh_del)->hh_prev) { \
+    (hh_del)->hh_prev->hh_next = (hh_del)->hh_next; \
+  } \
+  if ((hh_del)->hh_next) { \
+    (hh_del)->hh_next->hh_prev = (hh_del)->hh_prev; \
+  } \
+} while (0)
+
+/* Bucket expansion has the effect of doubling the number of buckets
+ * and redistributing the items into the new buckets. Ideally the
+ * items will distribute more or less evenly into the new buckets
+ * (the extent to which this is true is a measure of the quality of
+ * the hash function as it applies to the key domain).
+ *
+ * With the items distributed into more buckets, the chain length
+ * (item count) in each bucket is reduced. Thus by expanding buckets
+ * the hash keeps a bound on the chain length. This bounded chain
+ * length is the essence of how a hash provides constant time lookup.
+ *
+ * The calculation of tbl->ideal_chain_maxlen below deserves some
+ * explanation. First, keep in mind that we're calculating the ideal
+ * maximum chain length based on the *new* (doubled) bucket count.
+ * In fractions this is just n/b (n=number of items,b=new num buckets).
+ * Since the ideal chain length is an integer, we want to calculate
+ * ceil(n/b). We don't depend on floating point arithmetic in this
+ * hash, so to calculate ceil(n/b) with integers we could write
+ *
+ * ceil(n/b) = (n/b) + ((n%b)?1:0)
+ *
+ * and in fact a previous version of this hash did just that.
+ * But now we have improved things a bit by recognizing that b is
+ * always a power of two. We keep its base 2 log handy (call it lb),
+ * so now we can write this with a bit shift and logical AND:
+ *
+ * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
+ *
+ */
+#define HASH_EXPAND_BUCKETS(tbl) \
+do { \
+ unsigned _he_bkt; \
+ unsigned _he_bkt_i; \
+ struct UT_hash_handle *_he_thh, *_he_hh_nxt; \
+ UT_hash_bucket *_he_new_buckets, *_he_newbkt; \
+ _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \
+ 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
+ if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \
+ memset(_he_new_buckets, 0, \
+ 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
+ tbl->ideal_chain_maxlen = \
+ (tbl->num_items >> (tbl->log2_num_buckets+1)) + \
+ ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \
+ tbl->nonideal_items = 0; \
+ for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \
+ { \
+ _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \
+ while (_he_thh) { \
+ _he_hh_nxt = _he_thh->hh_next; \
+ HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \
+ _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \
+ if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \
+ tbl->nonideal_items++; \
+ _he_newbkt->expand_mult = _he_newbkt->count / \
+ tbl->ideal_chain_maxlen; \
+ } \
+ _he_thh->hh_prev = NULL; \
+ _he_thh->hh_next = _he_newbkt->hh_head; \
+ if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \
+ _he_thh; \
+ _he_newbkt->hh_head = _he_thh; \
+ _he_thh = _he_hh_nxt; \
+ } \
+ } \
+ uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
+ tbl->num_buckets *= 2; \
+ tbl->log2_num_buckets++; \
+ tbl->buckets = _he_new_buckets; \
+ tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \
+ (tbl->ineff_expands+1) : 0; \
+ if (tbl->ineff_expands > 1) { \
+ tbl->noexpand=1; \
+ uthash_noexpand_fyi(tbl); \
+ } \
+ uthash_expand_fyi(tbl); \
+} while(0)
+
+
+/* HASH_SORT / HASH_SRT: sort the items of a hash in place.
+ *
+ * This is an adaptation of Simon Tatham's O(n log(n)) mergesort, applied to
+ * the application-order list threaded through each handle's prev/next
+ * fields. Only those links, the table's tail pointer and head itself are
+ * rewritten here; the bucket chains (hh_prev/hh_next) are not referenced.
+ *
+ * hh     - name of the UT_hash_handle field inside the element structure
+ * head   - pointer naming the hash; it is expanded many times and is
+ *          updated through DECLTYPE_ASSIGN to the first element of the
+ *          sorted order
+ * cmpfcn - invoked as cmpfcn(a,b) on two element pointers (each passed
+ *          through DECLTYPE(head)); a result <= 0 emits a before b, so on
+ *          ties the element from the earlier run is emitted first
+ *
+ * A NULL head (empty hash) is left untouched.
+ *
+ * Note that HASH_SORT assumes the hash handle name to be hh.
+ * HASH_SRT was added to allow the hash handle name to be passed in. */
+#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
+#define HASH_SRT(hh,head,cmpfcn) \
+do { \
+ unsigned _hs_i; \
+ unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \
+ struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \
+ if (head) { \
+ /* _hs_insize is the run length merged in the current pass */ \
+ _hs_insize = 1; \
+ _hs_looping = 1; \
+ _hs_list = &((head)->hh); \
+ while (_hs_looping) { \
+ /* one pass: walk the list, merging adjacent runs pairwise */ \
+ _hs_p = _hs_list; \
+ _hs_list = NULL; \
+ _hs_tail = NULL; \
+ _hs_nmerges = 0; \
+ while (_hs_p) { \
+ _hs_nmerges++; \
+ _hs_q = _hs_p; \
+ _hs_psize = 0; \
+ /* step q up to _hs_insize elements past p; psize counts the steps. */ \
+ /* next stores an element pointer, so hho is added to get its handle */ \
+ for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \
+ _hs_psize++; \
+ _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
+ ((void*)((char*)(_hs_q->next) + \
+ (head)->hh.tbl->hho)) : NULL); \
+ if (! (_hs_q) ) break; \
+ } \
+ _hs_qsize = _hs_insize; \
+ /* merge the p run and the q run, appending each pick (_hs_e) to tail */ \
+ while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \
+ if (_hs_psize == 0) { \
+ /* p run exhausted: take from q */ \
+ _hs_e = _hs_q; \
+ _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
+ ((void*)((char*)(_hs_q->next) + \
+ (head)->hh.tbl->hho)) : NULL); \
+ _hs_qsize--; \
+ } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \
+ /* q run exhausted (or list ended): take from p */ \
+ _hs_e = _hs_p; \
+ if (_hs_p){ \
+ _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
+ ((void*)((char*)(_hs_p->next) + \
+ (head)->hh.tbl->hho)) : NULL); \
+ } \
+ _hs_psize--; \
+ } else if (( \
+ cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
+ DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
+ ) <= 0) { \
+ /* p compares <= q: take from p */ \
+ _hs_e = _hs_p; \
+ if (_hs_p){ \
+ _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
+ ((void*)((char*)(_hs_p->next) + \
+ (head)->hh.tbl->hho)) : NULL); \
+ } \
+ _hs_psize--; \
+ } else { \
+ /* q compares lower: take from q */ \
+ _hs_e = _hs_q; \
+ _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
+ ((void*)((char*)(_hs_q->next) + \
+ (head)->hh.tbl->hho)) : NULL); \
+ _hs_qsize--; \
+ } \
+ /* link the picked handle after the current tail (links are element */ \
+ /* pointers, hence ELMT_FROM_HH), or start the output list with it */ \
+ if ( _hs_tail ) { \
+ _hs_tail->next = ((_hs_e) ? \
+ ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \
+ } else { \
+ _hs_list = _hs_e; \
+ } \
+ if (_hs_e) { \
+ _hs_e->prev = ((_hs_tail) ? \
+ ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \
+ } \
+ _hs_tail = _hs_e; \
+ } \
+ /* q now sits at the start of the next pair of runs */ \
+ _hs_p = _hs_q; \
+ } \
+ if (_hs_tail){ \
+ _hs_tail->next = NULL; \
+ } \
+ /* a pass with at most one merge means the whole list is one sorted */ \
+ /* run: publish the new tail and head, then stop */ \
+ if ( _hs_nmerges <= 1 ) { \
+ _hs_looping=0; \
+ (head)->hh.tbl->tail = _hs_tail; \
+ DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \
+ } \
+ _hs_insize *= 2; \
+ } \
+ HASH_FSCK(hh,head); \
+ } \
+} while (0)
+
+/* This macro selects items from one hash into another hash.
+ * The end result is that the selected items have dual presence
+ * in both hashes. There is no copy of the items made; rather
+ * they are added into the new hash through a secondary
+ * hash handle that must be present in the structure.
+ *
+ * hh_dst - name of the handle field used for the destination hash
+ * dst    - destination hash head; assigned the first selected element
+ *          when it is NULL on entry
+ * hh_src - name of the handle field used by the source hash
+ * src    - source hash head; a NULL src selects nothing
+ * cond   - called as cond(elt) with a void* element pointer; a non-zero
+ *          result selects the element
+ *
+ * The source is walked bucket by bucket, so selected items are chained in
+ * the destination in bucket-traversal order rather than in the source's
+ * application order. key, keylen and hashv are copied from the source
+ * handle; the hash value is not recomputed.
+ *
+ * NOTE(review): this macro does not itself assign tbl->tail, and the first
+ * selected item gets prev == NULL even when dst is non-NULL on entry, so
+ * items already in dst are not linked to the newly selected ones here.
+ * Confirm against callers whether dst is always NULL on entry. */
+#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
+do { \
+ unsigned _src_bkt, _dst_bkt; \
+ void *_last_elt=NULL, *_elt; \
+ UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \
+ /* byte offset of the destination handle within an element */ \
+ ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \
+ if (src) { \
+ for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \
+ for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \
+ _src_hh; \
+ _src_hh = _src_hh->hh_next) { \
+ _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \
+ if (cond(_elt)) { \
+ /* fill in the element's destination handle from its source handle */ \
+ _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \
+ _dst_hh->key = _src_hh->key; \
+ _dst_hh->keylen = _src_hh->keylen; \
+ _dst_hh->hashv = _src_hh->hashv; \
+ /* append after the previously selected element, if any */ \
+ _dst_hh->prev = _last_elt; \
+ _dst_hh->next = NULL; \
+ if (_last_elt_hh) { _last_elt_hh->next = _elt; } \
+ /* first item of an empty destination creates its table */ \
+ if (!dst) { \
+ DECLTYPE_ASSIGN(dst,_elt); \
+ HASH_MAKE_TABLE(hh_dst,dst); \
+ } else { \
+ _dst_hh->tbl = (dst)->hh_dst.tbl; \
+ } \
+ HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \
+ HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \
+ (dst)->hh_dst.tbl->num_items++; \
+ _last_elt = _elt; \
+ _last_elt_hh = _dst_hh; \
+ } \
+ } \
+ } \
+ } \
+ HASH_FSCK(hh_dst,dst); \
+} while (0)
+
+/* Tear down the bookkeeping of a hash without touching its elements.
+ * For a non-NULL head this releases the bucket array, invokes
+ * HASH_BLOOM_FREE on the table, releases the table itself and finally sets
+ * head to NULL. The elements are neither freed nor modified here.
+ * A NULL head is a no-op. */
+#define HASH_CLEAR(hh,head) \
+do { \
+ if ((head) != NULL) { \
+ uthash_free((head)->hh.tbl->buckets, \
+ (head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)); \
+ HASH_BLOOM_FREE((head)->hh.tbl); \
+ uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
+ (head) = NULL; \
+ } \
+} while (0)
+
+/* Bytes of hash bookkeeping attached to the hash named by head: one
+ * UT_hash_handle per item, one UT_hash_bucket per bucket, the
+ * UT_hash_table itself and HASH_BLOOM_BYTELEN.
+ * An empty (NULL) hash has no table to inspect and yields 0, matching
+ * how HASH_CNT and HASH_CLEAR treat a NULL head. */
+#define HASH_OVERHEAD(hh,head) \
+ (((head) != NULL) ? \
+ (size_t)((((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \
+ ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \
+ (sizeof(UT_hash_table)) + \
+ (HASH_BLOOM_BYTELEN))) : (size_t)0)
+
+/* Loop header that visits every item of a hash in application order.
+ * el is the current item; tmp always holds el's successor, fetched before
+ * the loop body runs, so the body may unlink or free el.
+ * Without DECLTYPE the successor is stored into tmp through a char**
+ * alias instead of a typed cast. */
+#ifndef NO_DECLTYPE
+#define HASH_ITER(hh,head,el,tmp) \
+for (((el) = (head)), \
+ ((tmp) = DECLTYPE(el)(((head) != NULL) ? (head)->hh.next : NULL)); \
+ (el) != NULL; \
+ ((el) = (tmp)), \
+ ((tmp) = DECLTYPE(el)(((tmp) != NULL) ? (tmp)->hh.next : NULL)))
+#else
+#define HASH_ITER(hh,head,el,tmp) \
+for (((el) = (head)), \
+ ((*(char**)(&(tmp))) = (char*)(((head) != NULL) ? (head)->hh.next : NULL)); \
+ (el) != NULL; \
+ ((el) = (tmp)), \
+ ((*(char**)(&(tmp))) = (char*)(((tmp) != NULL) ? (tmp)->hh.next : NULL)))
+#endif
+
+/* Number of items currently stored in the hash; a NULL head counts as 0.
+ * HASH_CNT takes the handle field name, HASH_COUNT assumes it is hh. */
+#define HASH_CNT(hh,head) (((head) != NULL) ? ((head)->hh.tbl->num_items) : 0)
+#define HASH_COUNT(head) HASH_CNT(hh,head)
+
+/* One bucket of the table: the head of a chain of hash handles plus the
+ * counters consulted when deciding whether to expand the bucket array. */
+typedef struct UT_hash_bucket {
+ struct UT_hash_handle *hh_head; /* first handle in this bucket's chain */
+ unsigned count; /* number of handles chained in this bucket */
+
+ /* expand_mult is normally set to 0. In this situation, the max chain length
+ * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
+ * the bucket's chain exceeds this length, bucket expansion is triggered).
+ * However, setting expand_mult to a non-zero value delays bucket expansion
+ * (that would be triggered by additions to this particular bucket)
+ * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
+ * (The multiplier is simply expand_mult+1). The whole idea of this
+ * multiplier is to reduce bucket expansions, since they are expensive, in
+ * situations where we know that a particular bucket tends to be overused.
+ * It is better to let its chain length grow to a longer yet-still-bounded
+ * value, than to do an O(n) bucket expansion too often.
+ */
+ unsigned expand_mult;
+
+} UT_hash_bucket;
+
+/* random signatures used only to find hash tables (and their bloom
+ * filters) in external analysis */
+#define HASH_SIGNATURE 0xa0111fe1
+#define HASH_BLOOM_SIGNATURE 0xb12220f2
+
+/* Per-hash bookkeeping shared by every handle in the hash (each handle's
+ * tbl field points here). */
+typedef struct UT_hash_table {
+ UT_hash_bucket *buckets; /* bucket array, num_buckets entries long */
+ /* bucket expansion doubles num_buckets and increments log2_num_buckets */
+ unsigned num_buckets, log2_num_buckets;
+ unsigned num_items; /* number of items in the hash */
+ struct UT_hash_handle *tail; /* tail hh in app order, for fast append */
+ ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element) */
+
+ /* in an ideal situation (all buckets used equally), no bucket would have
+ * more than ceil(#items/#buckets) items. that's the ideal chain length. */
+ unsigned ideal_chain_maxlen;
+
+ /* nonideal_items is the number of items in the hash whose chain position
+ * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
+ * hash distribution; reaching them in a chain traversal takes >ideal steps */
+ unsigned nonideal_items;
+
+ /* ineffective expands occur when a bucket doubling was performed, but
+ * afterward, more than half the items in the hash had nonideal chain
+ * positions. If this happens on two consecutive expansions we inhibit any
+ * further expansion, as it's not helping; this happens when the hash
+ * function isn't a good fit for the key domain. When expansion is inhibited
+ * the hash will still work, albeit no longer in constant time. */
+ unsigned ineff_expands, noexpand;
+
+ uint32_t signature; /* used only to find hash tables in external analysis */
+#ifdef HASH_BLOOM
+ uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
+ /* NOTE(review): presumably the bloom filter's bit vector and its size as a
+ * power-of-two exponent; confirm against the HASH_BLOOM_* macros */
+ uint8_t *bloom_bv;
+ char bloom_nbits;
+#endif
+
+} UT_hash_table;
+
+/* Bookkeeping record embedded in every hashable element. prev/next link
+ * whole elements in application order (they are element pointers, which
+ * is why the sort code adds tbl->hho to reach the handle), whereas
+ * hh_prev/hh_next link handles within a single bucket chain. */
+typedef struct UT_hash_handle {
+ struct UT_hash_table *tbl; /* table this handle belongs to */
+ void *prev; /* prev element in app order */
+ void *next; /* next element in app order */
+ struct UT_hash_handle *hh_prev; /* previous hh in bucket order */
+ struct UT_hash_handle *hh_next; /* next hh in bucket order */
+ void *key; /* ptr to enclosing struct's key */
+ unsigned keylen; /* enclosing struct's key len */
+ unsigned hashv; /* result of hash-fcn(key) */
+} UT_hash_handle;
+
+#endif /* UTHASH_H */
diff --git a/template.html b/template.html
new file mode 100644
index 0000000..2286c68
--- /dev/null
+++ b/template.html
@@ -0,0 +1,66 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="UTF-8">
+<title>$title$</title>
+<style type="text/css">
+/* page layout, table of contents and heading sizes */
+body { font-family: arial, sans-serif; line-height: 1.4em; max-width: 52em;
+ margin: 3em; }
+div#TOC ul { list-style: none; }
+h1 { font-size: 140%; font-weight: bold; border-top: 1px solid gray; padding-top: 0.5em; }
+h2 { font-size: 120%; font-weight: bold; }
+h3 { font-size: 110%; font-weight: bold; }
+h4 { font-size: 100%; font-weight: bold; }
+/* span.space gets an outlined overlay so that spaces are visible */
+span.space { position: relative; }
+span.space:after {
+ content: "";
+ position: absolute;
+ /* create a mark that indicates a space (trick from D. Greenspan) */
+ top: 3px; bottom: 3px; left: 1px; right: 1px;
+ border: 1px solid #999;
+}
+/* overflow: hidden makes each example contain its floated pre blocks */
+div.example { overflow: hidden; }
+p { text-align: justify; }
+pre { padding: 0.5em; margin-left: 0; margin-right: 0; margin-top: 0.2em;
+ margin-bottom: 0.5em; font-size: 88%; }
+/* wrap long lines inside pre; the later declarations are vendor fallbacks */
+pre {
+ white-space: pre-wrap; /* css-3 */
+ white-space: -moz-pre-wrap; /* Mozilla, since 1999 */
+ white-space: -pre-wrap; /* Opera 4-6 */
+ white-space: -o-pre-wrap; /* Opera 7 */
+ word-wrap: break-word; /* Internet Explorer 5.5+ */
+}
+code { font-size: 120%; font-family: monospace; }
+/* examples: pre blocks float side by side at 48% width each; a
+   pre.markdown block clears the float and so starts a new row */
+div.example > pre { float:left; width: 48%; }
+div.example > pre.markdown { clear:left; }
+pre.tree { font-weight: bold; color: #777; }
+pre.markdown { background-color: #E3DEC1;}
+pre.html { background-color: #E89F65; }
+/* darker space marker on the darker pre.html background */
+pre.html span.space:after {
+ border: 1px solid #666;
+}
+div.examplenum { font-size: 82%; text-align: left; }
+/* footnote references: wrapped in square brackets, kept on the baseline
+   at normal size instead of being raised */
+a.footnoteRef > sup:before {
+ content: "[";
+}
+a.footnoteRef > sup:after {
+ content: "]";
+}
+a.footnoteRef > sup {
+ vertical-align: baseline;
+ font-size: 100%;
+}
+</style>
+</head>
+<body>
+<!-- title block: title, version and date line, then the author list with
+     entries separated by "; " -->
+<h1 class="title">$title$</h1>
+<div class="version">Version $version$ ($date$)</div>
+<div class="authors">
+$for(author)$<span class="author">$author$</span>$sep$; $endfor$
+</div>
+<!-- table of contents placeholder, followed by the document body -->
+<div id="TOC">
+$toc$
+</div>
+$body$
+</body>
+</html>
diff --git a/template.tex b/template.tex
new file mode 100644
index 0000000..d083b72
--- /dev/null
+++ b/template.tex
@@ -0,0 +1,229 @@
+% Document class; the optional class options are font size, language,
+% paper size and any extra class options, each emitted only when set.
+\documentclass[$if(fontsize)$$fontsize$,$endif$$if(lang)$$lang$,$endif$$if(papersize)$$papersize$,$endif$$for(classoption)$$classoption$$sep$,$endfor$]{$documentclass$}
+% Font package: the requested family if one is given, otherwise lmodern.
+$if(fontfamily)$
+\usepackage{$fontfamily$}
+$else$
+\usepackage{lmodern}
+$endif$
+% Optional line spacing via setspace.
+$if(linestretch)$
+\usepackage{setspace}
+\setstretch{$linestretch$}
+$endif$
+\usepackage{amssymb,amsmath}
+% Engine detection: the \ifnum test below is true only when neither
+% \ifxetex nor \ifluatex contributes a digit, i.e. under pdftex.
+\usepackage{ifxetex,ifluatex}
+\usepackage{fixltx2e} % provides \textsubscript
+\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
+ \usepackage[T1]{fontenc}
+ \usepackage[utf8]{inputenc}
+$if(euro)$
+ \usepackage{eurosym}
+$endif$
+\else % if luatex or xelatex
+ \ifxetex
+ \usepackage{mathspec}
+ \usepackage{xltxtra,xunicode}
+ \else
+ \usepackage{fontspec}
+ \fi
+ \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase}
+ \newcommand{\euro}{€}
+ % Optional main, sans, mono and math font overrides.
+$if(mainfont)$
+ \setmainfont{$mainfont$}
+$endif$
+$if(sansfont)$
+ \setsansfont{$sansfont$}
+$endif$
+$if(monofont)$
+ \setmonofont[Mapping=tex-ansi]{$monofont$}
+$endif$
+$if(mathfont)$
+ \setmathfont(Digits,Latin,Greek){$mathfont$}
+$endif$
+\fi
+% use upquote if available, for straight quotes in verbatim environments
+\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
+% use microtype if available
+\IfFileExists{microtype.sty}{\usepackage{microtype}}{}
+% fixed one-inch margins on all sides
+\usepackage[margin=1in]{geometry}
+% bibliography support: natbib and biblatex are each enabled by their own
+% template flag; natbib falls back to the plainnat style
+$if(natbib)$
+\usepackage{natbib}
+\bibliographystyle{$if(biblio-style)$$biblio-style$$else$plainnat$endif$}
+$endif$
+$if(biblatex)$
+\usepackage{biblatex}
+$if(biblio-files)$
+\bibliography{$biblio-files$}
+$endif$
+$endif$
+% optional listings package; the lhs flag defines a "code" environment
+% preset to Haskell in a small typewriter face
+$if(listings)$
+\usepackage{listings}
+$endif$
+$if(lhs)$
+\lstnewenvironment{code}{\lstset{language=Haskell,basicstyle=\small\ttfamily}}{}
+$endif$
+% Syntax-highlighting support: the Highlighting and Shaded environments
+% plus one macro per token class.
+\usepackage{fancyvrb}
+\usepackage{color,framed}
+\newcommand{\VerbBar}{|}
+\newcommand{\VERB}{\Verb[commandchars=\\\{\}]}
+\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\},fontsize=\small}
+% fontsize=\small (set above) allows more characters per line
+% shadecolor is gray level 1, so Shaded blocks get a white background
+\definecolor{shadecolor}{gray}{1}
+\newenvironment{Shaded}{\begin{snugshade}}{\end{snugshade}}
+% Every token macro is an alias of \NormalTok, so highlighted code is
+% typeset without colour or emphasis.
+\newcommand{\NormalTok}[1]{{#1}}
+\let\KeywordTok\NormalTok
+\let\DataTypeTok\NormalTok
+\let\DecValTok\NormalTok
+\let\BaseNTok\NormalTok
+\let\FloatTok\NormalTok
+\let\CharTok\NormalTok
+\let\StringTok\NormalTok
+\let\CommentTok\NormalTok
+\let\OtherTok\NormalTok
+\let\AlertTok\NormalTok
+\let\FunctionTok\NormalTok
+\let\RegionMarkerTok\NormalTok
+\let\ErrorTok\NormalTok
+% Disabled coloured alternatives for the token macros above:
+%\newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}}
+%\newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.56,0.13,0.00}{{#1}}}
+%\newcommand{\DecValTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
+%\newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
+%\newcommand{\FloatTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
+%\newcommand{\CharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
+%\newcommand{\StringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
+%\newcommand{\CommentTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textit{{#1}}}}
+%\newcommand{\OtherTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{{#1}}}
+%\newcommand{\AlertTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
+%\newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.02,0.16,0.49}{{#1}}}
+%\newcommand{\RegionMarkerTok}[1]{{#1}}
+%\newcommand{\ErrorTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
+% fancyvrb is already loaded unconditionally earlier in this preamble, so
+% this conditional load is redundant but harmless
+$if(verbatim-in-note)$
+\usepackage{fancyvrb}
+$endif$
+% optional table support
+$if(tables)$
+\usepackage{longtable,booktabs}
+$endif$
+% optional image support; \maxwidth and \maxheight yield the natural image
+% size capped at \linewidth and \textheight respectively
+$if(graphics)$
+\usepackage{graphicx}
+\makeatletter
+\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
+\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
+\makeatother
+% Scale images if necessary, so that they will not overflow the page
+% margins by default, and it is still possible to overwrite the defaults
+% using explicit options in \includegraphics[width, height, ...]{}
+\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
+$endif$
+% hyperref is loaded with different options under xetex than under the
+% other engines
+\ifxetex
+ \usepackage[setpagesize=false, % page size defined by xetex
+ unicode=false, % unicode breaks when used with xetex
+ xetex]{hyperref}
+\else
+ \usepackage[unicode=true]{hyperref}
+\fi
+% PDF metadata and link colours; the colours default to blue for
+% citations and urls and to magenta for internal links
+\hypersetup{breaklinks=true,
+ bookmarks=true,
+ pdfauthor={$author-meta$},
+ pdftitle={$title-meta$},
+ colorlinks=true,
+ citecolor=$if(citecolor)$$citecolor$$else$blue$endif$,
+ urlcolor=$if(urlcolor)$$urlcolor$$else$blue$endif$,
+ linkcolor=$if(linkcolor)$$linkcolor$$else$magenta$endif$,
+ pdfborder={0 0 0}}
+\urlstyle{same} % don't use monospace font for urls
+$if(links-as-notes)$
+% Make links footnotes instead of hotlinks:
+\renewcommand{\href}[2]{#2\footnote{\url{#1}}}
+$endif$
+$if(strikeout)$
+\usepackage[normalem]{ulem}
+% avoid problems with \sout in headers with hyperref:
+\pdfstringdefDisableCommands{\renewcommand{\sout}{}}
+$endif$
+% paragraphs: no first-line indent, stretchable vertical space instead
+\setlength{\parindent}{0pt}
+\setlength{\parskip}{6pt plus 2pt minus 1pt}
+\setlength{\emergencystretch}{3em} % prevent overfull lines
+% section numbering: five levels deep when enabled, otherwise off
+$if(numbersections)$
+\setcounter{secnumdepth}{5}
+$else$
+\setcounter{secnumdepth}{0}
+$endif$
+$if(verbatim-in-note)$
+\VerbatimFootnotes % allows verbatim text in footnotes
+$endif$
+% language support: polyglossia under xetex, babel otherwise
+$if(lang)$
+\ifxetex
+ \usepackage{polyglossia}
+ \setmainlanguage{$mainlang$}
+\else
+ \usepackage[$lang$]{babel}
+\fi
+$endif$
+
+% chapter headings: hanging label (the chapter number plus a space)
+% followed by the title, both in huge bold type
+\usepackage{titlesec}
+\titleformat{\chapter}[hang]{\Huge\bfseries}{\thechapter\ }{0pt}{\Huge\bfseries}
+
+% running header: italic document title on the left, italic \rightmark
+% (without forced uppercase) on the right; footer: version and date on the
+% left, page number on the right; both centre fields are left empty
+\usepackage{fancyhdr}
+\pagestyle{fancy}
+\pagenumbering{arabic}
+\lhead{\itshape $title$}
+\chead{}
+\rhead{\itshape{\nouppercase{\rightmark}}}
+\lfoot{v$version$ ($date$)}
+\cfoot{}
+\rfoot{\thepage}
+
+% title block metadata; an optional subtitle is set in \large on its own
+% line under the title, and multiple authors are joined with \and
+$if(title)$
+\title{$title$$if(subtitle)$\\\vspace{0.5em}{\large $subtitle$}$endif$}
+$endif$
+$if(author)$
+\author{$for(author)$$author$$sep$ \and $endfor$}
+$endif$
+\date{$date$}
+% extra preamble material supplied by the caller
+$for(header-includes)$
+$header-includes$
+$endfor$
+
+\begin{document}
+% front matter: title, abstract, include-before material, table of contents
+$if(title)$
+\maketitle
+$endif$
+$if(abstract)$
+\begin{abstract}
+$abstract$
+\end{abstract}
+$endif$
+
+$for(include-before)$
+$include-before$
+
+$endfor$
+$if(toc)$
+{
+% the surrounding group keeps the black link colour local to the contents
+\hypersetup{linkcolor=black}
+\setcounter{tocdepth}{$toc-depth$}
+\tableofcontents
+}
+$endif$
+$body$
+
+% back matter: natbib bibliography (its heading is renamed through
+% \bibname or \refname depending on the book-class flag), biblatex
+% bibliography, then include-after material
+$if(natbib)$
+$if(biblio-files)$
+$if(biblio-title)$
+$if(book-class)$
+\renewcommand\bibname{$biblio-title$}
+$else$
+\renewcommand\refname{$biblio-title$}
+$endif$
+$endif$
+\bibliography{$biblio-files$}
+
+$endif$
+$endif$
+$if(biblatex)$
+\printbibliography$if(biblio-title)$[title=$biblio-title$]$endif$
+
+$endif$
+$for(include-after)$
+$include-after$
+
+$endfor$
+\end{document}