11 files changed, 304 insertions, 607 deletions
diff --git a/Makefile b/Makefile
index cb5938d..d14a928 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ DATADIR=data
 PROG=./stmd
 
 .PHONY: all oldtests test spec benchjs testjs
-all: $(SRCDIR)/case_fold_switch.c $(PROG)
+all: $(SRCDIR)/case_fold_switch.inc $(PROG)
 
 README.html: README.md template.html
 	pandoc --template template.html -S -s -t html5 -o $@ $<
@@ -41,13 +41,16 @@ testjs: spec.txt
 benchjs:
 	node js/bench.js ${BENCHINP}
 
-$(PROG): $(SRCDIR)/main.c $(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/html.o $(SRCDIR)/utf8.o
+HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o
+STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o
+
+$(PROG): $(SRCDIR)/main.c $(HTML_OBJ) $(STMD_OBJ)
 	$(CC) $(LDFLAGS) -o $@ $^
 
 $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re
 	re2c --case-insensitive -bis $< > $@ || (rm $@ && false)
 
-$(SRCDIR)/case_fold_switch.inc $(DATADIR)/CaseFolding-3.2.0.txt
+$(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt
 	perl mkcasefold.pl < $< > $@
 
 .PHONY: leakcheck clean fuzztest dingus upload
@@ -72,7 +75,7 @@ update-site: spec.html narrative.html
 	(cd _site ; git pull ; git commit -a -m "Updated site for latest spec, narrative, js" ; git push; cd ..)
 
 clean:
-	-rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c
+	-rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c $(SRCDIR)/html/*.o
 	-rm -rf *.dSYM
 	-rm -f README.html
 	-rm -f spec.md fuzz.txt spec.html
diff --git a/src/blocks.c b/src/blocks.c
index eabac03..71dc830 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -3,11 +3,12 @@
 #include <stdio.h>
 #include <stdbool.h>
 #include <ctype.h>
-#include "bstrlib.h"
+
 #include "stmd.h"
-#include "uthash.h"
-#include "debug.h"
 #include "scanners.h"
+#include "uthash.h"
+
+static void finalize(block* b, int line_number);
 
 static block* make_block(int tag, int start_line, int start_column)
 {
@@ -140,7 +141,7 @@ static int break_out_of_lists(block ** bptr, int line_number)
 }
 
 
-extern void finalize(block* b, int line_number)
+static void finalize(block* b, int line_number)
 {
 	int firstlinelen;
 	int pos;
@@ -364,7 +365,7 @@ static int lists_match(struct ListData list_data,
 			list_data.bullet_char == item_data.bullet_char);
 }
 
-static void expand_tabs(gh_buf *ob, const char *line, size_t size)
+static void expand_tabs(gh_buf *ob, const unsigned char *line, size_t size)
 {
 	size_t  i = 0, tab = 0;
 
@@ -389,13 +390,43 @@ static void expand_tabs(gh_buf *ob, const char *line, size_t size)
 	}
 }
 
-extern block *stmd_parse_document(const char *buffer, size_t len)
+static block *finalize_document(block *document, int linenum)
 {
-	gh_buf line = GH_BUF_INIT;
+	while (document != document->top) {
+		finalize(document, linenum);
+		document = document->parent;
+	}
+	/* document now equals its own ->top: finalize it too, then process inlines with its refmap. */
+	finalize(document, linenum);
+	process_inlines(document, document->attributes.refmap);
+
+	return document;
+}
 
+extern block *stmd_parse_file(FILE *f)
+{
+	gh_buf line = GH_BUF_INIT;
+	unsigned char buffer[4096];
+	int linenum = 1;
 	block *document = make_document();
+	/* NOTE(review): fgets returns input lines longer than sizeof(buffer)-1 in pieces; each piece is counted as its own line. */
+	while (fgets((char *)buffer, sizeof(buffer), f)) {
+		expand_tabs(&line, buffer, strlen((char *)buffer)); /* fgets NUL-terminates buffer */
+		incorporate_line(&line, linenum, &document);
+		gh_buf_clear(&line);
+		linenum++;
+	}
+
+	gh_buf_free(&line);
+	return finalize_document(document, linenum);
+}
+
+extern block *stmd_parse_document(const unsigned char *buffer, size_t len)
+{
+	gh_buf line = GH_BUF_INIT;
 	int linenum = 1;
-	const char *end = buffer + len;
+	const unsigned char *end = buffer + len;
+	block *document = make_document();
 
 	while (buffer < end) {
 		const char *eol = memchr(buffer, '\n', end - buffer);
@@ -414,16 +445,7 @@ extern block *stmd_parse_document(const char *buffer, size_t len)
 	}
 
 	gh_buf_free(&line);
-
-	while (document != document->top) {
-		finalize(document, linenum);
-		document = document->parent;
-	}
-
-	finalize(document, linenum);
-	process_inlines(document, document->attributes.refmap);
-
-	return document;
+	return finalize_document(document, linenum);
 }
 
 // Process one line at a time, modifying a block.
diff --git a/src/buffer.c b/src/buffer.c
index b81e7fa..17dc864 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -31,10 +31,10 @@ void gh_buf_init(gh_buf *buf, int initial_size)
 
 int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom)
 {
-	char *new_ptr;
-	size_t new_size;
+	unsigned char *new_ptr;
+	int new_size;
 
-	if (buf->ptr == gh_buf__oom || buf->asize < 0)
+	if (buf->ptr == gh_buf__oom)
 		return -1;
 
 	if (target_size <= buf->asize)
@@ -79,7 +79,7 @@ void gh_buf_free(gh_buf *buf)
 {
 	if (!buf) return;
 
-	if (buf->asize > 0 && buf->ptr != gh_buf__initbuf && buf->ptr != gh_buf__oom)
+	if (buf->ptr != gh_buf__initbuf && buf->ptr != gh_buf__oom)
 		free(buf->ptr);
 
 	gh_buf_init(buf, 0);
@@ -91,14 +91,9 @@ void gh_buf_clear(gh_buf *buf)
 
 	if (buf->asize > 0)
 		buf->ptr[0] = '\0';
-
-	if (buf->asize < 0) {
-		buf->ptr = gh_buf__initbuf;
-		buf->asize = 0;
-	}
 }
 
-int gh_buf_set(gh_buf *buf, const char *data, int len)
+int gh_buf_set(gh_buf *buf, const unsigned char *data, int len)
 {
 	if (len == 0 || data == NULL) {
 		gh_buf_clear(buf);
@@ -115,10 +110,12 @@ int gh_buf_set(gh_buf *buf, const char *data, int len)
 
 int gh_buf_sets(gh_buf *buf, const char *string)
 {
-	return gh_buf_set(buf, string, string ? strlen(string) : 0);
+	return gh_buf_set(buf,
+		(const unsigned char *)string,
+		string ? strlen(string) : 0);
 }
 
-int gh_buf_putc(gh_buf *buf, char c)
+int gh_buf_putc(gh_buf *buf, int c)
 {
 	ENSURE_SIZE(buf, buf->size + 2);
 	buf->ptr[buf->size++] = c;
@@ -126,7 +123,7 @@ int gh_buf_putc(gh_buf *buf, char c)
 	return 0;
 }
 
-int gh_buf_put(gh_buf *buf, const char *data, int len)
+int gh_buf_put(gh_buf *buf, const unsigned char *data, int len)
 {
 	ENSURE_SIZE(buf, buf->size + len + 1);
 	memmove(buf->ptr + buf->size, data, len);
@@ -137,8 +134,7 @@ int gh_buf_put(gh_buf *buf, const char *data, int len)
 
 int gh_buf_puts(gh_buf *buf, const char *string)
 {
-	assert(string);
-	return gh_buf_put(buf, string, strlen(string));
+	return gh_buf_put(buf, (const unsigned char *)string, strlen(string));
 }
 
 int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
@@ -153,7 +149,7 @@ int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
 		va_copy(args, ap);
 
 		len = vsnprintf(
-			buf->ptr + buf->size,
+			(char *)buf->ptr + buf->size,
 			buf->asize - buf->size,
 			format, args
 		);
@@ -187,9 +183,9 @@ int gh_buf_printf(gh_buf *buf, const char *format, ...)
 	return r;
 }
 
-void gh_buf_copy_cstr(char *data, size_t datasize, const gh_buf *buf)
+void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf)
 {
-	size_t copylen;
+	int copylen;
 
 	assert(data && datasize && buf);
 
@@ -212,9 +208,9 @@ void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b)
 	*buf_b = t;
 }
 
-char *gh_buf_detach(gh_buf *buf)
+unsigned char *gh_buf_detach(gh_buf *buf)
 {
-	char *data = buf->ptr;
+	unsigned char *data = buf->ptr;
 
 	if (buf->asize == 0 || buf->ptr == gh_buf__oom)
 		return NULL;
@@ -224,13 +220,13 @@ char *gh_buf_detach(gh_buf *buf)
 	return data;
 }
 
-void gh_buf_attach(gh_buf *buf, char *ptr, int asize)
+void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize)
 {
 	gh_buf_free(buf);
 
 	if (ptr) {
 		buf->ptr = ptr;
-		buf->size = strlen(ptr);
+		buf->size = strlen((char *)ptr);
 		if (asize)
 			buf->asize = (asize < buf->size) ? buf->size + 1 : asize;
 		else /* pass 0 to fall back on strlen + 1 */
@@ -249,11 +245,11 @@ int gh_buf_cmp(const gh_buf *a, const gh_buf *b)
 
 int gh_buf_strchr(const gh_buf *buf, int c, int pos)
 {
-  const char *p = memchr(buf->ptr + pos, c, buf->size - pos);
-  if (!p)
-    return -1;
+	const unsigned char *p = memchr(buf->ptr + pos, c, buf->size - pos); /* same pointee type as buf->ptr so the subtraction below is valid */
+	if (!p)
+		return -1;
 
-  return (int)(p - p->ptr);
+	return (int)(p - buf->ptr);
 }
 
 int gh_buf_strrchr(const gh_buf *buf, int c, int pos)
@@ -270,36 +266,21 @@ int gh_buf_strrchr(const gh_buf *buf, int c, int pos)
 
 void gh_buf_truncate(gh_buf *buf, size_t len)
 {
-	assert(buf->asize >= 0);
-
 	if (len < buf->size) {
 		buf->size = len;
 		buf->ptr[buf->size] = '\0';
 	}
 }
 
-void gh_buf_ltruncate(gh_buf *buf, size_t len)
-{
-	assert(buf->asize >= 0);
-
-	if (len && len < buf->size) {
-		memmove(buf->ptr, buf->ptr + len, buf->size - len);
-		buf->size -= len;
-		buf->ptr[buf->size] = '\0';
-	}
-}
-
 void gh_buf_trim(gh_buf *buf)
 {
-	size_t i = 0;
-
-	assert(buf->asize >= 0);
-
-	/* ltrim */
+	/* TODO: leading whitespace? */
+	/*
 	while (i < buf->size && isspace(buf->ptr[i]))
 		i++;
 
 	gh_buf_truncate(buf, i);
+	*/
 
 	/* rtrim */
 	while (buf->size > 0) {
diff --git a/src/buffer.h b/src/buffer.h
index 2581ee3..422ef02 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -24,13 +24,6 @@ extern unsigned char gh_buf__oom[];
  */
 extern void gh_buf_init(gh_buf *buf, int initial_size);
 
-static inline void gh_buf_static(gh_buf *buf, unsigned char *source)
-{
-	buf->ptr = source;
-	buf->size = strlen(source);
-	buf->asize = -1;
-}
-
 /**
  * Attempt to grow the buffer to hold at least `target_size` bytes.
  *
@@ -81,13 +74,13 @@ static inline size_t gh_buf_len(const gh_buf *buf)
 
 extern int gh_buf_cmp(const gh_buf *a, const gh_buf *b);
 
-extern void gh_buf_attach(gh_buf *buf, char *ptr, int asize);
-extern char *gh_buf_detach(gh_buf *buf);
+extern void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize);
+extern unsigned char *gh_buf_detach(gh_buf *buf);
 extern void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf);
 
 static inline const char *gh_buf_cstr(const gh_buf *buf)
 {
-	return buf->ptr;
+	return (char *)buf->ptr;
 }
 
 #define gh_buf_at(buf, n) ((buf)->ptr[n])
@@ -100,10 +93,10 @@ static inline const char *gh_buf_cstr(const gh_buf *buf)
  * return code of these functions and call them in a series then just call
  * gh_buf_oom at the end.
  */
-extern int gh_buf_set(gh_buf *buf, const char *data, int len);
+extern int gh_buf_set(gh_buf *buf, const unsigned char *data, int len);
 extern int gh_buf_sets(gh_buf *buf, const char *string);
-extern int gh_buf_putc(gh_buf *buf, char c);
-extern int gh_buf_put(gh_buf *buf, const char *data, int len);
+extern int gh_buf_putc(gh_buf *buf, int c);
+extern int gh_buf_put(gh_buf *buf, const unsigned char *data, int len);
 extern int gh_buf_puts(gh_buf *buf, const char *string);
 extern int gh_buf_printf(gh_buf *buf, const char *format, ...)
 	__attribute__((format (printf, 2, 3)));
diff --git a/src/html.c b/src/html.c
deleted file mode 100644
index aeec5f1..0000000
--- a/src/html.c
+++ /dev/null
@@ -1,276 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdbool.h>
-#include "bstrlib.h"
-#include "stmd.h"
-#include "debug.h"
-#include "scanners.h"
-
-// Functions to convert block and inline lists to HTML strings.
-
-// Escape special characters in HTML.  More efficient than
-// three calls to bfindreplace.  If preserve_entities is set,
-// existing entities are left alone.
-static bstring escape_html(bstring inp, bool preserve_entities)
-{
-  int pos = 0;
-  int match;
-  char c;
-  bstring escapable = blk2bstr("&<>\"", 4);
-  bstring ent;
-  bstring s = bstrcpy(inp);
-  while ((pos = binchr(s, pos, escapable)) != BSTR_ERR) {
-    c = bchar(s,pos);
-    switch (c) {
-    case '<':
-      bdelete(s, pos, 1);
-      ent = blk2bstr("&lt;", 4);
-      binsert(s, pos, ent, ' ');
-      bdestroy(ent);
-      pos += 4;
-      break;
-    case '>':
-      bdelete(s, pos, 1);
-      ent = blk2bstr("&gt;", 4);
-      binsert(s, pos, ent, ' ');
-      bdestroy(ent);
-      pos += 4;
-      break;
-    case '&':
-      if (preserve_entities && (match = scan_entity(s, pos))) {
-        pos += match;
-      } else {
-        bdelete(s, pos, 1);
-        ent = blk2bstr("&amp;", 5);
-        binsert(s, pos, ent, ' ');
-        bdestroy(ent);
-        pos += 5;
-      }
-      break;
-    case '"':
-      bdelete(s, pos, 1);
-      ent = blk2bstr("&quot;", 6);
-      binsert(s, pos, ent, ' ');
-      bdestroy(ent);
-      pos += 6;
-      break;
-    default:
-      bdelete(s, pos, 1);
-      log_err("unexpected character %02x", c);
-    }
-  }
-  bdestroy(escapable);
-  return s;
-}
-
-static inline void cr(bstring buffer)
-{
-  int c = bchar(buffer, blength(buffer) - 1);
-  if (c != '\n' && c) {
-    bconchar(buffer, '\n');
-  }
-}
-
-// Convert a block list to HTML.  Returns 0 on success, and sets result.
-extern int blocks_to_html(block* b, bstring* result, bool tight)
-{
-  bstring contents = NULL;
-  bstring escaped, escaped2;
-  struct bstrList * info_words;
-  struct ListData * data;
-  bstring mbstart;
-  bstring html = blk2bstr("", 0);
-
-  while(b != NULL) {
-    switch(b->tag) {
-    case document:
-      check(blocks_to_html(b->children, &contents, false) == 0,
-            "error converting blocks to html");
-      bformata(html, "%s", contents->data);
-      bdestroy(contents);
-      break;
-    case paragraph:
-      check(inlines_to_html(b->inline_content, &contents) == 0,
-            "error converting inlines to html");
-      if (tight) {
-        bformata(html, "%s", contents->data);
-      } else {
-        cr(html);
-        bformata(html, "<p>%s</p>", contents->data);
-        cr(html);
-      }
-      bdestroy(contents);
-      break;
-    case block_quote:
-      check(blocks_to_html(b->children, &contents, false) == 0,
-            "error converting blocks to html");
-      cr(html);
-      bformata(html, "<blockquote>\n%s</blockquote>", contents->data);
-      cr(html);
-      bdestroy(contents);
-      break;
-    case list_item:
-      check(blocks_to_html(b->children, &contents, tight) == 0,
-            "error converting blocks to html");
-      brtrimws(contents);
-      cr(html);
-      bformata(html, "<li>%s</li>", contents->data);
-      cr(html);
-      bdestroy(contents);
-      break;
-    case list:
-      // make sure a list starts at the beginning of the line:
-      cr(html);
-      data = &(b->attributes.list_data);
-      check(blocks_to_html(b->children, &contents, data->tight) == 0,
-            "error converting blocks to html");
-      mbstart = bformat(" start=\"%d\"", data->start);
-      bformata(html, "<%s%s>\n%s</%s>",
-               data->list_type == bullet ? "ul" : "ol",
-               data->start == 1 ? "" : (char*) mbstart->data,
-               contents->data,
-               data->list_type == bullet ? "ul" : "ol");
-      cr(html);
-      bdestroy(contents);
-      bdestroy(mbstart);
-      break;
-    case atx_header:
-    case setext_header:
-      check(inlines_to_html(b->inline_content, &contents) == 0,
-            "error converting inlines to html");
-      cr(html);
-      bformata(html, "<h%d>%s</h%d>",
-               b->attributes.header_level,
-               contents->data,
-               b->attributes.header_level);
-      cr(html);
-      bdestroy(contents);
-      break;
-    case indented_code:
-      escaped = escape_html(b->string_content, false);
-      cr(html);
-      bformata(html, "<pre><code>%s</code></pre>", escaped->data);
-      cr(html);
-      bdestroy(escaped);
-      break;
-    case fenced_code:
-      escaped = escape_html(b->string_content, false);
-      cr(html);
-      bformata(html, "<pre><code");
-      if (blength(b->attributes.fenced_code_data.info) > 0) {
-        escaped2 = escape_html(b->attributes.fenced_code_data.info, true);
-        info_words = bsplit(escaped2, ' ');
-        bformata(html, " class=\"language-%s\"", info_words->entry[0]->data);
-        bdestroy(escaped2);
-        bstrListDestroy(info_words);
-      }
-      bformata(html, ">%s</code></pre>", escaped->data);
-      cr(html);
-      bdestroy(escaped);
-      break;
-    case html_block:
-      bformata(html, "%s", b->string_content->data);
-      break;
-    case hrule:
-      bformata(html, "<hr />");
-      cr(html);
-      break;
-    case reference_def:
-      break;
-    default:
-      log_warn("block type %d not implemented\n", b->tag);
-      break;
-    }
-    b = b->next;
-  }
-  *result = html;
-  return 0;
- error:
-  return -1;
-}
-
-// Convert an inline list to HTML.  Returns 0 on success, and sets result.
-extern int inlines_to_html(inl* ils, bstring* result)
-{
-  bstring contents = NULL;
-  bstring html = blk2bstr("", 0);
-  bstring mbtitle, escaped, escaped2;
-
-  while(ils != NULL) {
-    switch(ils->tag) {
-    case str:
-      escaped = escape_html(ils->content.literal, false);
-      bformata(html, "%s", escaped->data);
-      bdestroy(escaped);
-      break;
-    case linebreak:
-      bformata(html, "<br />\n");
-      break;
-    case softbreak:
-      bformata(html, "\n");
-      break;
-    case code:
-      escaped = escape_html(ils->content.literal, false);
-      bformata(html, "<code>%s</code>", escaped->data);
-      bdestroy(escaped);
-      break;
-    case raw_html:
-    case entity:
-      bformata(html, "%s", ils->content.literal->data);
-      break;
-    case link:
-      check(inlines_to_html(ils->content.inlines, &contents) == 0,
-            "error converting inlines to html");
-      if (blength(ils->content.linkable.title) > 0) {
-        escaped = escape_html(ils->content.linkable.title, true);
-        mbtitle = bformat(" title=\"%s\"", escaped->data);
-        bdestroy(escaped);
-      } else {
-        mbtitle = blk2bstr("",0);
-      }
-      escaped = escape_html(ils->content.linkable.url, true);
-      bformata(html, "<a href=\"%s\"%s>%s</a>",
-               escaped->data,
-               mbtitle->data,
-               contents->data);
-      bdestroy(escaped);
-      bdestroy(mbtitle);
-      bdestroy(contents);
-      break;
-    case image:
-      check(inlines_to_html(ils->content.inlines, &contents) == 0,
-            "error converting inlines to html");
-      escaped  = escape_html(ils->content.linkable.url, true);
-      escaped2 = escape_html(contents, false);
-      bdestroy(contents);
-      bformata(html, "<img src=\"%s\" alt=\"%s\"",
-               escaped->data, escaped2->data);
-      bdestroy(escaped);
-      bdestroy(escaped2);
-      if (blength(ils->content.linkable.title) > 0) {
-        escaped = escape_html(ils->content.linkable.title, true);
-        bformata(html, " title=\"%s\"", escaped->data);
-        bdestroy(escaped);
-      }
-      bformata(html, " />");
-      break;
-    case strong:
-      check(inlines_to_html(ils->content.inlines, &contents) == 0,
-            "error converting inlines to html");
-      bformata(html, "<strong>%s</strong>", contents->data);
-      bdestroy(contents);
-      break;
-    case emph:
-      check(inlines_to_html(ils->content.inlines, &contents) == 0,
-            "error converting inlines to html");
-      bformata(html, "<em>%s</em>", contents->data);
-      bdestroy(contents);
-      break;
-    }
-    ils = ils->next;
-  }
-  *result = html;
-  return 0;
- error:
-  return -1;
-}
diff --git a/src/inlines.c b/src/inlines.c
index 4ff45ad..82c7219 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -6,9 +6,7 @@
 
 #include "stmd.h"
 #include "uthash.h"
-#include "debug.h"
 #include "scanners.h"
-#include "utf8.h"
 
 typedef struct Subject {
   const gh_buf   *buffer;
@@ -119,7 +117,7 @@ inline static inl* make_linkable(int t, inl* label, chunk url, chunk title)
 	e->tag = t;
 	e->content.linkable.label = label;
 	e->content.linkable.url   = chunk_to_cstr(&url);
-	e->content.linkable.title = chunk_to_cstr(&title);
+	e->content.linkable.title = title.len ? chunk_to_cstr(&title) : NULL; /* NULL when there is no title */
 	e->next = NULL;
 	return e;
 }
diff --git a/src/main.c b/src/main.c
index 9e0a3c8..e1abedc 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,99 +1,77 @@
 #include <stdlib.h>
 #include <stdio.h>
-#include "bstrlib.h"
+#include <string.h>
 #include "stmd.h"
 #include "debug.h"
 
 void print_usage()
 {
-  printf("Usage:   stmd [FILE*]\n");
-  printf("Options: --help, -h    Print usage information\n");
-  printf("         --ast         Print AST instead of HTML\n");
-  printf("         --version     Print version\n");
+	printf("Usage:   stmd [FILE*]\n");
+	printf("Options: --help, -h    Print usage information\n");
+	printf("         --ast         Print AST instead of HTML\n");
+	printf("         --version     Print version\n");
 }
 
-int main(int argc, char *argv[]) {
-  int i;
-  bool ast = false;
-  int g = 0;
-  int numfps = 0;
-  int files[argc];
+static void print_document(block *document, bool ast)
+{
+	gh_buf html = GH_BUF_INIT;
+
+	if (ast) {
+		print_blocks(document, 0);
+	} else {
+		blocks_to_html(&html, document, false);
+		printf("%s", html.ptr);
+		gh_buf_free(&html);
+	}
+}
 
-  for (i=1; i < argc; i++) {
-    if (strcmp(argv[i], "--version") == 0) {
-      printf("stmd %s", VERSION);
-      printf(" - CommonMark converter (c) 2014 John MacFarlane\n");
-      exit(0);
-    } else if ((strcmp(argv[i], "--help") == 0) ||
-               (strcmp(argv[i], "-h") == 0)) {
-      print_usage();
-      exit(0);
-    } else if (strcmp(argv[i], "--ast") == 0) {
-      ast = true;
-    } else if (*argv[i] == '-') {
-      print_usage();
-      exit(1);
-    } else { // treat as file argument
-      files[g] = i;
-      g++;
-    }
-  }
+int main(int argc, char *argv[])
+{
+	int i, numfps = 0;
+	bool ast = false;
+	int files[argc];
+	block *document = NULL;
 
-  numfps = g;
-  bstring s = NULL;
-  bstring html;
-  g = 0;
-  block * cur = make_document();
-  int linenum = 1;
-  extern int errno;
-  FILE * fp = NULL;
+	for (i = 1; i < argc; i++) {
+		if (strcmp(argv[i], "--version") == 0) {
+			printf("stmd %s", VERSION);
+			printf(" - CommonMark converter (c) 2014 John MacFarlane\n");
+			exit(0);
+		} else if ((strcmp(argv[i], "--help") == 0) ||
+				(strcmp(argv[i], "-h") == 0)) {
+			print_usage();
+			exit(0);
+		} else if (strcmp(argv[i], "--ast") == 0) {
+			ast = true;
+		} else if (*argv[i] == '-') {
+			print_usage();
+			exit(1);
+		} else { // treat as file argument
+			files[numfps++] = i;
+		}
+	}
 
-  if (numfps == 0) {
-    // read from stdin
-    while ((s = bgets((bNgetc) fgetc, stdin, '\n'))) {
-      check(incorporate_line(s, linenum, &cur) == 0,
-          "error incorporating line %d", linenum);
-      bdestroy(s);
-      linenum++;
-    }
-  } else {
-    // iterate over input file pointers
-    for (g=0; g < numfps; g++) {
+	if (numfps == 0) {
+		document = stmd_parse_file(stdin);
+		print_document(document, ast);
+		free_blocks(document);
+	} else {
+		for (i = 0; i < numfps; i++) {
+			FILE *fp = fopen(argv[files[i]], "r");
 
-      fp = fopen(argv[files[g]], "r");
-      if (fp == NULL) {
-        fprintf(stderr, "Error opening file %s: %s\n",
-                argv[files[g]], strerror(errno));
-        exit(1);
-      }
+			if (fp == NULL) {
+				fprintf(stderr, "Error opening file %s: %s\n",
+						argv[files[i]], strerror(errno));
+				exit(1);
+			}
 
-      while ((s = bgets((bNgetc) fgetc, fp, '\n'))) {
-        check(incorporate_line(s, linenum, &cur) == 0,
-            "error incorporating line %d", linenum);
-        bdestroy(s);
-        linenum++;
-      }
-      fclose(fp);
-    }
-  }
+			document = stmd_parse_file(fp);
+			print_document(document, ast);
+			free_blocks(document);
+			fclose(fp);
+		}
+	}
 
-  while (cur != cur->top) {
-    finalize(cur, linenum);
-    cur = cur->parent;
-  }
-  check(cur == cur->top, "problems finalizing open containers");
-  finalize(cur, linenum);
-  process_inlines(cur, cur->attributes.refmap);
-  if (ast) {
-    print_blocks(cur, 0);
-  } else {
-    check(blocks_to_html(cur, &html, false) == 0, "could not format as HTML");
-    // printf("%s", html->data);
-    bdestroy(html);
-  }
-  free_blocks(cur);
-  return 0;
-error:
-  return -1;
+	return 0;
 }
 
diff --git a/src/print.c b/src/print.c
index a924870..3ebde16 100644
--- a/src/print.c
+++ b/src/print.c
@@ -1,168 +1,175 @@
 #include <stdlib.h>
 #include <stdio.h>
-#include "bstrlib.h"
+#include <string.h>
 #include "stmd.h"
 #include "debug.h"
 
-static bstring format_str(bstring s)
+static void print_str(const unsigned char *s, int len)
 {
-  int pos = 0;
-  int len = blength(s);
-  bstring result = bfromcstr("");
-  char c;
-  bformata(result, "\"");
-  while (pos < len) {
-    c = bchar(s, pos);
-    switch (c) {
-    case '\n':
-      bformata(result, "\\n");
-      break;
-    case '"':
-      bformata(result, "\\\"");
-      break;
-    case '\\':
-      bformata(result, "\\\\");
-      break;
-    default:
-      bformata(result, "%c", c);
-    }
-    pos++;
-  }
-  bformata(result, "\"");
-  return result;
+	int i;
+
+	if (len < 0)
+		len = strlen((const char *)s); /* negative len: s must be NUL-terminated */
+
+	putchar('"');
+	for (i = 0; i < len; ++i) {
+		unsigned char c = s[i];
+
+		switch (c) {
+			case '\n':
+				printf("\\n");
+				break;
+			case '"':
+				printf("\\\"");
+				break;
+			case '\\':
+				printf("\\\\");
+				break;
+			default:
+				putchar((int)c);
+		}
+	}
+	putchar('"');
 }
 
 // Functions to pretty-print inline and block lists, for debugging.
 // Prettyprint an inline list, for debugging.
 extern void print_blocks(block* b, int indent)
 {
-  struct ListData * data;
-  while(b != NULL) {
-    // printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line);
-    for (int i=0; i < indent; i++) {
-      putchar(' ');
-    }
-    switch(b->tag) {
-    case document:
-      printf("document\n");
-      print_blocks(b->children, indent + 2);
-      break;
-    case block_quote:
-      printf("block_quote\n");
-      print_blocks(b->children, indent + 2);
-      break;
-    case list_item:
-      data = &(b->attributes.list_data);
-      printf("list_item\n");
-      print_blocks(b->children, indent + 2);
-      break;
-    case list:
-      data = &(b->attributes.list_data);
-      if (data->list_type == ordered) {
-        printf("list (type=ordered tight=%s start=%d delim=%s)\n",
-               (data->tight ? "true" : "false"),
-               data->start,
-               (data->delimiter == parens ? "parens" : "period"));
-      } else {
-        printf("list (type=bullet tight=%s bullet_char=%c)\n",
-               (data->tight ? "true" : "false"),
-               data->bullet_char);
-      }
-      print_blocks(b->children, indent + 2);
-      break;
-    case atx_header:
-      printf("atx_header (level=%d)\n", b->attributes.header_level);
-      print_inlines(b->inline_content, indent + 2);
-      break;
-    case setext_header:
-      printf("setext_header (level=%d)\n", b->attributes.header_level);
-      print_inlines(b->inline_content, indent + 2);
-      break;
-    case paragraph:
-      printf("paragraph\n");
-      print_inlines(b->inline_content, indent + 2);
-      break;
-    case hrule:
-      printf("hrule\n");
-      break;
-    case indented_code:
-      printf("indented_code %s\n", format_str(b->string_content)->data);
-      break;
-    case fenced_code:
-      printf("fenced_code length=%d info=%s %s\n",
-             b->attributes.fenced_code_data.fence_length,
-             format_str(b->attributes.fenced_code_data.info)->data,
-             format_str(b->string_content)->data);
-      break;
-    case html_block:
-      printf("html_block %s\n", format_str(b->string_content)->data);
-      break;
-    case reference_def:
-      printf("reference_def\n");
-      break;
-    default:
-      log_warn("block type %d not implemented\n", b->tag);
-      break;
-    }
-    b = b->next;
-  }
+	struct ListData *data;
+
+	while(b != NULL) {
+		// printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line);
+		for (int i=0; i < indent; i++) {
+			putchar(' ');
+		}
+
+		switch(b->tag) {
+		case document:
+			printf("document\n");
+			print_blocks(b->children, indent + 2);
+			break;
+		case block_quote:
+			printf("block_quote\n");
+			print_blocks(b->children, indent + 2);
+			break;
+		case list_item:
+			data = &(b->attributes.list_data);
+			printf("list_item\n");
+			print_blocks(b->children, indent + 2);
+			break;
+		case list:
+			data = &(b->attributes.list_data);
+			if (data->list_type == ordered) {
+				printf("list (type=ordered tight=%s start=%d delim=%s)\n",
+						(data->tight ? "true" : "false"),
+						data->start,
+						(data->delimiter == parens ? "parens" : "period"));
+			} else {
+				printf("list (type=bullet tight=%s bullet_char=%c)\n",
+						(data->tight ? "true" : "false"),
+						data->bullet_char);
+			}
+			print_blocks(b->children, indent + 2);
+			break;
+		case atx_header:
+			printf("atx_header (level=%d)\n", b->attributes.header_level);
+			print_inlines(b->inline_content, indent + 2);
+			break;
+		case setext_header:
+			printf("setext_header (level=%d)\n", b->attributes.header_level);
+			print_inlines(b->inline_content, indent + 2);
+			break;
+		case paragraph:
+			printf("paragraph\n");
+			print_inlines(b->inline_content, indent + 2);
+			break;
+		case hrule:
+			printf("hrule\n");
+			break;
+		case indented_code:
+			printf("indented_code ");
+			print_str(b->string_content.ptr, -1);
+			putchar('\n');
+			break;
+		case fenced_code:
+			printf("fenced_code length=%d info=",
+				b->attributes.fenced_code_data.fence_length);
+			print_str(b->attributes.fenced_code_data.info.ptr, -1);
+			putchar(' ');
+			print_str(b->string_content.ptr, -1);
+			putchar('\n');
+			break;
+		case html_block:
+			printf("html_block ");
+			print_str(b->string_content.ptr, -1);
+			putchar('\n');
+			break;
+		case reference_def:
+			printf("reference_def\n");
+			break;
+		default:
+			printf("# NOT IMPLEMENTED (%d)\n", b->tag);
+			break;
+		}
+		b = b->next;
+	}
 }
 
 // Prettyprint an inline list, for debugging.
 extern void print_inlines(inl* ils, int indent)
 {
-  while(ils != NULL) {
-    /*
-    // we add 11 extra spaces for the line/column info
-    for (int i=0; i < 11; i++) {
-      putchar(' ');
-    }
-    putchar('|');
-    putchar(' ');
-    */
-    for (int i=0; i < indent; i++) {
-      putchar(' ');
-    }
-    switch(ils->tag) {
-    case str:
-      printf("str %s\n", format_str(ils->content.literal)->data);
-      break;
-    case linebreak:
-      printf("linebreak\n");
-      break;
-    case softbreak:
-      printf("softbreak\n");
-      break;
-    case code:
-      printf("code %s\n", format_str(ils->content.literal)->data);
-      break;
-    case raw_html:
-      printf("html %s\n", format_str(ils->content.literal)->data);
-      break;
-    case entity:
-      printf("entity %s\n", format_str(ils->content.literal)->data);
-      break;
-    case link:
-      printf("link url=%s title=%s\n",
-             format_str(ils->content.linkable.url)->data,
-             format_str(ils->content.linkable.title)->data);
-      print_inlines(ils->content.linkable.label, indent + 2);
-      break;
-    case image:
-      printf("image url=%s title=%s\n",
-             format_str(ils->content.linkable.url)->data,
-             format_str(ils->content.linkable.title)->data);
-      print_inlines(ils->content.linkable.label, indent + 2);
-      break;
-    case strong:
-      printf("strong\n");
-      print_inlines(ils->content.linkable.label, indent + 2);
-      break;
-    case emph:
-      printf("emph\n");
-      print_inlines(ils->content.linkable.label, indent + 2);
-      break;
-    }
-    ils = ils->next;
-  }
+	while(ils != NULL) {
+		for (int i=0; i < indent; i++) {
+			putchar(' ');
+		}
+		switch(ils->tag) {
+		case str:
+			printf("str ");
+			print_str(ils->content.literal.data, ils->content.literal.len);
+			putchar('\n');
+			break;
+		case linebreak:
+			printf("linebreak\n");
+			break;
+		case softbreak:
+			printf("softbreak\n");
+			break;
+		case code:
+			printf("code ");
+			print_str(ils->content.literal.data, ils->content.literal.len);
+			putchar('\n');
+			break;
+		case raw_html:
+			printf("html ");
+			print_str(ils->content.literal.data, ils->content.literal.len);
+			putchar('\n');
+			break;
+		case entity:
+			printf("entity ");
+			print_str(ils->content.literal.data, ils->content.literal.len);
+			putchar('\n');
+			break;
+		case link:
+		case image:
+			printf("%s url=", ils->tag == link ? "link" : "image");
+			print_str(ils->content.linkable.url, -1);
+			if (ils->content.linkable.title) {
+				printf(" title=");
+				print_str(ils->content.linkable.title, -1);
+			}
+			putchar('\n');
+			print_inlines(ils->content.linkable.label, indent + 2);
+			break;
+		case strong:
+			printf("strong\n");
+			print_inlines(ils->content.linkable.label, indent + 2);
+			break;
+		case emph:
+			printf("emph\n");
+			print_inlines(ils->content.linkable.label, indent + 2);
+			break;
+		}
+		ils = ils->next;
+	}
 }
diff --git a/src/stmd.h b/src/stmd.h
index eb1b989..dc24235 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -105,19 +105,14 @@ extern block* add_child(block* parent,
                         int block_type, int start_line, int start_column);
 void free_blocks(block* e);
 
-block *stmd_parse_document(const char *buffer, size_t len);
-
-// FOR NOW:
-void process_inlines(block* cur, reference** refmap);
-void incorporate_line(gh_buf *ln, int line_number, block** curptr);
-void finalize(block* b, int line_number);
+extern block *stmd_parse_document(const unsigned char *buffer, size_t len);
+extern block *stmd_parse_file(FILE *f);
 
 void print_inlines(inl* ils, int indent);
 void print_blocks(block* blk, int indent);
 
-/* TODO */
-// int blocks_to_html(block* b, bstring* result, bool tight);
-// int inlines_to_html(inl* b, bstring* result);
+void blocks_to_html(gh_buf *html, block *b, bool tight);
+void inlines_to_html(gh_buf *html, inl *b);
 
 void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len);
 
diff --git a/src/utf8.c b/src/utf8.c
index 1a5df9e..e3f8dd3 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -1,6 +1,8 @@
 #include <stdlib.h>
-#include "bstrlib.h"
-#include "debug.h"
+#include <stdint.h>
+#include <unistd.h>
+
+#include "stmd.h"
 
 static const int8_t utf8proc_utf8class[256] = {
 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
diff --git a/src/utf8.h b/src/utf8.h
deleted file mode 100644
index fe59a90..0000000
--- a/src/utf8.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#include <stdlib.h>
-#include "bstrlib.h"
-
-extern unsigned char * from_utf8(unsigned char * s, unsigned int *n);
-extern int to_utf8(unsigned int c, bstring dest);
-extern bstring case_fold(bstring source);