5 files changed, 169 insertions, 89 deletions
diff --git a/js/lib/blocks.js b/js/lib/blocks.js
index c6e5d75..bd00b1a 100644
--- a/js/lib/blocks.js
+++ b/js/lib/blocks.js
@@ -1,35 +1,66 @@
 var Node = require('./node');
 
 var C_GREATERTHAN = 62;
+var C_NEWLINE = 10;
 var C_SPACE = 32;
 var C_OPEN_BRACKET = 91;
 
 var InlineParser = require('./inlines');
+
 var unescapeString = new InlineParser().unescapeString;
 
+var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)';
+
+var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" +
+        "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])";
+
+var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i');
+
+var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/;
+
+var reMaybeSpecial = /^[ #`~*+_=<>0-9-]/;
+
+var reNonSpace = /[^ \t\n]/;
+
+var reBulletListMarker = /^[*+-]( +|$)/;
+
+var reOrderedListMarker = /^(\d+)([.)])( +|$)/;
+
+var reATXHeaderMarker = /^#{1,6}(?: +|$)/;
+
+var reCodeFence = /^`{3,}(?!.*`)|^~{3,}(?!.*~)/;
+
+var reClosingCodeFence = /^(?:`{3,}|~{3,})(?= *$)/;
+
+var reSetextHeaderLine = /^(?:=+|-+) *$/;
+
+var reLineEnding = /\r\n|\n|\r/;
+
 // Returns true if string contains only space characters.
 var isBlank = function(s) {
     "use strict";
-    return /^\s*$/.test(s);
+    return !(reNonSpace.test(s));
 };
 
+var tabSpaces = ['    ', '   ', '  ', ' '];
+
 // Convert tabs to spaces on each line using a 4-space tab stop.
 var detabLine = function(text) {
     "use strict";
-    if (text.indexOf('\u0000') !== -1) {
-        // replace NUL for security
-        text = text.replace(/\0/g, '\uFFFD');
-    }
-    if (text.indexOf('\t') === -1) {
-        return text;
-    } else {
-        var lastStop = 0;
-        return text.replace(/\t/g, function(match, offset) {
-            var result = '    '.slice((offset - lastStop) % 4);
-            lastStop = offset + 1;
-            return result;
-        });
+
+    var start = 0;    // where to resume searching for the next tab
+    var offset;       // index of the tab currently being expanded
+    var lastStop = 0; // index (in the already-expanded text) of the last tab stop
+
+    while ((offset = text.indexOf('\t', start)) !== -1) {
+        var numspaces = (offset - lastStop) % 4; // columns used since the last stop
+        var spaces = tabSpaces[numspaces];       // padding of length 4 - numspaces
+        text = text.slice(0, offset) + spaces + text.slice(offset + 1);
+        lastStop = offset + spaces.length; // first column after the expanded tab
+        start = lastStop;
     }
+
+    return text;
 };
 
 // Attempt to match a regex in string s at offset offset.
@@ -44,13 +75,15 @@ var matchAt = function(re, s, offset) {
     }
 };
 
-var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)';
-var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" +
-        "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])";
-var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i');
-
-var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/;
-
+// destructively strip final blank lines in an array of strings
+var stripFinalBlankLines = function(lns) {
+    "use strict";
+    var i = lns.length - 1;
+    while (i >= 0 && !reNonSpace.test(lns[i])) { // stop explicitly once the array is empty
+        lns.pop();
+        i--;
+    }
+};
 
 // DOC PARSER
 
@@ -160,12 +193,12 @@ var parseListMarker = function(ln, offset) {
     if (rest.match(reHrule)) {
         return null;
     }
-    if ((match = rest.match(/^[*+-]( +|$)/))) {
+    if ((match = rest.match(reBulletListMarker))) {
         spaces_after_marker = match[1].length;
         data.type = 'Bullet';
         data.bullet_char = match[0][0];
 
-    } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) {
+    } else if ((match = rest.match(reOrderedListMarker))) {
         spaces_after_marker = match[3].length;
         data.type = 'Ordered';
         data.start = parseInt(match[1]);
@@ -214,6 +247,11 @@ var incorporateLine = function(ln, line_number) {
     var container = this.doc;
     var oldtip = this.tip;
 
+    // replace NUL characters for security
+    if (ln.indexOf('\u0000') !== -1) {
+        ln = ln.replace(/\0/g, '\uFFFD');
+    }
+
     // Convert tabs to spaces:
     ln = detabLine(ln);
 
@@ -226,7 +264,7 @@ var incorporateLine = function(ln, line_number) {
         }
         container = container.lastChild;
 
-        match = matchAt(/[^ ]/, ln, offset);
+        match = matchAt(reNonSpace, ln, offset);
         if (match === -1) {
             first_nonspace = ln.length;
             blank = true;
@@ -319,13 +357,11 @@ var incorporateLine = function(ln, line_number) {
     // want to close unmatched blocks.  So we store this closure for
     // use later, when we have more information.
     var closeUnmatchedBlocks = function(mythis) {
-        var already_done = false;
         // finalize any blocks not matched
-        while (!already_done && oldtip !== last_matched_container) {
+        while (oldtip !== last_matched_container) {
             mythis.finalize(oldtip, line_number - 1);
             oldtip = oldtip.parent;
         }
-        already_done = true;
     };
 
     // Check to see if we've hit 2nd blank line; if so break out of list:
@@ -339,9 +375,9 @@ var incorporateLine = function(ln, line_number) {
            container.t !== 'IndentedCode' &&
            container.t !== 'HtmlBlock' &&
            // this is a little performance optimization:
-           matchAt(/^[ #`~*+_=<>0-9-]/, ln, offset) !== -1) {
+           matchAt(reMaybeSpecial, ln, offset) !== -1) {
 
-        match = matchAt(/[^ ]/, ln, offset);
+        match = matchAt(reNonSpace, ln, offset);
         if (match === -1) {
             first_nonspace = ln.length;
             blank = true;
@@ -371,7 +407,7 @@ var incorporateLine = function(ln, line_number) {
             closeUnmatchedBlocks(this);
             container = this.addChild('BlockQuote', line_number, offset);
 
-        } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) {
+        } else if ((match = ln.slice(first_nonspace).match(reATXHeaderMarker))) {
             // ATX header
             offset = first_nonspace + match[0].length;
             closeUnmatchedBlocks(this);
@@ -382,7 +418,7 @@ var incorporateLine = function(ln, line_number) {
                 [ln.slice(offset).replace(/^ *#+ *$/, '').replace(/ +#+ *$/, '')];
             break;
 
-        } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) {
+        } else if ((match = ln.slice(first_nonspace).match(reCodeFence))) {
             // fenced code block
             var fence_length = match[0].length;
             closeUnmatchedBlocks(this);
@@ -402,7 +438,7 @@ var incorporateLine = function(ln, line_number) {
 
         } else if (container.t === 'Paragraph' &&
                    container.strings.length === 1 &&
-                   ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) {
+                   ((match = ln.slice(first_nonspace).match(reSetextHeaderLine)))) {
             // setext header line
             closeUnmatchedBlocks(this);
             container.t = 'Header'; // convert Paragraph to SetextHeader
@@ -447,7 +483,7 @@ var incorporateLine = function(ln, line_number) {
     // What remains at the offset is a text line.  Add the text to the
     // appropriate container.
 
-    match = matchAt(/[^ ]/, ln, offset);
+    match = matchAt(reNonSpace, ln, offset);
     if (match === -1) {
         first_nonspace = ln.length;
         blank = true;
@@ -500,7 +536,7 @@ var incorporateLine = function(ln, line_number) {
             // check for closing code fence:
             match = (indent <= 3 &&
                      ln.charAt(first_nonspace) === container.fence_char &&
-                     ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/));
+                     ln.slice(first_nonspace).match(reClosingCodeFence));
             if (match && match[0].length >= container.fence_length) {
                 // don't add closing fence to container; instead, close it:
                 this.finalize(container, line_number);
@@ -569,7 +605,8 @@ var finalize = function(block, line_number) {
         break;
 
     case 'IndentedCode':
-        block.literal = block.strings.join('\n').replace(/(\n *)*$/, '\n');
+        stripFinalBlankLines(block.strings);
+        block.literal = block.strings.join('\n') + '\n';
         block.t = 'CodeBlock';
         break;
 
@@ -644,21 +681,31 @@ var parse = function(input) {
     this.doc = Document();
     this.tip = this.doc;
     this.refmap = {};
-    var lines = input.replace(/\n$/, '').split(/\r\n|\n|\r/);
+    if (this.options.time) { console.time("preparing input"); }
+    var lines = input.split(reLineEnding);
     var len = lines.length;
+    if (input.charCodeAt(input.length - 1) === C_NEWLINE) {
+        // ignore last blank line created by final newline
+        len -= 1;
+    }
+    if (this.options.time) { console.timeEnd("preparing input"); }
+    if (this.options.time) { console.time("block parsing"); }
     for (var i = 0; i < len; i++) {
         this.incorporateLine(lines[i], i + 1);
     }
     while (this.tip) {
         this.finalize(this.tip, len);
     }
+    if (this.options.time) { console.timeEnd("block parsing"); }
+    if (this.options.time) { console.time("inline parsing"); }
     this.processInlines(this.doc);
+    if (this.options.time) { console.timeEnd("inline parsing"); }
     return this.doc;
 };
 
 
 // The DocParser object.
-function DocParser(){
+function DocParser(options){
     "use strict";
     return {
         doc: Document(),
@@ -672,7 +719,8 @@ function DocParser(){
         incorporateLine: incorporateLine,
         finalize: finalize,
         processInlines: processInlines,
-        parse: parse
+        parse: parse,
+        options: options || {}
     };
 }
 
diff --git a/js/lib/html.js b/js/lib/html.js
index 26c677b..847ed98 100644
--- a/js/lib/html.js
+++ b/js/lib/html.js
@@ -19,31 +19,38 @@ var tag = function(name, attrs, selfclosing) {
     return result;
 };
 
-var renderNodes = function(block, options) {
+var reHtmlTag = /\<[^>]*\>/g; // global, so replace() strips every tag in the string
+
+var renderNodes = function(block) {
 
     var attrs;
     var info_words;
     var tagname;
     var walker = block.walker();
     var event, node, entering;
-    var buffer = [];
+    var buffer = "";
+    var lastOut = "\n";
     var disableTags = 0;
     var grandparent;
     var out = function(s) {
         if (disableTags > 0) {
-            buffer.push(s.replace(/\<[^>]*\>/g, ''));
+            buffer += s.replace(reHtmlTag, '');
         } else {
-            buffer.push(s);
+            buffer += s;
         }
+        lastOut = s;
     };
     var esc = this.escape;
     var cr = function() {
-        if (buffer.length > 0 && buffer[buffer.length - 1] !== '\n') {
-            out('\n');
+        if (lastOut !== '\n') {
+            buffer += '\n';
+            lastOut = '\n';
         }
     };
 
-    options = options || {};
+    var options = this.options;
+
+    if (options.time) { console.time("rendering"); }
 
     while ((event = walker.next())) {
         entering = event.entering;
@@ -81,10 +88,6 @@ var renderNodes = function(block, options) {
             out(tag(entering ? 'strong' : '/strong'));
             break;
 
-        case 'Emph':
-            out(tag(entering ? 'strong' : '/strong'));
-            break;
-
         case 'Html':
             out(node.literal);
             break;
@@ -198,7 +201,7 @@ var renderNodes = function(block, options) {
             }
             cr();
             out(tag('pre') + tag('code', attrs));
-            out(this.escape(node.literal));
+            out(esc(node.literal));
             out(tag('/code') + tag('/pre'));
             cr();
             break;
@@ -220,14 +223,15 @@ var renderNodes = function(block, options) {
             break;
 
         default:
-            console.log("Unknown node type " + node.t);
+            throw("Unknown node type " + node.t);
         }
 
     }
-    return buffer.join('');
+    if (options.time) { console.timeEnd("rendering"); }
+    return buffer;
 };
 
-var sub = function(s) {
+var replaceUnsafeChar = function(s) {
     switch (s) {
     case '&':
         return '&amp;';
@@ -242,23 +246,27 @@ var sub = function(s) {
     }
 };
 
+var reNeedsEscaping = /[&<>"]/;
 
 // The HtmlRenderer object.
-function HtmlRenderer(){
+function HtmlRenderer(options){
     return {
         // default options:
-        blocksep: '\n',  // space between blocks
-        innersep: '\n',  // space between block container tag and contents
         softbreak: '\n', // by default, soft breaks are rendered as newlines in HTML
         // set to "<br />" to make them hard breaks
         // set to " " if you want to ignore line wrapping in source
         escape: function(s, preserve_entities) {
-            if (preserve_entities) {
-                return s.replace(/[&](?:[#](x[a-f0-9]{1,8}|[0-9]{1,8});|[a-z][a-z0-9]{1,31};)|[&<>"]/gi, sub);
+            if (reNeedsEscaping.test(s)) {
+                if (preserve_entities) {
+                    return s.replace(/[&](?:[#](x[a-f0-9]{1,8}|[0-9]{1,8});|[a-z][a-z0-9]{1,31};)|[&<>"]/gi, replaceUnsafeChar);
+                } else {
+                    return s.replace(/[&<>"]/g, replaceUnsafeChar);
+                }
             } else {
-                return s.replace(/[&<>"]/g, sub);
+                return s;
             }
         },
+        options: options || {},
         render: renderNodes
     };
 }
diff --git a/js/lib/index.js b/js/lib/index.js
index d0532c6..22a2184 100755
--- a/js/lib/index.js
+++ b/js/lib/index.js
@@ -13,11 +13,15 @@
 
 var util = require('util');
 
-var renderAST = function(tree) {
-    return util.inspect(tree.toAST(), {depth: 20}) + '\n';
-};
-
 module.exports.Node = require('./node');
 module.exports.DocParser = require('./blocks');
 module.exports.HtmlRenderer = require('./html');
-module.exports.ASTRenderer = renderAST;
+module.exports.ASTRenderer = function(options) {
+    return {
+        render: function(tree) { // returns the inspected AST followed by a newline
+            return util.inspect(tree.toAST(), {depth: 20,
+                                colors: this.options.colors}) + '\n';
+        },
+        options: options || {}
+    };
+};
diff --git a/js/lib/inlines.js b/js/lib/inlines.js
index 72c4448..4d49861 100644
--- a/js/lib/inlines.js
+++ b/js/lib/inlines.js
@@ -65,6 +65,8 @@ var reEntityHere = new RegExp('^' + ENTITY, 'i');
 
 var reEntityOrEscapedChar = new RegExp('\\\\' + ESCAPABLE + '|' + ENTITY, 'gi');
 
+var reBackslashOrAmp = /[\\&]/;
+
 var reTicks = new RegExp('`+');
 
 var reTicksHere = new RegExp('^`+');
@@ -75,6 +77,18 @@ var reAutolink = /^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data
 
 var reSpnl = /^ *(?:\n *)?/;
 
+var reWhitespaceChar = /^\s/;
+
+var reWhitespace = /\s+/g;
+
+var reFinalSpace = / *$/;
+
+var reInitialSpace = /^ */;
+
+var reAsciiAlnum = /[a-z0-9]/i;
+
+var reLinkLabel = /^\[(?:[^\\\[\]]|\\[\[\]]){0,1000}\]/;
+
 // Matches a string of non-special characters.
 var reMain = /^[^\n`\[\]\\!<&*_]+/m;
 
@@ -90,7 +104,11 @@ var unescapeChar = function(s) {
 // Replace entities and backslash escapes with literal characters.
 var unescapeString = function(s) {
     "use strict";
-    return s.replace(reEntityOrEscapedChar, unescapeChar);
+    if (reBackslashOrAmp.test(s)) {
+        return s.replace(reEntityOrEscapedChar, unescapeChar);
+    } else {
+        return s;
+    }
 };
 
 // Normalize reference label: collapse internal whitespace
@@ -167,8 +185,7 @@ var parseBackticks = function(block) {
             node = new Node('Code');
             node.literal = this.subject.slice(afterOpenTicks,
                                         this.pos - ticks.length)
-                          .replace(/[ \n]+/g, ' ')
-                          .trim();
+                          .trim().replace(reWhitespace, ' ');
             block.appendChild(node);
             return true;
         }
@@ -270,17 +287,17 @@ var scanDelims = function(cc) {
         char_after = fromCodePoint(cc_after);
     }
 
-    var can_open = numdelims > 0 && !(/\s/.test(char_after)) &&
+    var can_open = numdelims > 0 && !(reWhitespaceChar.test(char_after)) &&
             !(rePunctuation.test(char_after) &&
              !(/\s/.test(char_before)) &&
              !(rePunctuation.test(char_before)));
-    var can_close = numdelims > 0 && !(/\s/.test(char_before)) &&
+    var can_close = numdelims > 0 && !(reWhitespaceChar.test(char_before)) &&
             !(rePunctuation.test(char_before) &&
-              !(/\s/.test(char_after)) &&
+              !(reWhitespaceChar.test(char_after)) &&
               !(rePunctuation.test(char_after)));
     if (cc === C_UNDERSCORE) {
-        can_open = can_open && !((/[a-z0-9]/i).test(char_before));
-        can_close = can_close && !((/[a-z0-9]/i).test(char_after));
+        can_open = can_open && !((reAsciiAlnum).test(char_before));
+        can_close = can_close && !((reAsciiAlnum).test(char_after));
     }
     this.pos = startpos;
     return { numdelims: numdelims,
@@ -463,7 +480,7 @@ var parseLinkDestination = function() {
 // Attempt to parse a link label, returning number of characters parsed.
 var parseLinkLabel = function() {
     "use strict";
-    var m = this.match(/^\[(?:[^\\\[\]]|\\[\[\]]){0,1000}\]/);
+    var m = this.match(reLinkLabel);
     return m === null ? 0 : m.length;
 };
 
@@ -581,10 +598,11 @@ var parseCloseBracket = function(block) {
             ((dest = this.parseLinkDestination()) !== null) &&
             this.spnl() &&
             // make sure there's a space before the title:
-            (/^\s/.test(this.subject.charAt(this.pos - 1)) &&
+            (reWhitespaceChar.test(this.subject.charAt(this.pos - 1)) &&
              (title = this.parseLinkTitle() || '') || true) &&
             this.spnl() &&
-            this.match(/^\)/)) {
+            this.subject.charAt(this.pos) === ')') {
+            this.pos += 1;
             matched = true;
         }
     } else {
@@ -691,15 +709,15 @@ var parseNewline = function(block) {
     // check previous node for trailing spaces
     var lastc = block.lastChild;
     if (lastc && lastc.t === 'Text') {
-        var sps = / *$/.exec(lastc.literal)[0].length;
+        var sps = reFinalSpace.exec(lastc.literal)[0].length;
         if (sps > 0) {
-            lastc.literal = lastc.literal.replace(/ *$/, '');
+            lastc.literal = lastc.literal.replace(reFinalSpace, '');
         }
         block.appendChild(new Node(sps >= 2 ? 'Hardbreak' : 'Softbreak'));
     } else {
         block.appendChild(new Node('Softbreak'));
     }
-    this.match(/^ */); // gobble leading spaces in next line
+    this.match(reInitialSpace); // gobble leading spaces in next line
     return true;
 };
 
diff --git a/js/lib/node.js b/js/lib/node.js
index 84fb122..9dc7c3f 100644
--- a/js/lib/node.js
+++ b/js/lib/node.js
@@ -14,18 +14,12 @@ function isContainer(node) {
             t === 'Image');
 }
 
-function NodeWalker(root) {
-    this.current = root;
-    this.root = root;
-    this.entering = true;
-}
-
-NodeWalker.prototype.resumeAt = function(node, entering) {
+var resumeAt = function(node, entering) {
     this.current = node;
     this.entering = (entering === true);
 };
 
-NodeWalker.prototype.next = function(){
+var next = function(){
     var cur = this.current;
     var entering = this.entering;
 
@@ -56,7 +50,15 @@ NodeWalker.prototype.next = function(){
     return {entering: entering, node: cur};
 };
 
-function Node(nodeType, sourcepos) {
+var NodeWalker = function(root) {
+    return { current: root,
+             root: root,
+             entering: true,
+             next: next,
+             resumeAt: resumeAt };
+};
+
+var Node = function(nodeType, sourcepos) {
     this.t = nodeType;
     this.parent = null;
     this.firstChild = null;
@@ -77,7 +79,7 @@ function Node(nodeType, sourcepos) {
     this.fence_length = undefined;
     this.fence_offset = undefined;
     this.level = undefined;
-}
+};
 
 Node.prototype.isContainer = function() {
     return isContainer(this);
@@ -154,7 +156,7 @@ Node.prototype.insertBefore = function(sibling) {
 };
 
 Node.prototype.walker = function() {
-    var walker = new NodeWalker(this);
+    var walker = NodeWalker(this);
     return walker;
 };