summary refs log tree commit diff
path: root/js/lib
diff options
context:
space:
mode:
Diffstat (limited to 'js/lib')
-rw-r--r-- js/lib/blocks.js 124
-rw-r--r-- js/lib/html.js 50
-rwxr-xr-x js/lib/index.js 14
-rw-r--r-- js/lib/inlines.js 46
-rw-r--r-- js/lib/node.js 24
5 files changed, 169 insertions, 89 deletions
diff --git a/js/lib/blocks.js b/js/lib/blocks.js
index c6e5d75..bd00b1a 100644
--- a/js/lib/blocks.js
+++ b/js/lib/blocks.js
@@ -1,35 +1,66 @@
var Node = require('./node');
var C_GREATERTHAN = 62;
+var C_NEWLINE = 10;
var C_SPACE = 32;
var C_OPEN_BRACKET = 91;
var InlineParser = require('./inlines');
+
var unescapeString = new InlineParser().unescapeString;
+var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)';
+
+var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" +
+ "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])";
+
+var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i');
+
+var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/;
+
+var reMaybeSpecial = /^[ #`~*+_=<>0-9-]/;
+
+var reNonSpace = /[^ \t\n]/;
+
+var reBulletListMarker = /^[*+-]( +|$)/;
+
+var reOrderedListMarker = /^(\d+)([.)])( +|$)/;
+
+var reATXHeaderMarker = /^#{1,6}(?: +|$)/;
+
+var reCodeFence = /^`{3,}(?!.*`)|^~{3,}(?!.*~)/;
+
+var reClosingCodeFence = /^(?:`{3,}|~{3,})(?= *$)/;
+
+var reSetextHeaderLine = /^(?:=+|-+) *$/;
+
+var reLineEnding = /\r\n|\n|\r/;
+
// Returns true if string contains only space characters.
var isBlank = function(s) {
"use strict";
- return /^\s*$/.test(s);
+ return !(reNonSpace.test(s));
};
+var tabSpaces = [' ', ' ', ' ', ' '];
+
// Convert tabs to spaces on each line using a 4-space tab stop.
var detabLine = function(text) {
"use strict";
- if (text.indexOf('\u0000') !== -1) {
- // replace NUL for security
- text = text.replace(/\0/g, '\uFFFD');
- }
- if (text.indexOf('\t') === -1) {
- return text;
- } else {
- var lastStop = 0;
- return text.replace(/\t/g, function(match, offset) {
- var result = ' '.slice((offset - lastStop) % 4);
- lastStop = offset + 1;
- return result;
- });
+
+ var start = 0;
+ var offset;
+ var lastStop = 0;
+
+ while ((offset = text.indexOf('\t', start)) !== -1) {
+ var numspaces = (offset - lastStop) % 4;
+ var spaces = tabSpaces[numspaces];
+ text = text.slice(0, offset) + spaces + text.slice(offset + 1);
+ lastStop = offset + numspaces;
+ start = lastStop;
}
+
+ return text;
};
// Attempt to match a regex in string s at offset offset.
@@ -44,13 +75,15 @@ var matchAt = function(re, s, offset) {
}
};
-var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)';
-var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" +
- "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])";
-var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i');
-
-var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/;
-
+// destructively strip final blank lines in an array of strings
+var stripFinalBlankLines = function(lns) {
+ "use strict";
+ var i = lns.length - 1;
+ while (!reNonSpace.test(lns[i])) {
+ lns.pop();
+ i--;
+ }
+};
// DOC PARSER
@@ -160,12 +193,12 @@ var parseListMarker = function(ln, offset) {
if (rest.match(reHrule)) {
return null;
}
- if ((match = rest.match(/^[*+-]( +|$)/))) {
+ if ((match = rest.match(reBulletListMarker))) {
spaces_after_marker = match[1].length;
data.type = 'Bullet';
data.bullet_char = match[0][0];
- } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) {
+ } else if ((match = rest.match(reOrderedListMarker))) {
spaces_after_marker = match[3].length;
data.type = 'Ordered';
data.start = parseInt(match[1]);
@@ -214,6 +247,11 @@ var incorporateLine = function(ln, line_number) {
var container = this.doc;
var oldtip = this.tip;
+ // replace NUL characters for security
+ if (ln.indexOf('\u0000') !== -1) {
+ ln = ln.replace(/\0/g, '\uFFFD');
+ }
+
// Convert tabs to spaces:
ln = detabLine(ln);
@@ -226,7 +264,7 @@ var incorporateLine = function(ln, line_number) {
}
container = container.lastChild;
- match = matchAt(/[^ ]/, ln, offset);
+ match = matchAt(reNonSpace, ln, offset);
if (match === -1) {
first_nonspace = ln.length;
blank = true;
@@ -319,13 +357,11 @@ var incorporateLine = function(ln, line_number) {
// want to close unmatched blocks. So we store this closure for
// use later, when we have more information.
var closeUnmatchedBlocks = function(mythis) {
- var already_done = false;
// finalize any blocks not matched
- while (!already_done && oldtip !== last_matched_container) {
+ while (oldtip !== last_matched_container) {
mythis.finalize(oldtip, line_number - 1);
oldtip = oldtip.parent;
}
- already_done = true;
};
// Check to see if we've hit 2nd blank line; if so break out of list:
@@ -339,9 +375,9 @@ var incorporateLine = function(ln, line_number) {
container.t !== 'IndentedCode' &&
container.t !== 'HtmlBlock' &&
// this is a little performance optimization:
- matchAt(/^[ #`~*+_=<>0-9-]/, ln, offset) !== -1) {
+ matchAt(reMaybeSpecial, ln, offset) !== -1) {
- match = matchAt(/[^ ]/, ln, offset);
+ match = matchAt(reNonSpace, ln, offset);
if (match === -1) {
first_nonspace = ln.length;
blank = true;
@@ -371,7 +407,7 @@ var incorporateLine = function(ln, line_number) {
closeUnmatchedBlocks(this);
container = this.addChild('BlockQuote', line_number, offset);
- } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) {
+ } else if ((match = ln.slice(first_nonspace).match(reATXHeaderMarker))) {
// ATX header
offset = first_nonspace + match[0].length;
closeUnmatchedBlocks(this);
@@ -382,7 +418,7 @@ var incorporateLine = function(ln, line_number) {
[ln.slice(offset).replace(/^ *#+ *$/, '').replace(/ +#+ *$/, '')];
break;
- } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) {
+ } else if ((match = ln.slice(first_nonspace).match(reCodeFence))) {
// fenced code block
var fence_length = match[0].length;
closeUnmatchedBlocks(this);
@@ -402,7 +438,7 @@ var incorporateLine = function(ln, line_number) {
} else if (container.t === 'Paragraph' &&
container.strings.length === 1 &&
- ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) {
+ ((match = ln.slice(first_nonspace).match(reSetextHeaderLine)))) {
// setext header line
closeUnmatchedBlocks(this);
container.t = 'Header'; // convert Paragraph to SetextHeader
@@ -447,7 +483,7 @@ var incorporateLine = function(ln, line_number) {
// What remains at the offset is a text line. Add the text to the
// appropriate container.
- match = matchAt(/[^ ]/, ln, offset);
+ match = matchAt(reNonSpace, ln, offset);
if (match === -1) {
first_nonspace = ln.length;
blank = true;
@@ -500,7 +536,7 @@ var incorporateLine = function(ln, line_number) {
// check for closing code fence:
match = (indent <= 3 &&
ln.charAt(first_nonspace) === container.fence_char &&
- ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/));
+ ln.slice(first_nonspace).match(reClosingCodeFence));
if (match && match[0].length >= container.fence_length) {
// don't add closing fence to container; instead, close it:
this.finalize(container, line_number);
@@ -569,7 +605,8 @@ var finalize = function(block, line_number) {
break;
case 'IndentedCode':
- block.literal = block.strings.join('\n').replace(/(\n *)*$/, '\n');
+ stripFinalBlankLines(block.strings);
+ block.literal = block.strings.join('\n') + '\n';
block.t = 'CodeBlock';
break;
@@ -644,21 +681,31 @@ var parse = function(input) {
this.doc = Document();
this.tip = this.doc;
this.refmap = {};
- var lines = input.replace(/\n$/, '').split(/\r\n|\n|\r/);
+ if (this.options.time) { console.time("preparing input"); }
+ var lines = input.split(reLineEnding);
var len = lines.length;
+ if (input.charCodeAt(input.length - 1) === C_NEWLINE) {
+ // ignore last blank line created by final newline
+ len -= 1;
+ }
+ if (this.options.time) { console.timeEnd("preparing input"); }
+ if (this.options.time) { console.time("block parsing"); }
for (var i = 0; i < len; i++) {
this.incorporateLine(lines[i], i + 1);
}
while (this.tip) {
this.finalize(this.tip, len);
}
+ if (this.options.time) { console.timeEnd("block parsing"); }
+ if (this.options.time) { console.time("inline parsing"); }
this.processInlines(this.doc);
+ if (this.options.time) { console.timeEnd("inline parsing"); }
return this.doc;
};
// The DocParser object.
-function DocParser(){
+function DocParser(options){
"use strict";
return {
doc: Document(),
@@ -672,7 +719,8 @@ function DocParser(){
incorporateLine: incorporateLine,
finalize: finalize,
processInlines: processInlines,
- parse: parse
+ parse: parse,
+ options: options || {}
};
}
diff --git a/js/lib/html.js b/js/lib/html.js
index 26c677b..847ed98 100644
--- a/js/lib/html.js
+++ b/js/lib/html.js
@@ -19,31 +19,38 @@ var tag = function(name, attrs, selfclosing) {
return result;
};
-var renderNodes = function(block, options) {
+var reHtmlTag = /\<[^>]*\>/;
+
+var renderNodes = function(block) {
var attrs;
var info_words;
var tagname;
var walker = block.walker();
var event, node, entering;
- var buffer = [];
+ var buffer = "";
+ var lastOut = "\n";
var disableTags = 0;
var grandparent;
var out = function(s) {
if (disableTags > 0) {
- buffer.push(s.replace(/\<[^>]*\>/g, ''));
+ buffer += s.replace(reHtmlTag, '');
} else {
- buffer.push(s);
+ buffer += s;
}
+ lastOut = s;
};
var esc = this.escape;
var cr = function() {
- if (buffer.length > 0 && buffer[buffer.length - 1] !== '\n') {
- out('\n');
+ if (lastOut !== '\n') {
+ buffer += '\n';
+ lastOut = '\n';
}
};
- options = options || {};
+ var options = this.options;
+
+ if (options.time) { console.time("rendering"); }
while ((event = walker.next())) {
entering = event.entering;
@@ -81,10 +88,6 @@ var renderNodes = function(block, options) {
out(tag(entering ? 'strong' : '/strong'));
break;
- case 'Emph':
- out(tag(entering ? 'strong' : '/strong'));
- break;
-
case 'Html':
out(node.literal);
break;
@@ -198,7 +201,7 @@ var renderNodes = function(block, options) {
}
cr();
out(tag('pre') + tag('code', attrs));
- out(this.escape(node.literal));
+ out(esc(node.literal));
out(tag('/code') + tag('/pre'));
cr();
break;
@@ -220,14 +223,15 @@ var renderNodes = function(block, options) {
break;
default:
- console.log("Unknown node type " + node.t);
+ throw("Unknown node type " + node.t);
}
}
- return buffer.join('');
+ if (options.time) { console.timeEnd("rendering"); }
+ return buffer;
};
-var sub = function(s) {
+var replaceUnsafeChar = function(s) {
switch (s) {
case '&':
return '&amp;';
@@ -242,23 +246,27 @@ var sub = function(s) {
}
};
+var reNeedsEscaping = /[&<>"]/;
// The HtmlRenderer object.
-function HtmlRenderer(){
+function HtmlRenderer(options){
return {
// default options:
- blocksep: '\n', // space between blocks
- innersep: '\n', // space between block container tag and contents
softbreak: '\n', // by default, soft breaks are rendered as newlines in HTML
// set to "<br />" to make them hard breaks
// set to " " if you want to ignore line wrapping in source
escape: function(s, preserve_entities) {
- if (preserve_entities) {
- return s.replace(/[&](?:[#](x[a-f0-9]{1,8}|[0-9]{1,8});|[a-z][a-z0-9]{1,31};)|[&<>"]/gi, sub);
+ if (reNeedsEscaping.test(s)) {
+ if (preserve_entities) {
+ return s.replace(/[&](?:[#](x[a-f0-9]{1,8}|[0-9]{1,8});|[a-z][a-z0-9]{1,31};)|[&<>"]/gi, replaceUnsafeChar);
+ } else {
+ return s.replace(/[&<>"]/g, replaceUnsafeChar);
+ }
} else {
- return s.replace(/[&<>"]/g, sub);
+ return s;
}
},
+ options: options || {},
render: renderNodes
};
}
diff --git a/js/lib/index.js b/js/lib/index.js
index d0532c6..22a2184 100755
--- a/js/lib/index.js
+++ b/js/lib/index.js
@@ -13,11 +13,15 @@
var util = require('util');
-var renderAST = function(tree) {
- return util.inspect(tree.toAST(), {depth: 20}) + '\n';
-};
-
module.exports.Node = require('./node');
module.exports.DocParser = require('./blocks');
module.exports.HtmlRenderer = require('./html');
-module.exports.ASTRenderer = renderAST;
+module.exports.ASTRenderer = function(options) {
+ return {
+ render: function(tree) {
+ return util.inspect(tree.toAST(), null, 20,
+ this.options.colors) + '\n';
+ },
+ options: options || {}
+ };
+}
diff --git a/js/lib/inlines.js b/js/lib/inlines.js
index 72c4448..4d49861 100644
--- a/js/lib/inlines.js
+++ b/js/lib/inlines.js
@@ -65,6 +65,8 @@ var reEntityHere = new RegExp('^' + ENTITY, 'i');
var reEntityOrEscapedChar = new RegExp('\\\\' + ESCAPABLE + '|' + ENTITY, 'gi');
+var reBackslashOrAmp = /[\\&]/;
+
var reTicks = new RegExp('`+');
var reTicksHere = new RegExp('^`+');
@@ -75,6 +77,18 @@ var reAutolink = /^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data
var reSpnl = /^ *(?:\n *)?/;
+var reWhitespaceChar = /^\s/;
+
+var reWhitespace = /\s+/g;
+
+var reFinalSpace = / *$/;
+
+var reInitialSpace = /^ */;
+
+var reAsciiAlnum = /[a-z0-9]/i;
+
+var reLinkLabel = /^\[(?:[^\\\[\]]|\\[\[\]]){0,1000}\]/;
+
// Matches a string of non-special characters.
var reMain = /^[^\n`\[\]\\!<&*_]+/m;
@@ -90,7 +104,11 @@ var unescapeChar = function(s) {
// Replace entities and backslash escapes with literal characters.
var unescapeString = function(s) {
"use strict";
- return s.replace(reEntityOrEscapedChar, unescapeChar);
+ if (reBackslashOrAmp.test(s)) {
+ return s.replace(reEntityOrEscapedChar, unescapeChar);
+ } else {
+ return s;
+ }
};
// Normalize reference label: collapse internal whitespace
@@ -167,8 +185,7 @@ var parseBackticks = function(block) {
node = new Node('Code');
node.literal = this.subject.slice(afterOpenTicks,
this.pos - ticks.length)
- .replace(/[ \n]+/g, ' ')
- .trim();
+ .trim().replace(reWhitespace, ' ');
block.appendChild(node);
return true;
}
@@ -270,17 +287,17 @@ var scanDelims = function(cc) {
char_after = fromCodePoint(cc_after);
}
- var can_open = numdelims > 0 && !(/\s/.test(char_after)) &&
+ var can_open = numdelims > 0 && !(reWhitespaceChar.test(char_after)) &&
!(rePunctuation.test(char_after) &&
!(/\s/.test(char_before)) &&
!(rePunctuation.test(char_before)));
- var can_close = numdelims > 0 && !(/\s/.test(char_before)) &&
+ var can_close = numdelims > 0 && !(reWhitespaceChar.test(char_before)) &&
!(rePunctuation.test(char_before) &&
- !(/\s/.test(char_after)) &&
+ !(reWhitespaceChar.test(char_after)) &&
!(rePunctuation.test(char_after)));
if (cc === C_UNDERSCORE) {
- can_open = can_open && !((/[a-z0-9]/i).test(char_before));
- can_close = can_close && !((/[a-z0-9]/i).test(char_after));
+ can_open = can_open && !((reAsciiAlnum).test(char_before));
+ can_close = can_close && !((reAsciiAlnum).test(char_after));
}
this.pos = startpos;
return { numdelims: numdelims,
@@ -463,7 +480,7 @@ var parseLinkDestination = function() {
// Attempt to parse a link label, returning number of characters parsed.
var parseLinkLabel = function() {
"use strict";
- var m = this.match(/^\[(?:[^\\\[\]]|\\[\[\]]){0,1000}\]/);
+ var m = this.match(reLinkLabel);
return m === null ? 0 : m.length;
};
@@ -581,10 +598,11 @@ var parseCloseBracket = function(block) {
((dest = this.parseLinkDestination()) !== null) &&
this.spnl() &&
// make sure there's a space before the title:
- (/^\s/.test(this.subject.charAt(this.pos - 1)) &&
+ (reWhitespaceChar.test(this.subject.charAt(this.pos - 1)) &&
(title = this.parseLinkTitle() || '') || true) &&
this.spnl() &&
- this.match(/^\)/)) {
+ this.subject.charAt(this.pos) === ')') {
+ this.pos += 1;
matched = true;
}
} else {
@@ -691,15 +709,15 @@ var parseNewline = function(block) {
// check previous node for trailing spaces
var lastc = block.lastChild;
if (lastc && lastc.t === 'Text') {
- var sps = / *$/.exec(lastc.literal)[0].length;
+ var sps = reFinalSpace.exec(lastc.literal)[0].length;
if (sps > 0) {
- lastc.literal = lastc.literal.replace(/ *$/, '');
+ lastc.literal = lastc.literal.replace(reFinalSpace, '');
}
block.appendChild(new Node(sps >= 2 ? 'Hardbreak' : 'Softbreak'));
} else {
block.appendChild(new Node('Softbreak'));
}
- this.match(/^ */); // gobble leading spaces in next line
+ this.match(reInitialSpace); // gobble leading spaces in next line
return true;
};
diff --git a/js/lib/node.js b/js/lib/node.js
index 84fb122..9dc7c3f 100644
--- a/js/lib/node.js
+++ b/js/lib/node.js
@@ -14,18 +14,12 @@ function isContainer(node) {
t === 'Image');
}
-function NodeWalker(root) {
- this.current = root;
- this.root = root;
- this.entering = true;
-}
-
-NodeWalker.prototype.resumeAt = function(node, entering) {
+var resumeAt = function(node, entering) {
this.current = node;
this.entering = (entering === true);
};
-NodeWalker.prototype.next = function(){
+var next = function(){
var cur = this.current;
var entering = this.entering;
@@ -56,7 +50,15 @@ NodeWalker.prototype.next = function(){
return {entering: entering, node: cur};
};
-function Node(nodeType, sourcepos) {
+var NodeWalker = function(root) {
+ return { current: root,
+ root: root,
+ entering: true,
+ next: next,
+ resumeAt: resumeAt };
+};
+
+var Node = function(nodeType, sourcepos) {
this.t = nodeType;
this.parent = null;
this.firstChild = null;
@@ -77,7 +79,7 @@ function Node(nodeType, sourcepos) {
this.fence_length = undefined;
this.fence_offset = undefined;
this.level = undefined;
-}
+};
Node.prototype.isContainer = function() {
return isContainer(this);
@@ -154,7 +156,7 @@ Node.prototype.insertBefore = function(sibling) {
};
Node.prototype.walker = function() {
- var walker = new NodeWalker(this);
+ var walker = NodeWalker(this);
return walker;
};