summaryrefslogtreecommitdiff
path: root/js/lib/blocks.js
diff options
context:
space:
mode:
Diffstat (limited to 'js/lib/blocks.js')
-rw-r--r--js/lib/blocks.js124
1 files changed, 86 insertions, 38 deletions
diff --git a/js/lib/blocks.js b/js/lib/blocks.js
index c6e5d75..bd00b1a 100644
--- a/js/lib/blocks.js
+++ b/js/lib/blocks.js
@@ -1,35 +1,66 @@
var Node = require('./node');
var C_GREATERTHAN = 62;
+var C_NEWLINE = 10;
var C_SPACE = 32;
var C_OPEN_BRACKET = 91;
var InlineParser = require('./inlines');
+
var unescapeString = new InlineParser().unescapeString;
+var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)';
+
+var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" +
+ "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])";
+
+var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i');
+
+var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/;
+
+var reMaybeSpecial = /^[ #`~*+_=<>0-9-]/;
+
+var reNonSpace = /[^ \t\n]/;
+
+var reBulletListMarker = /^[*+-]( +|$)/;
+
+var reOrderedListMarker = /^(\d+)([.)])( +|$)/;
+
+var reATXHeaderMarker = /^#{1,6}(?: +|$)/;
+
+var reCodeFence = /^`{3,}(?!.*`)|^~{3,}(?!.*~)/;
+
+var reClosingCodeFence = /^(?:`{3,}|~{3,})(?= *$)/;
+
+var reSetextHeaderLine = /^(?:=+|-+) *$/;
+
+var reLineEnding = /\r\n|\n|\r/;
+
// Returns true if string contains only space characters.
var isBlank = function(s) {
"use strict";
- return /^\s*$/.test(s);
+ return !(reNonSpace.test(s));
};
+var tabSpaces = [' ', ' ', ' ', ' '];
+
// Convert tabs to spaces on each line using a 4-space tab stop.
var detabLine = function(text) {
"use strict";
- if (text.indexOf('\u0000') !== -1) {
- // replace NUL for security
- text = text.replace(/\0/g, '\uFFFD');
- }
- if (text.indexOf('\t') === -1) {
- return text;
- } else {
- var lastStop = 0;
- return text.replace(/\t/g, function(match, offset) {
- var result = ' '.slice((offset - lastStop) % 4);
- lastStop = offset + 1;
- return result;
- });
+
+ var start = 0;
+ var offset;
+ var lastStop = 0;
+
+ while ((offset = text.indexOf('\t', start)) !== -1) {
+ var numspaces = (offset - lastStop) % 4;
+ var spaces = tabSpaces[numspaces];
+ text = text.slice(0, offset) + spaces + text.slice(offset + 1);
+ lastStop = offset + numspaces;
+ start = lastStop;
}
+
+ return text;
};
// Attempt to match a regex in string s at offset offset.
@@ -44,13 +75,15 @@ var matchAt = function(re, s, offset) {
}
};
-var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)';
-var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" +
- "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])";
-var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i');
-
-var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/;
-
+// destructively strip final blank lines in an array of strings
+var stripFinalBlankLines = function(lns) {
+ "use strict";
+ var i = lns.length - 1;
+ while (!reNonSpace.test(lns[i])) {
+ lns.pop();
+ i--;
+ }
+};
// DOC PARSER
@@ -160,12 +193,12 @@ var parseListMarker = function(ln, offset) {
if (rest.match(reHrule)) {
return null;
}
- if ((match = rest.match(/^[*+-]( +|$)/))) {
+ if ((match = rest.match(reBulletListMarker))) {
spaces_after_marker = match[1].length;
data.type = 'Bullet';
data.bullet_char = match[0][0];
- } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) {
+ } else if ((match = rest.match(reOrderedListMarker))) {
spaces_after_marker = match[3].length;
data.type = 'Ordered';
data.start = parseInt(match[1]);
@@ -214,6 +247,11 @@ var incorporateLine = function(ln, line_number) {
var container = this.doc;
var oldtip = this.tip;
+ // replace NUL characters for security
+ if (ln.indexOf('\u0000') !== -1) {
+ ln = ln.replace(/\0/g, '\uFFFD');
+ }
+
// Convert tabs to spaces:
ln = detabLine(ln);
@@ -226,7 +264,7 @@ var incorporateLine = function(ln, line_number) {
}
container = container.lastChild;
- match = matchAt(/[^ ]/, ln, offset);
+ match = matchAt(reNonSpace, ln, offset);
if (match === -1) {
first_nonspace = ln.length;
blank = true;
@@ -319,13 +357,11 @@ var incorporateLine = function(ln, line_number) {
// want to close unmatched blocks. So we store this closure for
// use later, when we have more information.
var closeUnmatchedBlocks = function(mythis) {
- var already_done = false;
// finalize any blocks not matched
- while (!already_done && oldtip !== last_matched_container) {
+ while (oldtip !== last_matched_container) {
mythis.finalize(oldtip, line_number - 1);
oldtip = oldtip.parent;
}
- already_done = true;
};
// Check to see if we've hit 2nd blank line; if so break out of list:
@@ -339,9 +375,9 @@ var incorporateLine = function(ln, line_number) {
container.t !== 'IndentedCode' &&
container.t !== 'HtmlBlock' &&
// this is a little performance optimization:
- matchAt(/^[ #`~*+_=<>0-9-]/, ln, offset) !== -1) {
+ matchAt(reMaybeSpecial, ln, offset) !== -1) {
- match = matchAt(/[^ ]/, ln, offset);
+ match = matchAt(reNonSpace, ln, offset);
if (match === -1) {
first_nonspace = ln.length;
blank = true;
@@ -371,7 +407,7 @@ var incorporateLine = function(ln, line_number) {
closeUnmatchedBlocks(this);
container = this.addChild('BlockQuote', line_number, offset);
- } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) {
+ } else if ((match = ln.slice(first_nonspace).match(reATXHeaderMarker))) {
// ATX header
offset = first_nonspace + match[0].length;
closeUnmatchedBlocks(this);
@@ -382,7 +418,7 @@ var incorporateLine = function(ln, line_number) {
[ln.slice(offset).replace(/^ *#+ *$/, '').replace(/ +#+ *$/, '')];
break;
- } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) {
+ } else if ((match = ln.slice(first_nonspace).match(reCodeFence))) {
// fenced code block
var fence_length = match[0].length;
closeUnmatchedBlocks(this);
@@ -402,7 +438,7 @@ var incorporateLine = function(ln, line_number) {
} else if (container.t === 'Paragraph' &&
container.strings.length === 1 &&
- ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) {
+ ((match = ln.slice(first_nonspace).match(reSetextHeaderLine)))) {
// setext header line
closeUnmatchedBlocks(this);
container.t = 'Header'; // convert Paragraph to SetextHeader
@@ -447,7 +483,7 @@ var incorporateLine = function(ln, line_number) {
// What remains at the offset is a text line. Add the text to the
// appropriate container.
- match = matchAt(/[^ ]/, ln, offset);
+ match = matchAt(reNonSpace, ln, offset);
if (match === -1) {
first_nonspace = ln.length;
blank = true;
@@ -500,7 +536,7 @@ var incorporateLine = function(ln, line_number) {
// check for closing code fence:
match = (indent <= 3 &&
ln.charAt(first_nonspace) === container.fence_char &&
- ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/));
+ ln.slice(first_nonspace).match(reClosingCodeFence));
if (match && match[0].length >= container.fence_length) {
// don't add closing fence to container; instead, close it:
this.finalize(container, line_number);
@@ -569,7 +605,8 @@ var finalize = function(block, line_number) {
break;
case 'IndentedCode':
- block.literal = block.strings.join('\n').replace(/(\n *)*$/, '\n');
+ stripFinalBlankLines(block.strings);
+ block.literal = block.strings.join('\n') + '\n';
block.t = 'CodeBlock';
break;
@@ -644,21 +681,31 @@ var parse = function(input) {
this.doc = Document();
this.tip = this.doc;
this.refmap = {};
- var lines = input.replace(/\n$/, '').split(/\r\n|\n|\r/);
+ if (this.options.time) { console.time("preparing input"); }
+ var lines = input.split(reLineEnding);
var len = lines.length;
+ if (input.charCodeAt(input.length - 1) === C_NEWLINE) {
+ // ignore last blank line created by final newline
+ len -= 1;
+ }
+ if (this.options.time) { console.timeEnd("preparing input"); }
+ if (this.options.time) { console.time("block parsing"); }
for (var i = 0; i < len; i++) {
this.incorporateLine(lines[i], i + 1);
}
while (this.tip) {
this.finalize(this.tip, len);
}
+ if (this.options.time) { console.timeEnd("block parsing"); }
+ if (this.options.time) { console.time("inline parsing"); }
this.processInlines(this.doc);
+ if (this.options.time) { console.timeEnd("inline parsing"); }
return this.doc;
};
// The DocParser object.
-function DocParser(){
+function DocParser(options){
"use strict";
return {
doc: Document(),
@@ -672,7 +719,8 @@ function DocParser(){
incorporateLine: incorporateLine,
finalize: finalize,
processInlines: processInlines,
- parse: parse
+ parse: parse,
+ options: options || {}
};
}