summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--js/bench.js2
-rwxr-xr-xjs/bin/commonmark2
-rw-r--r--js/lib/blocks.js127
-rw-r--r--js/lib/html-renderer.js311
-rwxr-xr-xjs/lib/index.js1
-rw-r--r--js/lib/inlines.js257
-rw-r--r--js/lib/node.js195
-rwxr-xr-xjs/test.js2
8 files changed, 555 insertions, 342 deletions
diff --git a/js/bench.js b/js/bench.js
index 77f5025..a5e0d91 100644
--- a/js/bench.js
+++ b/js/bench.js
@@ -17,7 +17,7 @@ suite.add('commonmark.js markdown->html', function() {
"use strict";
var doc = new sm.DocParser().parse(contents);
var renderer = new sm.HtmlRenderer();
- renderer.renderBlock(doc);
+ renderer.render(doc);
})
.add('showdown.js markdown->html', function() {
diff --git a/js/bin/commonmark b/js/bin/commonmark
index 3c222f8..ac53e6e 100755
--- a/js/bin/commonmark
+++ b/js/bin/commonmark
@@ -14,7 +14,7 @@ var files = [];
if (process.argv[2] === '--ast') {
files = process.argv.slice(3);
renderer = { render: function(x) {
- return util.inspect(x, null, Infinity) + '\n';
+ return util.inspect(x.toAST(), null, Infinity, true) + '\n';
} };
} else {
files = process.argv.slice(2);
diff --git a/js/lib/blocks.js b/js/lib/blocks.js
index 786c099..cc2263a 100644
--- a/js/lib/blocks.js
+++ b/js/lib/blocks.js
@@ -1,3 +1,5 @@
+Node = require('./node');
+
var C_GREATERTHAN = 62;
var C_SPACE = 32;
var C_OPEN_BRACKET = 91;
@@ -48,19 +50,14 @@ var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/;
// These are methods of a DocParser object, defined below.
var makeBlock = function(tag, start_line, start_column) {
- return { t: tag,
- open: true,
- last_line_blank: false,
- start_line: start_line,
- start_column: start_column,
- end_line: start_line,
- children: [],
- parent: null,
- // string_content is formed by concatenating strings, in finalize:
- string_content: "",
- strings: [],
- children: []
- };
+ var b = new Node(tag);
+ b.pos.start = [start_line, start_column];
+ b.pos.end = []; // assigned in finalization step
+ b.open = true;
+ b.last_line_blank = false;
+ b.string_content = "";
+ b.strings = [];
+ return b;
};
// Returns true if parent block can contain child block.
@@ -81,14 +78,17 @@ var acceptsLines = function(block_type) {
// Returns true if block ends with a blank line, descending if needed
// into lists and sublists.
var endsWithBlankLine = function(block) {
- if (block.last_line_blank) {
+ while (block) {
+ if (block.last_line_blank) {
return true;
+ }
+ if (block.t === 'List' || block.t === 'ListItem') {
+ block = block.lastChild;
+ } else {
+ break;
+ }
}
- if ((block.t === 'List' || block.t === 'ListItem') && block.children.length > 0) {
- return endsWithBlankLine(block.children[block.children.length - 1]);
- } else {
- return false;
- }
+ return false;
};
// Break out of all containing lists, resetting the tip of the
@@ -135,8 +135,7 @@ var addChild = function(tag, line_number, offset) {
var column_number = offset + 1; // offset 0 = column 1
var newBlock = makeBlock(tag, line_number, column_number);
- this.tip.children.push(newBlock);
- newBlock.parent = this.tip;
+ this.tip.appendChild(newBlock);
this.tip = newBlock;
return newBlock;
};
@@ -190,7 +189,6 @@ var listsMatch = function(list_data, item_data) {
var incorporateLine = function(ln, line_number) {
var all_matched = true;
- var last_child;
var first_nonspace;
var offset = 0;
var match;
@@ -210,12 +208,11 @@ var incorporateLine = function(ln, line_number) {
// For each containing block, try to parse the associated line start.
// Bail out on failure: container will point to the last matching block.
// Set all_matched to false if not all containers match.
- while (container.children.length > 0) {
- last_child = container.children[container.children.length - 1];
- if (!last_child.open) {
+ while (container.lastChild) {
+ if (!container.lastChild.open) {
break;
}
- container = last_child;
+ container = container.lastChild;
match = matchAt(/[^ ]/, ln, offset);
if (match === -1) {
@@ -472,8 +469,8 @@ var incorporateLine = function(ln, line_number) {
container.t === 'Header' ||
container.t === 'FencedCode' ||
(container.t === 'ListItem' &&
- container.children.length === 0 &&
- container.start_line === line_number));
+ !container.firstChild &&
+ container.pos.start[0] === line_number));
var cont = container;
while (cont.parent) {
@@ -537,10 +534,10 @@ var finalize = function(block, line_number) {
return 0;
}
block.open = false;
- if (line_number > block.start_line) {
- block.end_line = line_number - 1;
+ if (line_number > block.pos.start[0]) {
+ block.pos.end = [line_number - 1]; // TODO end column
} else {
- block.end_line = line_number;
+ block.pos.end = [line_number];
}
switch (block.t) {
@@ -584,30 +581,24 @@ var finalize = function(block, line_number) {
case 'List':
block.tight = true; // tight by default
- var numitems = block.children.length;
- var i = 0;
- while (i < numitems) {
- var item = block.children[i];
+ var item = block.firstChild;
+ while (item) {
// check for non-final list item ending with blank line:
- var last_item = i === numitems - 1;
- if (endsWithBlankLine(item) && !last_item) {
+ if (endsWithBlankLine(item) && item.next) {
block.tight = false;
break;
}
// recurse into children of list item, to see if there are
// spaces between any of them:
- var numsubitems = item.children.length;
- var j = 0;
- while (j < numsubitems) {
- var subitem = item.children[j];
- var last_subitem = j === numsubitems - 1;
- if (endsWithBlankLine(subitem) && !(last_item && last_subitem)) {
+ var subitem = item.firstChild;
+ while (subitem) {
+ if (endsWithBlankLine(subitem) && (item.next || subitem.next)) {
block.tight = false;
break;
}
- j++;
+ subitem = subitem.next;
}
- i++;
+ item = item.next;
}
break;
@@ -621,45 +612,14 @@ var finalize = function(block, line_number) {
// Walk through a block & children recursively, parsing string content
// into inline content where appropriate. Returns new object.
var processInlines = function(block) {
- var newblock = {};
- newblock.t = block.t;
- newblock.start_line = block.start_line;
- newblock.start_column = block.start_column;
- newblock.end_line = block.end_line;
-
- switch(block.t) {
- case 'Paragraph':
- newblock.children =
- this.inlineParser.parse(block.string_content.trim(), this.refmap);
- break;
- case 'Header':
- newblock.children =
- this.inlineParser.parse(block.string_content.trim(), this.refmap);
- newblock.level = block.level;
- break;
- case 'List':
- newblock.list_data = block.list_data;
- newblock.tight = block.tight;
- break;
- case 'CodeBlock':
- newblock.string_content = block.string_content;
- newblock.info = block.info;
- break;
- case 'HtmlBlock':
- newblock.string_content = block.string_content;
- break;
- default:
- break;
- }
-
- if (block.children && !newblock.children) {
- var newchildren = [];
- for (var i = 0; i < block.children.length; i++) {
- newchildren.push(this.processInlines(block.children[i]));
+ var node, event;
+ var walker = block.walker();
+ while (event = walker.next()) {
+ node = event.node;
+ if (!event.entering && (node.t == 'Paragraph' || node.t == 'Header')) {
+ this.inlineParser.parse(node, this.refmap);
}
- newblock.children = newchildren;
}
- return newblock;
};
// The main parsing function. Returns a parsed document AST.
@@ -675,7 +635,8 @@ var parse = function(input) {
while (this.tip) {
this.finalize(this.tip, len - 1);
}
- return this.processInlines(this.doc);
+ this.processInlines(this.doc);
+ return this.doc;
};
diff --git a/js/lib/html-renderer.js b/js/lib/html-renderer.js
index f6efe32..e7953cf 100644
--- a/js/lib/html-renderer.js
+++ b/js/lib/html-renderer.js
@@ -1,6 +1,6 @@
-// Helper function to produce content in a pair of HTML tags.
-var inTags = function(tag, attribs, contents, selfclosing) {
- var result = '<' + tag;
+// Helper function to produce an HTML tag.
+var tag = function(name, attribs, selfclosing) {
+ var result = '<' + name;
if (attribs) {
var i = 0;
var attrib;
@@ -9,132 +9,207 @@ var inTags = function(tag, attribs, contents, selfclosing) {
i++;
}
}
- if (contents) {
- result = result.concat('>', contents, '</', tag, '>');
- } else if (selfclosing) {
- result = result + ' />';
- } else {
- result = result.concat('></', tag, '>');
- }
- return result;
-};
-
-// Render an inline element as HTML.
-var renderInline = function(inline) {
- var attrs;
- switch (inline.t) {
- case 'Text':
- return this.escape(inline.c);
- case 'Softbreak':
- return this.softbreak;
- case 'Hardbreak':
- return inTags('br', [], "", true) + '\n';
- case 'Emph':
- return inTags('em', [], this.renderInlines(inline.children));
- case 'Strong':
- return inTags('strong', [], this.renderInlines(inline.children));
- case 'Html':
- return inline.c;
- case 'Link':
- attrs = [['href', this.escape(inline.destination, true)]];
- if (inline.title) {
- attrs.push(['title', this.escape(inline.title, true)]);
- }
- return inTags('a', attrs, this.renderInlines(inline.children));
- case 'Image':
- attrs = [['src', this.escape(inline.destination, true)],
- ['alt', this.renderInlines(inline.children).
- replace(/\<[^>]*alt="([^"]*)"[^>]*\>/g, '$1').
- replace(/\<[^>]*\>/g, '')]];
- if (inline.title) {
- attrs.push(['title', this.escape(inline.title, true)]);
- }
- return inTags('img', attrs, "", true);
- case 'Code':
- return inTags('code', [], this.escape(inline.c));
- default:
- console.log("Unknown inline type " + inline.t);
- return "";
- }
-};
+ if (selfclosing)
+ result += ' /';
-// Render a list of inlines.
-var renderInlines = function(inlines) {
- var result = '';
- for (var i = 0; i < inlines.length; i++) {
- result = result + this.renderInline(inlines[i]);
- }
+ result += '>';
return result;
};
-// Render a single block element.
-var renderBlock = function(block, in_tight_list) {
- var tag;
- var attr;
+var renderNodes = function(block) {
+
+ var attrs;
var info_words;
- switch (block.t) {
- case 'Document':
- var whole_doc = this.renderBlocks(block.children);
- return (whole_doc === '' ? '' : whole_doc + '\n');
- case 'Paragraph':
- if (in_tight_list) {
- return this.renderInlines(block.children);
+ var tagname;
+ var walker = block.walker();
+ var event, node, entering;
+ var buffer = [];
+ var disableTags = 0;
+ var grandparent;
+ var out = function(s) {
+ if (disableTags > 0) {
+ buffer.push(s.replace(/\<[^>]*\>/g, ''));
} else {
- return inTags('p', [], this.renderInlines(block.children));
- }
- break;
- case 'BlockQuote':
- var filling = this.renderBlocks(block.children);
- return inTags('blockquote', [], filling === '' ? this.innersep :
- this.innersep + filling + this.innersep);
- case 'ListItem':
- var contents = this.renderBlocks(block.children, in_tight_list);
- if (/^[<]/.test(contents)) {
- contents = '\n' + contents;
+ buffer.push(s);
}
- if (/[>]$/.test(contents)) {
- contents = contents + '\n';
+ }
+ var esc = this.escape;
+ var cr = function() {
+ if (buffer.length > 0 && buffer[buffer.length - 1] !== '\n') {
+ out('\n');
}
- return inTags('li', [], contents, false).trim();
- case 'List':
- tag = block.list_data.type === 'Bullet' ? 'ul' : 'ol';
- attr = (!block.list_data.start || block.list_data.start === 1) ?
- [] : [['start', block.list_data.start.toString()]];
- return inTags(tag, attr, this.innersep +
- this.renderBlocks(block.children, block.tight) +
- this.innersep);
- case 'Header':
- tag = 'h' + block.level;
- return inTags(tag, [], this.renderInlines(block.children));
- case 'CodeBlock':
- info_words = block.info ? block.info.split(/ +/) : [];
- attr = (info_words.length === 0 || info_words[0].length === 0) ?
- [] : [['class', 'language-' + this.escape(info_words[0], true)]];
- return inTags('pre', [],
- inTags('code', attr, this.escape(block.string_content)));
- case 'HtmlBlock':
- return block.string_content;
- case 'ReferenceDef':
- return "";
- case 'HorizontalRule':
- return inTags('hr', [], "", true);
- default:
- console.log("Unknown block type " + block.t);
- return "";
}
-};
-// Render a list of block elements, separated by this.blocksep.
-var renderBlocks = function(blocks, in_tight_list) {
- var result = [];
- for (var i = 0; i < blocks.length; i++) {
- if (blocks[i].t !== 'ReferenceDef') {
- result.push(this.renderBlock(blocks[i], in_tight_list));
+ while (event = walker.next()) {
+ entering = event.entering;
+ node = event.node;
+
+ switch (node.t) {
+ case 'Text':
+ out(esc(node.c));
+ break;
+
+ case 'Softbreak':
+ out(this.softbreak);
+ break;
+
+ case 'Hardbreak':
+ out(tag('br', [], true));
+ cr();
+ break;
+
+ case 'Emph':
+ out(tag(entering ? 'em' : '/em'));
+ break;
+
+ case 'Strong':
+ out(tag(entering ? 'strong' : '/strong'));
+ break;
+
+ case 'Emph':
+ out(tag(entering ? 'strong' : '/strong'));
+ break;
+
+ case 'Html':
+ out(node.c);
+ break;
+
+ case 'Link':
+ if (entering) {
+ attrs = [['href', esc(node.destination, true)]];
+ if (node.title) {
+ attrs.push(['title', esc(node.title, true)]);
+ }
+ out(tag('a', attrs));
+ } else {
+ out(tag('/a'));
+ }
+ break;
+
+ case 'Image':
+ if (entering) {
+ if (disableTags == 0) {
+ out('<img src="' + esc(node.destination, true) +
+ '" alt="');
+ }
+ disableTags += 1;
+ } else {
+ disableTags -= 1;
+ if (disableTags == 0) {
+ if (node.title) {
+ out('" title="' + esc(node.title, true));
+ }
+ out('" />');
+ }
+ }
+ break;
+
+ case 'Code':
+ out(tag('code') + esc(node.c) + tag('/code'));
+ break;
+
+ case 'Document':
+ break;
+
+ case 'Paragraph':
+ grandparent = node.parent.parent;
+ if (grandparent !== null &&
+ grandparent.t === 'List') {
+ if (grandparent.tight)
+ break;
+ }
+ if (entering) {
+ cr();
+ out(tag('p'));
+ } else {
+ out(tag('/p'));
+ cr();
+ }
+ break;
+
+ case 'BlockQuote':
+ if (entering) {
+ cr();
+ out(tag('blockquote'));
+ cr();
+ } else {
+ cr();
+ out(tag('/blockquote'));
+ cr();
+ }
+ break;
+
+ case 'ListItem':
+ if (entering) {
+ out(tag('li'));
+ } else {
+ out(tag('/li'));
+ cr();
+ }
+ break;
+
+ case 'List':
+ tagname = node.list_data.type === 'Bullet' ? 'ul' : 'ol';
+ if (entering) {
+ attr = (!node.list_data.start || node.list_data.start === 1) ?
+ [] : [['start', node.list_data.start.toString()]];
+ cr();
+ out(tag(tagname, attr));
+ cr();
+ } else {
+ cr();
+ out(tag('/' + tagname));
+ cr();
+ }
+ break;
+
+ case 'Header':
+ tagname = 'h' + node.level;
+ if (entering) {
+ cr();
+ out(tag(tagname));
+ } else {
+ out(tag('/' + tagname));
+ cr();
+ }
+ break;
+
+ case 'CodeBlock':
+ info_words = node.info ? node.info.split(/ +/) : [];
+ attr = (info_words.length === 0 || info_words[0].length === 0)
+ ? [] : [['class', 'language-' + esc(info_words[0], true)]];
+ cr();
+ out(tag('pre') + tag('code', attr));
+ out(this.escape(node.string_content));
+ out(tag('/code') + tag('/pre'));
+ cr();
+ break;
+
+ case 'HtmlBlock':
+ cr();
+ out(node.string_content);
+ cr();
+ break;
+
+ case 'HorizontalRule':
+ cr();
+ out(tag('hr', [], true));
+ cr();
+ break;
+
+
+ case 'ReferenceDef':
+ break;
+
+ default:
+ console.log("Unknown node type " + node.t);
}
+
}
- return result.join(this.blocksep);
+ return buffer.join('');
};
+
// The HtmlRenderer object.
function HtmlRenderer(){
return {
@@ -157,11 +232,7 @@ function HtmlRenderer(){
.replace(/["]/g, '&quot;');
}
},
- renderInline: renderInline,
- renderInlines: renderInlines,
- renderBlock: renderBlock,
- renderBlocks: renderBlocks,
- render: renderBlock
+ render: renderNodes
};
}
diff --git a/js/lib/index.js b/js/lib/index.js
index 4e04532..b2388ce 100755
--- a/js/lib/index.js
+++ b/js/lib/index.js
@@ -17,6 +17,7 @@ var renderAST = function(tree) {
return util.inspect(tree, {depth: null});
};
+module.exports.Node = require('./node');
module.exports.DocParser = require('./blocks');
module.exports.HtmlRenderer = require('./html-renderer');
module.exports.ASTRenderer = renderAST;
diff --git a/js/lib/inlines.js b/js/lib/inlines.js
index ef7a00f..23b2b1e 100644
--- a/js/lib/inlines.js
+++ b/js/lib/inlines.js
@@ -85,6 +85,12 @@ var normalizeReference = function(s) {
.toUpperCase();
};
+var text = function(s) {
+ var node = new Node('Text');
+ node.c = s;
+ return node;
+}
+
// INLINE PARSER
// These are methods of an InlineParser object, defined below.
@@ -123,9 +129,9 @@ var spnl = function() {
// in the subject. If they succeed in matching anything, they
// return the inline matched, advancing the subject.
-// Attempt to parse backticks, returning either a backtick code span or a
+// Attempt to parse backticks, adding either a backtick code span or a
// literal sequence of backticks.
-var parseBackticks = function(inlines) {
+var parseBackticks = function(block) {
var ticks = this.match(/^`+/);
if (!ticks) {
return 0;
@@ -133,37 +139,42 @@ var parseBackticks = function(inlines) {
var afterOpenTicks = this.pos;
var foundCode = false;
var matched;
+ var node;
while (!foundCode && (matched = this.match(/`+/m))) {
if (matched === ticks) {
- inlines.push({ t: 'Code', c: this.subject.slice(afterOpenTicks,
- this.pos - ticks.length)
- .replace(/[ \n]+/g, ' ')
- .trim() });
+ node = new Node('Code');
+ node.c = this.subject.slice(afterOpenTicks,
+ this.pos - ticks.length)
+ .replace(/[ \n]+/g, ' ')
+ .trim();
+ block.appendChild(node);
return true;
}
}
// If we got here, we didn't match a closing backtick sequence.
this.pos = afterOpenTicks;
- inlines.push({ t: 'Text', c: ticks });
+ block.appendChild(text(ticks));
return true;
};
// Parse a backslash-escaped special character, adding either the escaped
// character, a hard line break (if the backslash is followed by a newline),
-// or a literal backslash to the 'inlines' list.
-var parseBackslash = function(inlines) {
+// or a literal backslash to the block's children.
+var parseBackslash = function(block) {
var subj = this.subject,
pos = this.pos;
+ var node;
if (subj.charCodeAt(pos) === C_BACKSLASH) {
if (subj.charAt(pos + 1) === '\n') {
this.pos = this.pos + 2;
- inlines.push({ t: 'Hardbreak' });
+ node = new Node('Hardbreak');
+ block.appendChild(node);
} else if (reEscapable.test(subj.charAt(pos + 1))) {
this.pos = this.pos + 2;
- inlines.push({ t: 'Text', c: subj.charAt(pos + 1) });
+ block.appendChild(text(subj.charAt(pos + 1)));
} else {
this.pos++;
- inlines.push({t: 'Text', c: '\\'});
+ block.appendChild(text('\\'));
}
return true;
} else {
@@ -172,22 +183,23 @@ var parseBackslash = function(inlines) {
};
// Attempt to parse an autolink (URL or email in pointy brackets).
-var parseAutolink = function(inlines) {
+var parseAutolink = function(block) {
var m;
var dest;
+ var node;
if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink
dest = m.slice(1, -1);
- inlines.push(
- {t: 'Link',
- children: [{ t: 'Text', c: dest }],
- destination: 'mailto:' + encodeURI(unescape(dest)) });
+ node = new Node('Link');
+ node.destination = 'mailto:' + encodeURI(unescape(dest));
+ node.appendChild(text(dest));
+ block.appendChild(node);
return true;
} else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) {
dest = m.slice(1, -1);
- inlines.push({
- t: 'Link',
- children: [{ t: 'Text', c: dest }],
- destination: encodeURI(unescape(dest)) });
+ node = new Node('Link');
+ node.destination = encodeURI(unescape(dest));
+ node.appendChild(text(dest));
+ block.appendChild(node);
return true;
} else {
return false;
@@ -195,10 +207,13 @@ var parseAutolink = function(inlines) {
};
// Attempt to parse a raw HTML tag.
-var parseHtmlTag = function(inlines) {
+var parseHtmlTag = function(block) {
var m = this.match(reHtmlTag);
+ var node;
if (m) {
- inlines.push({ t: 'Html', c: m });
+ node = new Node('Html');
+ node.c = m;
+ block.appendChild(node);
return true;
} else {
return false;
@@ -247,20 +262,8 @@ var scanDelims = function(cc) {
can_close: can_close };
};
-var Emph = function(ils) {
- return {t: 'Emph', children: ils};
-};
-
-var Strong = function(ils) {
- return {t: 'Strong', children: ils};
-};
-
-var Str = function(s) {
- return {t: 'Text', c: s};
-};
-
// Attempt to parse emphasis or strong emphasis.
-var parseEmphasis = function(cc, inlines) {
+var parseEmphasis = function(cc, block) {
var res = this.scanDelims(cc);
var numdelims = res.numdelims;
@@ -271,12 +274,13 @@ var parseEmphasis = function(cc, inlines) {
}
this.pos += numdelims;
- inlines.push(Str(this.subject.slice(startpos, this.pos)));
+ var node = text(this.subject.slice(startpos, this.pos));
+ block.appendChild(node);
// Add entry to stack for this opener
this.delimiters = { cc: cc,
numdelims: numdelims,
- pos: inlines.length - 1,
+ node: node,
previous: this.delimiters,
next: null,
can_open: res.can_open,
@@ -302,28 +306,14 @@ var removeDelimiter = function(delim) {
}
};
-var removeGaps = function(inlines) {
- // remove gaps from inlines
- var i, j;
- j = 0;
- for (i = 0 ; i < inlines.length; i++) {
- if (inlines[i] !== null) {
- inlines[j] = inlines[i];
- j++;
- }
- }
- inlines.splice(j);
-};
-
-var processEmphasis = function(inlines, stack_bottom) {
+var processEmphasis = function(block, stack_bottom) {
"use strict";
var opener, closer;
var opener_inl, closer_inl;
var nextstack, tempstack;
var use_delims;
var contents;
- var emph;
- var i;
+ var tmp, next;
// find first closer above stack_bottom:
closer = this.delimiters;
@@ -350,8 +340,8 @@ var processEmphasis = function(inlines, stack_bottom) {
use_delims = closer.numdelims % 2 === 0 ? 2 : 1;
}
- opener_inl = inlines[opener.pos];
- closer_inl = inlines[closer.pos];
+ opener_inl = opener.node;
+ closer_inl = closer.node;
// remove used delimiters from stack elts and inlines
opener.numdelims -= use_delims;
@@ -360,17 +350,18 @@ var processEmphasis = function(inlines, stack_bottom) {
closer_inl.c = closer_inl.c.slice(0, closer_inl.c.length - use_delims);
// build contents for new emph element
- contents = inlines.slice(opener.pos + 1, closer.pos);
- removeGaps(contents);
-
- emph = use_delims === 1 ? Emph(contents) : Strong(contents);
-
- // insert into list of inlines
- inlines[opener.pos + 1] = emph;
- for (i = opener.pos + 2; i < closer.pos; i++) {
- inlines[i] = null;
+ var emph = new Node(use_delims === 1 ? 'Emph' : 'Strong');
+
+ tmp = opener_inl.next;
+ while (tmp && tmp !== closer_inl) {
+ next = tmp.next;
+ tmp.unlink();
+ emph.appendChild(tmp);
+ tmp = next;
}
+ opener_inl.insertAfter(emph);
+
// remove elts btw opener and closer in delimiters stack
tempstack = closer.previous;
while (tempstack !== null && tempstack !== opener) {
@@ -381,18 +372,17 @@ var processEmphasis = function(inlines, stack_bottom) {
// if opener has 0 delims, remove it and the inline
if (opener.numdelims === 0) {
- inlines[opener.pos] = null;
+ opener_inl.unlink();
this.removeDelimiter(opener);
}
if (closer.numdelims === 0) {
- inlines[closer.pos] = null;
+ closer_inl.unlink();
tempstack = closer.next;
this.removeDelimiter(closer);
closer = tempstack;
}
-
} else {
closer = closer.next;
}
@@ -403,8 +393,6 @@ var processEmphasis = function(inlines, stack_bottom) {
}
- removeGaps(inlines);
-
// remove all delimiters
while (this.delimiters !== stack_bottom) {
this.removeDelimiter(this.delimiters);
@@ -445,18 +433,19 @@ var parseLinkLabel = function() {
return m === null ? 0 : m.length;
};
-// Add open bracket to delimiter stack and add a Str to inlines.
-var parseOpenBracket = function(inlines) {
+// Add open bracket to delimiter stack and add a text node to block's children.
+var parseOpenBracket = function(block) {
var startpos = this.pos;
this.pos += 1;
- inlines.push(Str("["));
+ var node = text('[');
+ block.appendChild(node);
// Add entry to stack for this opener
this.delimiters = { cc: C_OPEN_BRACKET,
numdelims: 1,
- pos: inlines.length - 1,
+ node: node,
previous: this.delimiters,
next: null,
can_open: true,
@@ -472,19 +461,21 @@ var parseOpenBracket = function(inlines) {
};
// IF next character is [, and ! delimiter to delimiter stack and
-// add a Str to inlines. Otherwise just add a Str.
-var parseBang = function(inlines) {
+// add a text node to block's children. Otherwise just add a text node.
+var parseBang = function(block) {
var startpos = this.pos;
this.pos += 1;
if (this.peek() === C_OPEN_BRACKET) {
this.pos += 1;
- inlines.push(Str("!["));
+
+ var node = text('![');
+ block.appendChild(node);
// Add entry to stack for this opener
this.delimiters = { cc: C_BANG,
numdelims: 1,
- pos: inlines.length - 1,
+ node: node,
previous: this.delimiters,
next: null,
can_open: true,
@@ -495,22 +486,21 @@ var parseBang = function(inlines) {
this.delimiters.previous.next = this.delimiters;
}
} else {
- inlines.push(Str("!"));
+ block.appendChild(text('!'));
}
return true;
};
// Try to match close bracket against an opening in the delimiter
// stack. Add either a link or image, or a plain [ character,
-// to the inlines stack. If there is a matching delimiter,
+// to block's children. If there is a matching delimiter,
// remove it from the delimiter stack.
-var parseCloseBracket = function(inlines) {
+var parseCloseBracket = function(block) {
var startpos;
var is_image;
var dest;
var title;
var matched = false;
- var link_text;
var i;
var reflabel;
var opener;
@@ -530,13 +520,13 @@ var parseCloseBracket = function(inlines) {
if (opener === null) {
// no matched opener, just return a literal
- inlines.push(Str("]"));
+ block.appendChild(text(']'));
return true;
}
if (!opener.active) {
// no matched opener, just return a literal
- inlines.push(Str("]"));
+ block.appendChild(text(']'));
// take opener off emphasis stack
this.removeDelimiter(opener);
return true;
@@ -544,15 +534,6 @@ var parseCloseBracket = function(inlines) {
// If we got here, open is a potential opener
is_image = opener.cc === C_BANG;
- // instead of copying a slice, we null out the
- // parts of inlines that don't correspond to link_text;
- // later, we'll collapse them. This is awkward, and could
- // be simplified if we made inlines a linked list rather than
- // an array:
- link_text = inlines.slice(0);
- for (i = 0; i < opener.pos + 1; i++) {
- link_text[i] = null;
- }
// Check to see if we have a link/image
@@ -597,13 +578,22 @@ var parseCloseBracket = function(inlines) {
}
if (matched) {
- this.processEmphasis(link_text, opener.previous);
-
- // remove the part of inlines that became link_text.
- // see note above on why we need to do this instead of splice:
- for (i = opener.pos; i < inlines.length; i++) {
- inlines[i] = null;
+ var node = new Node(is_image ? 'Image' : 'Link');
+ node.destination = dest;
+ node.title = title;
+
+ var tmp, next;
+ tmp = opener.node.next;
+ while (tmp) {
+ next = tmp.next;
+ tmp.unlink();
+ node.appendChild(tmp);
+ tmp = next;
}
+ block.appendChild(node);
+ this.processEmphasis(node, opener.previous);
+
+ opener.node.unlink();
// processEmphasis will remove this and later delimiters.
// Now, for a link, we also deactivate earlier link openers.
@@ -618,27 +608,23 @@ var parseCloseBracket = function(inlines) {
}
}
- inlines.push({t: is_image ? 'Image' : 'Link',
- destination: dest,
- title: title,
- children: link_text});
return true;
} else { // no match
this.removeDelimiter(opener); // remove this opener from stack
this.pos = startpos;
- inlines.push(Str("]"));
+ block.appendChild(text(']'));
return true;
}
};
// Attempt to parse an entity, return Entity object if successful.
-var parseEntity = function(inlines) {
+var parseEntity = function(block) {
var m;
if ((m = this.match(reEntityHere))) {
- inlines.push({ t: 'Text', c: entityToChar(m) });
+ block.appendChild(text(entityToChar(m)));
return true;
} else {
return false;
@@ -646,11 +632,11 @@ var parseEntity = function(inlines) {
};
// Parse a run of ordinary characters, or a single character with
-// a special meaning in markdown, as a plain string, adding to inlines.
-var parseString = function(inlines) {
+// a special meaning in markdown, as a plain string.
+var parseString = function(block) {
var m;
if ((m = this.match(reMain))) {
- inlines.push({ t: 'Text', c: m });
+ block.appendChild(text(m));
return true;
} else {
return false;
@@ -659,17 +645,15 @@ var parseString = function(inlines) {
// Parse a newline. If it was preceded by two spaces, return a hard
// line break; otherwise a soft line break.
-var parseNewline = function(inlines) {
+var parseNewline = function(block) {
var m = this.match(/^ *\n/);
if (m) {
- if (m.length > 2) {
- inlines.push({ t: 'Hardbreak' });
- } else if (m.length > 0) {
- inlines.push({ t: 'Softbreak' });
- }
+ var node = new Node(m.length > 2 ? 'Hardbreak' : 'Softbreak');
+ block.appendChild(node);
return true;
+ } else {
+ return false;
}
- return false;
};
// Attempt to parse a link reference, modifying refmap.
@@ -731,9 +715,9 @@ var parseReference = function(s, refmap) {
};
// Parse the next inline element in subject, advancing subject position.
-// On success, add the result to the inlines list, and return true.
+// On success, add the result to block's children and return true.
// On failure, return false.
-var parseInline = function(inlines) {
+var parseInline = function(block) {
"use strict";
var c = this.peek();
if (c === -1) {
@@ -743,56 +727,57 @@ var parseInline = function(inlines) {
switch(c) {
case C_NEWLINE:
case C_SPACE:
- res = this.parseNewline(inlines);
+ res = this.parseNewline(block);
break;
case C_BACKSLASH:
- res = this.parseBackslash(inlines);
+ res = this.parseBackslash(block);
break;
case C_BACKTICK:
- res = this.parseBackticks(inlines);
+ res = this.parseBackticks(block);
break;
case C_ASTERISK:
case C_UNDERSCORE:
- res = this.parseEmphasis(c, inlines);
+ res = this.parseEmphasis(c, block);
break;
case C_OPEN_BRACKET:
- res = this.parseOpenBracket(inlines);
+ res = this.parseOpenBracket(block);
break;
case C_BANG:
- res = this.parseBang(inlines);
+ res = this.parseBang(block);
break;
case C_CLOSE_BRACKET:
- res = this.parseCloseBracket(inlines);
+ res = this.parseCloseBracket(block);
break;
case C_LESSTHAN:
- res = this.parseAutolink(inlines) || this.parseHtmlTag(inlines);
+ res = this.parseAutolink(block) || this.parseHtmlTag(block);
break;
case C_AMPERSAND:
- res = this.parseEntity(inlines);
+ res = this.parseEntity(block);
break;
default:
- res = this.parseString(inlines);
+ res = this.parseString(block);
break;
}
if (!res) {
this.pos += 1;
- inlines.push({t: 'Text', c: fromCodePoint(c)});
+ var textnode = new Node('Text');
+ textnode.c = fromCodePoint(c);
+ block.appendChild(textnode);
}
return true;
};
-// Parse s as a list of inlines, using refmap to resolve references.
-var parseInlines = function(s, refmap) {
- this.subject = s;
+// Parse string_content in block into inline children,
+// using refmap to resolve references.
+var parseInlines = function(block, refmap) {
+ this.subject = block.string_content.trim();
this.pos = 0;
this.refmap = refmap || {};
this.delimiters = null;
- var inlines = [];
- while (this.parseInline(inlines)) {
+ while (this.parseInline(block)) {
}
- this.processEmphasis(inlines, null);
- return inlines;
+ this.processEmphasis(block, null);
};
// The InlineParser object.
diff --git a/js/lib/node.js b/js/lib/node.js
new file mode 100644
index 0000000..f96d65c
--- /dev/null
+++ b/js/lib/node.js
@@ -0,0 +1,195 @@
+util = require('util');
+
+function isContainer(node) {
+ var t = node.t;
+ return (t == 'Document' ||
+ t == 'BlockQuote' ||
+ t == 'List' ||
+ t == 'ListItem' ||
+ t == 'Paragraph' ||
+ t == 'Header' ||
+ t == 'Emph' ||
+ t == 'Strong' ||
+ t == 'Link' ||
+ t == 'Image');
+}
+
+function NodeWalker(root) {
+ this.current = root;
+ this.root = root;
+ this.entering = true;
+}
+
+NodeWalker.prototype.resumeAt = function(node, entering) {
+ this.current = node;
+ this.entering = (entering === true);
+};
+
+NodeWalker.prototype.next = function(){
+ var cur = this.current;
+ var entering = this.entering;
+
+ if (!cur)
+ return null;
+
+ var container = isContainer(cur);
+
+ if (entering && container) {
+ if (cur.firstChild) {
+ this.current = cur.firstChild;
+ this.entering = true;
+ } else {
+ // stay on node but exit
+ this.entering = false;
+ }
+
+ } else if (!entering && cur == this.root) {
+ // don't move past root
+ this.current = null;
+
+ } else if (cur.next) {
+ this.current = cur.next;
+ this.entering = true;
+
+ } else {
+ this.current = cur.parent;
+ this.entering = false;
+ }
+
+ return {entering: entering, node: cur};
+};
+
+function Node(nodeType) {
+ this.t = nodeType;
+ this.parent = null;
+ this.firstChild = null;
+ this.lastChild = null;
+ this.prev = null;
+ this.next = null;
+ this.pos = {};
+}
+
+Node.prototype.isContainer = function() {
+ return isContainer(this);
+};
+
+Node.prototype.appendChild = function(child) {
+ child.unlink();
+ child.parent = this;
+ if (this.lastChild) {
+ this.lastChild.next = child;
+ child.prev = this.lastChild;
+ this.lastChild = child;
+ } else {
+ this.firstChild = child;
+ this.lastChild = child;
+ }
+};
+
+Node.prototype.prependChild = function(child) {
+ child.unlink();
+ child.parent = this;
+ if (this.firstChild) {
+ this.firstChild.prev = child;
+ child.next = this.firstChild;
+ this.firstChild = child;
+ } else {
+ this.firstChild = child;
+ this.lastChild = child;
+ }
+};
+
+Node.prototype.unlink = function() {
+ if (this.prev) {
+ this.prev.next = this.next;
+ } else if (this.parent) {
+ this.parent.firstChild = this.next;
+ }
+ if (this.next) {
+ this.next.prev = this.prev;
+ } else if (this.parent) {
+ this.parent.lastChild = this.prev;
+ }
+ this.parent = null;
+ this.next = null;
+ this.prev = null;
+};
+
+Node.prototype.insertAfter = function(sibling) {
+ sibling.unlink();
+ sibling.next = this.next;
+ if (sibling.next) {
+ sibling.next.prev = sibling;
+ }
+ sibling.prev = this;
+ this.next = sibling;
+ sibling.parent = this.parent;
+ if (!sibling.next) {
+ sibling.parent.lastChild = sibling;
+ }
+};
+
+Node.prototype.insertBefore = function(sibling) {
+ sibling.unlink();
+ sibling.prev = this.prev;
+ if (sibling.prev) {
+ sibling.prev.next = sibling;
+ }
+ sibling.next = this;
+ this.prev = sibling;
+ sibling.parent = this.parent;
+ if (!sibling.prev) {
+ sibling.parent.firstChild = sibling;
+ }
+};
+
+Node.prototype.walker = function() {
+ var walker = new NodeWalker(this);
+ return walker;
+};
+
+Node.prototype.toAST = function() {
+ var children;
+ var cur;
+ var result = { t: this.t };
+
+ var propsToShow = ['t', 'c', 'list_data', 'string_content',
+ 'pos', 'start_column', 'end_column',
+ 'tight', 'info'];
+
+ for (var i = 0; i < propsToShow.length; i++) {
+ var prop = propsToShow[i];
+ if (this[prop] !== undefined) {
+ result[prop] = this[prop];
+ }
+ }
+
+ if (isContainer(this)) {
+ children = [];
+ if (this.firstChild) {
+ cur = this.firstChild;
+ while (cur) {
+ // TODO avoid recursion here...
+ children.push(cur.toAST());
+ cur = cur.next;
+ }
+ result.children = children;
+ }
+ }
+ return result;
+
+};
+
+module.exports = Node;
+
+
+/* Example of use of walker:
+
+ var walker = w.walker();
+ var event;
+
+ while (event = walker.next()) {
+ console.log(event.entering, event.node.t);
+ }
+
+*/
diff --git a/js/test.js b/js/test.js
index 73ec450..6cb7c98 100755
--- a/js/test.js
+++ b/js/test.js
@@ -56,7 +56,7 @@ fs.readFile('spec.txt', 'utf8', function(err, data) {
current_section = example.section;
cursor.reset().write(current_section).reset().write(' ');
}
- var actual = writer.renderBlock(reader.parse(example.markdown.replace(/→/g, '\t')));
+ var actual = writer.render(reader.parse(example.markdown.replace(/→/g, '\t')));
if (actual === example.html) {
passed++;
cursor.green().write('✓').reset();