From 6d7d6cf150dedb53b7f0972b79313df3364ebbed Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 7 Sep 2014 15:20:41 -0700 Subject: stmd.js: Added memoization of inline parsing. --- js/stmd.js | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/js/stmd.js b/js/stmd.js index 15d7345..63234f6 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -672,6 +672,13 @@ var parseReference = function(s, refmap) { // Parse the next inline element in subject, advancing subject position // and adding the result to 'inlines'. var parseInline = function(inlines) { + var startpos = this.pos; + var memoized = this.memo[startpos]; + if (memoized) { + inlines.push(memoized.inlines); + this.pos += memoized.len; + return memoized.len; + } var c = this.peek(); var res; switch(c) { @@ -703,7 +710,13 @@ var parseInline = function(inlines) { break; default: } - return res || this.parseString(inlines); + if (!res) { + res = this.parseString(inlines); + } + if (res > 0) { + this.memo[startpos] = { inlines: inlines[inlines.length - 1], len: res }; + } + return res; }; // Parse s as a list of inlines, using refmap to resolve references. @@ -711,6 +724,7 @@ var parseInlines = function(s, refmap) { this.subject = s; this.pos = 0; this.refmap = refmap || {}; + this.memo = {}; var inlines = []; while (this.parseInline(inlines)) ; return inlines; @@ -723,6 +737,7 @@ function InlineParser(){ label_nest_level: 0, // used by parseLinkLabel method pos: 0, refmap: {}, + memo: {}, match: match, peek: peek, spnl: spnl, -- cgit v1.2.3 From a56eca884caec58308387acffb9813b75241f0be Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 7 Sep 2014 22:12:44 -0700 Subject: New strategy: did parseNewlines, parseString. 
--- js/stmd.js | 59 +++++++++++++++++++++++++++-------------------------------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 63234f6..1de6315 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -71,7 +71,7 @@ var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; // Matches a character with a special meaning in markdown, // or a string of non-special characters. -var reMain = /^(?:[\n`\[\]\\!<&*_]|[^\n`\[\]\\!<&*_]+)/m; +var reMain = /^(?: +(?!\n)|[\n `\[\]\\!<&*_]|[^\n `\[\]\\!<&*_]+)/m; // UTILITY FUNCTIONS @@ -438,7 +438,7 @@ var parseLinkLabel = function() { this.parseBackticks([]); break; case '<': - this.parseAutolink([]) || this.parseHtmlTag([]) || this.parseString([]); + this.parseAutolink([]) || this.parseHtmlTag([]) || this.parseString(); break; case '[': // nested [] nest_level++; @@ -452,7 +452,7 @@ var parseLinkLabel = function() { this.parseEscaped([]); break; default: - this.parseString([]); + this.parseString(); } } if (c === ']') { @@ -559,34 +559,25 @@ var parseEntity = function(inlines) { // Parse a run of ordinary characters, or a single character with // a special meaning in markdown, as a plain string, adding to inlines. -var parseString = function(inlines) { +var parseString = function() { var m; if ((m = this.match(reMain))) { - inlines.push({ t: 'Str', c: m }); - return m.length; + return { t: 'Str', c: m }; } else { - return 0; + return null; } }; // Parse a newline. If it was preceded by two spaces, return a hard // line break; otherwise a soft line break. 
-var parseNewline = function(inlines) { - if (this.peek() == '\n') { - this.pos++; - var last = inlines[inlines.length - 1]; - if (last && last.t == 'Str' && last.c.slice(-2) == ' ') { - last.c = last.c.replace(/ *$/,''); - inlines.push({ t: 'Hardbreak' }); - } else { - if (last && last.t == 'Str' && last.c.slice(-1) == ' ') { - last.c = last.c.slice(0, -1); - } - inlines.push({ t: 'Softbreak' }); - } - return 1; +var parseNewline = function() { + var m = this.match(/ *\n/); + if (m.length > 2) { + return { t: 'Hardbreak' }; + } else if (m.length > 0) { + return { t: 'Softbreak' }; } else { - return 0; + return null; } }; @@ -670,20 +661,20 @@ var parseReference = function(s, refmap) { }; // Parse the next inline element in subject, advancing subject position -// and adding the result to 'inlines'. -var parseInline = function(inlines) { +// and returning the inline parsed. +var parseInline = function() { var startpos = this.pos; var memoized = this.memo[startpos]; if (memoized) { - inlines.push(memoized.inlines); - this.pos += memoized.len; - return memoized.len; + this.pos = memoized.endpos; + return memoized.inline; } var c = this.peek(); var res; switch(c) { case '\n': - res = this.parseNewline(inlines); + case ' ': + res = this.parseNewline(); break; case '\\': res = this.parseEscaped(inlines); @@ -711,10 +702,11 @@ var parseInline = function(inlines) { default: } if (!res) { - res = this.parseString(inlines); + res = this.parseString(); } - if (res > 0) { - this.memo[startpos] = { inlines: inlines[inlines.length - 1], len: res }; + if (res) { + this.memo[startpos] = { inline: res, + endpos: this.pos - startpos }; } return res; }; @@ -726,7 +718,10 @@ var parseInlines = function(s, refmap) { this.refmap = refmap || {}; this.memo = {}; var inlines = []; - while (this.parseInline(inlines)) ; + var next_inline; + while (next_inline = this.parseInline(inlines)) { + inlines.push(next_inline); + } return inlines; }; -- cgit v1.2.3 From 
70976e9cfa26a83e1cf74cac79e36ba771567b0f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 7 Sep 2014 22:15:41 -0700 Subject: Did parseBackslash (used to be parseEscaped). --- js/stmd.js | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 1de6315..870a253 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -150,8 +150,7 @@ var spnl = function() { // All of the parsers below try to match something at the current position // in the subject. If they succeed in matching anything, they -// push an inline element onto the 'inlines' list. They return the -// number of characters parsed (possibly 0). +// return the inline matched, advancing the subject. // Attempt to parse backticks, adding either a backtick code span or a // literal sequence of backticks to the 'inlines' list. @@ -182,25 +181,22 @@ var parseBackticks = function(inlines) { // Parse a backslash-escaped special character, adding either the escaped // character, a hard line break (if the backslash is followed by a newline), // or a literal backslash to the 'inlines' list. 
-var parseEscaped = function(inlines) { +var parseBackslash = function() { var subj = this.subject, pos = this.pos; if (subj[pos] === '\\') { if (subj[pos + 1] === '\n') { - inlines.push({ t: 'Hardbreak' }); this.pos = this.pos + 2; - return 2; + return { t: 'Hardbreak' }; } else if (reEscapable.test(subj[pos + 1])) { - inlines.push({ t: 'Str', c: subj[pos + 1] }); this.pos = this.pos + 2; - return 2; + return { t: 'Str', c: subj[pos + 1] }; } else { this.pos++; - inlines.push({t: 'Str', c: '\\'}); - return 1; + return {t: 'Str', c: '\\'}; } } else { - return 0; + return null; } }; @@ -449,7 +445,7 @@ var parseLinkLabel = function() { this.pos++; break; case '\\': - this.parseEscaped([]); + this.parseBackslash(); break; default: this.parseString(); @@ -677,7 +673,7 @@ var parseInline = function() { res = this.parseNewline(); break; case '\\': - res = this.parseEscaped(inlines); + res = this.parseBackslash(); break; case '`': res = this.parseBackticks(inlines); @@ -737,7 +733,7 @@ function InlineParser(){ peek: peek, spnl: spnl, parseBackticks: parseBackticks, - parseEscaped: parseEscaped, + parseBackslash: parseBackslash, parseAutolink: parseAutolink, parseHtmlTag: parseHtmlTag, scanDelims: scanDelims, -- cgit v1.2.3 From cbd2da6c9585bb5070cbac8b964617140047456e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 7 Sep 2014 23:18:56 -0700 Subject: Shell of parseEmphasis. --- js/stmd.js | 81 +++++++++++++++++++++++++++++++------------------------------- 1 file changed, 40 insertions(+), 41 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 870a253..6d86c30 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -71,7 +71,7 @@ var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; // Matches a character with a special meaning in markdown, // or a string of non-special characters. 
-var reMain = /^(?: +(?!\n)|[\n `\[\]\\!<&*_]|[^\n `\[\]\\!<&*_]+)/m; +var reMain = /^(?: +|[\n`\[\]\\!<&*_]|[^\n `\[\]\\!<&*_]+)/m; // UTILITY FUNCTIONS @@ -262,59 +262,51 @@ var scanDelims = function(c) { can_close: can_close }; }; -// Attempt to parse emphasis or strong emphasis in an efficient way, -// with no backtracking. -var parseEmphasis = function(inlines) { +// Attempt to parse emphasis or strong emphasis. +var parseEmphasis = function() { var startpos = this.pos; var c ; var first_close = 0; - var nxt = this.peek(); - if (nxt == '*' || nxt == '_') { - c = nxt; - } else { - return 0; + var c = this.peek(); + if (!(c === '*' || c === '_')) { + return null; } var numdelims; var delimpos; + var inlines = []; // Get opening delimiters. res = this.scanDelims(c); numdelims = res.numdelims; - this.pos += numdelims; - // We provisionally add a literal string. If we match appropriate - // closing delimiters, we'll change this to Strong or Emph. - inlines.push({t: 'Str', - c: this.subject.substr(this.pos - numdelims, numdelims)}); - // Record the position of this opening delimiter: - delimpos = inlines.length - 1; if (!res.can_open || numdelims === 0) { - return 0; + this.pos = startpos; + return null; } + this.pos += numdelims; + var first_close_delims = 0; + var next_inline; switch (numdelims) { case 1: // we started with * or _ while (true) { res = this.scanDelims(c); if (res.numdelims >= 1 && res.can_close) { - this.pos += 1; - // Convert the inline at delimpos, currently a string with the delim, - // into an Emph whose contents are the succeeding inlines - inlines[delimpos].t = 'Emph'; - inlines[delimpos].c = inlines.slice(delimpos + 1); - inlines.splice(delimpos + 1); - break; + this.pos += 1; + return {t: 'Emph', c: inlines}; + } else if (next_inline = this.parseInline(inlines)) { + inlines.push(next_inline); } else { - if (this.parseInline(inlines) === 0) { - break; - } + // didn't find closing delimiter + this.pos = startpos; + return null; } } - return 
(this.pos - startpos); +/* case 2: // We started with ** or __ while (true) { res = this.scanDelims(c); @@ -373,7 +365,7 @@ var parseEmphasis = function(inlines) { } } return (this.pos - startpos); - +*/ default: return res; } @@ -557,7 +549,7 @@ var parseEntity = function(inlines) { // a special meaning in markdown, as a plain string, adding to inlines. var parseString = function() { var m; - if ((m = this.match(reMain))) { + if (m = this.match(reMain)) { return { t: 'Str', c: m }; } else { return null; @@ -567,14 +559,15 @@ var parseString = function() { // Parse a newline. If it was preceded by two spaces, return a hard // line break; otherwise a soft line break. var parseNewline = function() { - var m = this.match(/ *\n/); - if (m.length > 2) { - return { t: 'Hardbreak' }; - } else if (m.length > 0) { - return { t: 'Softbreak' }; - } else { - return null; + var m = this.match(/^ *\n/); + if (m) { + if (m.length > 2) { + return { t: 'Hardbreak' }; + } else if (m.length > 0) { + return { t: 'Softbreak' }; + } } + return null; }; // Attempt to parse an image. If the opening '!' 
is not followed @@ -666,6 +659,9 @@ var parseInline = function() { return memoized.inline; } var c = this.peek(); + if (!c) { + return null; + } var res; switch(c) { case '\n': @@ -680,7 +676,7 @@ var parseInline = function() { break; case '*': case '_': - res = this.parseEmphasis(inlines); + res = this.parseEmphasis(); break; case '[': res = this.parseLink(inlines); @@ -696,13 +692,16 @@ var parseInline = function() { res = this.parseEntity(inlines); break; default: - } - if (!res) { res = this.parseString(); + break; + } + if (res === null) { + this.pos += 1; + res = {t: 'Str', c: c}; } if (res) { this.memo[startpos] = { inline: res, - endpos: this.pos - startpos }; + endpos: this.pos }; } return res; }; -- cgit v1.2.3 From 0e9674cbe56810b4c15386b1fc091777e9c7026b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 07:25:49 -0700 Subject: parseEmphasis: added Strong and shell for triples. --- js/stmd.js | 84 ++++++++++++++++++++++++-------------------------------------- 1 file changed, 33 insertions(+), 51 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 6d86c30..753eff8 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -305,72 +305,54 @@ var parseEmphasis = function() { return null; } } + break; -/* case 2: // We started with ** or __ while (true) { res = this.scanDelims(c); if (res.numdelims >= 2 && res.can_close) { - this.pos += 2; - inlines[delimpos].t = 'Strong'; - inlines[delimpos].c = inlines.slice(delimpos + 1); - inlines.splice(delimpos + 1); - break; + this.pos += 2; + return {t: 'Strong', c: inlines}; + } else if (next_inline = this.parseInline(inlines)) { + inlines.push(next_inline); } else { - if (this.parseInline(inlines) === 0) { - break; - } + // didn't find closing delimiter + this.pos = startpos; + return null; } } - return (this.pos - startpos); + break; - case 3: // We started with *** or ___ + case 3: // We started with *** or ___ while (true) { - res = this.scanDelims(c); - if (res.numdelims >= 1 && res.numdelims <= 3 && 
res.can_close && - res.numdelims != first_close_delims) { - - if (first_close_delims === 1 && numdelims > 2) { - res.numdelims = 2; - } else if (first_close_delims === 2) { - res.numdelims = 1; - } else if (res.numdelims === 3) { - // If we opened with ***, then we interpret *** as ** followed by * - // giving us - res.numdelims = 1; - } - - this.pos += res.numdelims; - - if (first_close > 0) { // if we've already passed the first closer: - inlines[delimpos].t = first_close_delims === 1 ? 'Strong' : 'Emph'; - inlines[delimpos].c = [ - { t: first_close_delims === 1 ? 'Emph' : 'Strong', - c: inlines.slice(delimpos + 1, first_close)} - ].concat(inlines.slice(first_close + 1)); - inlines.splice(delimpos + 1); - break; - } else { // this is the first closer; for now, add literal string; - // we'll change this when he hit the second closer - inlines.push({t: 'Str', - c: this.subject.slice(this.pos - res.numdelims, - this.pos) }); - first_close = inlines.length - 1; - first_close_delims = res.numdelims; - } - } else { // parse another inline element, til we hit the end - if (this.parseInline(inlines) === 0) { - break; + res = this.scanDelims(c); + var numdelims = res.numdelims; + var can_close = res.can_close; + var first_delim === 0; + if (can_close && numdelims === 3 && first_delim === 0) { + // TODO - return Strong Emph with inlines + } else if (can_close && numdelims === 2 && first_delim === 0) { + // TODO - set first_delim, make inlines a Strong + } else if (can_close && numdelims === 1 && first_delim === 0) { + // TODO - set first_delim, make inlines an Emph + } else if (can_close && numdelims === 2 && first_delim === 1) { + // TODO - return Strong inlines + } else if (can_close && numdelims === 1 && first_delim === 2) { + // TODO - return Emph inlines + } else if (next_inline = this.parseInline(inlines)) { + inlines.push(next_inline); + } else { + // didn't find closing delimiter + this.pos = startpos; + return null; } - } } - return (this.pos - startpos); -*/ + 
break; + default: - return res; } - return 0; + return null; }; // Attempt to parse link title (sans quotes), returning the string -- cgit v1.2.3 From 56f6b364c40563102779a84d1a1595226e1f1ccc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 08:21:59 -0700 Subject: Finished parseEmphasis. This seems to work properly. We now get proper results for `***hi**`. --- js/stmd.js | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 753eff8..d04fd04 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -324,21 +324,24 @@ var parseEmphasis = function() { break; case 3: // We started with *** or ___ + var first_delim = 0; while (true) { res = this.scanDelims(c); var numdelims = res.numdelims; var can_close = res.can_close; - var first_delim === 0; + this.pos += numdelims; if (can_close && numdelims === 3 && first_delim === 0) { - // TODO - return Strong Emph with inlines + return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; } else if (can_close && numdelims === 2 && first_delim === 0) { - // TODO - set first_delim, make inlines a Strong + first_delim = 2; + inlines = [{t: 'Strong', c: inlines}]; } else if (can_close && numdelims === 1 && first_delim === 0) { - // TODO - set first_delim, make inlines an Emph + first_delim = 1; + inlines = [{t: 'Emph', c: inlines}]; } else if (can_close && numdelims === 2 && first_delim === 1) { - // TODO - return Strong inlines + return {t: 'Strong', c: inlines}; } else if (can_close && numdelims === 1 && first_delim === 2) { - // TODO - return Emph inlines + return {t: 'Emph', c: inlines}; } else if (next_inline = this.parseInline(inlines)) { inlines.push(next_inline); } else { -- cgit v1.2.3 From 0a345c93475fab82d7cd49ed84450a882bab4b14 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 09:07:23 -0700 Subject: Did parseBackticks. 
--- js/stmd.js | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index d04fd04..524e99f 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -152,9 +152,9 @@ var spnl = function() { // in the subject. If they succeed in matching anything, they // return the inline matched, advancing the subject. -// Attempt to parse backticks, adding either a backtick code span or a -// literal sequence of backticks to the 'inlines' list. -var parseBackticks = function(inlines) { +// Attempt to parse backticks, returning either a backtick code span or a +// literal sequence of backticks. +var parseBackticks = function() { var startpos = this.pos; var ticks = this.match(/^`+/); if (!ticks) { @@ -165,17 +165,15 @@ var parseBackticks = function(inlines) { var match; while (!foundCode && (match = this.match(/`+/m))) { if (match == ticks) { - inlines.push({ t: 'Code', c: this.subject.slice(afterOpenTicks, + return { t: 'Code', c: this.subject.slice(afterOpenTicks, this.pos - ticks.length) .replace(/[ \n]+/g,' ') - .trim() }); - return (this.pos - startpos); + .trim() }; } } // If we got here, we didn't match a closing backtick sequence. - inlines.push({ t: 'Str', c: ticks }); this.pos = afterOpenTicks; - return (this.pos - startpos); + return { t: 'Str', c: ticks }; }; // Parse a backslash-escaped special character, adding either the escaped @@ -657,7 +655,7 @@ var parseInline = function() { res = this.parseBackslash(); break; case '`': - res = this.parseBackticks(inlines); + res = this.parseBackticks(); break; case '*': case '_': -- cgit v1.2.3 From f9b9ed96c5e34a1a7224c6df825f52ef2ce2e368 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 09:08:19 -0700 Subject: Did parseEntity. 
--- js/stmd.js | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 524e99f..394ad06 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -517,14 +517,13 @@ var parseLink = function(inlines) { return 0; }; -// Attempt to parse an entity, adding to inlines if successful. -var parseEntity = function(inlines) { +// Attempt to parse an entity, return Entity object if successful. +var parseEntity = function() { var m; if ((m = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i))) { - inlines.push({ t: 'Entity', c: m }); - return m.length; + return { t: 'Entity', c: m }; } else { - return 0; + return null; } }; @@ -672,7 +671,7 @@ var parseInline = function() { this.parseHtmlTag(inlines); break; case '&': - res = this.parseEntity(inlines); + res = this.parseEntity(); break; default: res = this.parseString(); -- cgit v1.2.3 From 33a425b931b844691b5e4ca4b63101d8566ab159 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 09:09:14 -0700 Subject: Did parseHtmLTag. --- js/stmd.js | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 394ad06..5fb0fb5 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -221,10 +221,9 @@ var parseAutolink = function(inlines) { var parseHtmlTag = function(inlines) { var m = this.match(reHtmlTag); if (m) { - inlines.push({ t: 'Html', c: m }); - return m.length; + return { t: 'Html', c: m }; } else { - return 0; + return null; } }; @@ -668,7 +667,7 @@ var parseInline = function() { break; case '<': res = this.parseAutolink(inlines) || - this.parseHtmlTag(inlines); + this.parseHtmlTag(); break; case '&': res = this.parseEntity(); -- cgit v1.2.3 From 9ead350be9302268214801ef966f4f50efc4996a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 09:10:47 -0700 Subject: Did parseAutolink. 
--- js/stmd.js | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 5fb0fb5..330ebef 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -199,21 +199,21 @@ var parseBackslash = function() { }; // Attempt to parse an autolink (URL or email in pointy brackets). -var parseAutolink = function(inlines) { +var parseAutolink = function() { var m; var dest; if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink dest = m.slice(1,-1); - inlines.push({ t: 'Link', label: [{ t: 'Str', c: dest }], - destination: 'mailto:' + dest }); - return m.length; + return {t: 'Link', + label: [{ t: 'Str', c: dest }], + destination: 'mailto:' + dest }; } else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1,-1); - inlines.push({ t: 'Link', label: [{ t: 'Str', c: dest }], - 
destination: dest }); - return m.length; + return { t: 'Link', + label: [{ t: 'Str', c: dest }], + destination: dest }; } else { - return 0; + return null; } }; @@ -666,7 +666,7 @@ var parseInline = function() { res = this.parseImage(inlines); break; case '<': - res = this.parseAutolink(inlines) || + res = this.parseAutolink() || this.parseHtmlTag(); break; case '&': -- cgit v1.2.3 From 3810f76a5939023d01e7ab082a6693e4634f15ad Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 09:11:17 -0700 Subject: Cleanup. --- js/stmd.js | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 330ebef..5b97666 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -405,10 +405,10 @@ var parseLinkLabel = function() { while ((c = this.peek()) && (c != ']' || nest_level > 0)) { switch (c) { case '`': - this.parseBackticks([]); + this.parseBackticks(); break; case '<': - this.parseAutolink([]) || this.parseHtmlTag([]) || this.parseString(); + this.parseAutolink() || this.parseHtmlTag() || this.parseString(); break; case '[': // nested [] nest_level++; @@ -666,8 +666,7 @@ var parseInline = function() { res = this.parseImage(inlines); break; case '<': - res = this.parseAutolink() || - this.parseHtmlTag(); + res = this.parseAutolink() || this.parseHtmlTag(); break; case '&': res = this.parseEntity(); -- cgit v1.2.3 From 2f718ac9a7e314ae1e195e040664b7478e93416d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 09:17:41 -0700 Subject: Completed conversion to memoized strategy. Test suite runs, but many failures. --- js/stmd.js | 62 +++++++++++++++++++++++++++----------------------------------- 1 file changed, 27 insertions(+), 35 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 5b97666..8fc7f20 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -218,7 +218,7 @@ var parseAutolink = function() { }; // Attempt to parse a raw HTML tag. 
-var parseHtmlTag = function(inlines) { +var parseHtmlTag = function() { var m = this.match(reHtmlTag); if (m) { return { t: 'Html', c: m }; @@ -294,7 +294,7 @@ var parseEmphasis = function() { if (res.numdelims >= 1 && res.can_close) { this.pos += 1; return {t: 'Emph', c: inlines}; - } else if (next_inline = this.parseInline(inlines)) { + } else if (next_inline = this.parseInline()) { inlines.push(next_inline); } else { // didn't find closing delimiter @@ -310,7 +310,7 @@ var parseEmphasis = function() { if (res.numdelims >= 2 && res.can_close) { this.pos += 2; return {t: 'Strong', c: inlines}; - } else if (next_inline = this.parseInline(inlines)) { + } else if (next_inline = this.parseInline()) { inlines.push(next_inline); } else { // didn't find closing delimiter @@ -339,7 +339,7 @@ var parseEmphasis = function() { return {t: 'Strong', c: inlines}; } else if (can_close && numdelims === 1 && first_delim === 2) { return {t: 'Emph', c: inlines}; - } else if (next_inline = this.parseInline(inlines)) { + } else if (next_inline = this.parseInline()) { inlines.push(next_inline); } else { // didn't find closing delimiter @@ -446,9 +446,8 @@ var parseRawLabel = function(s) { return new InlineParser().parse(s.substr(1, s.length - 2), {}); }; -// Attempt to parse a link. If successful, add the link to -// inlines. -var parseLink = function(inlines) { +// Attempt to parse a link. If successful, return the link. 
+var parseLink = function() { var startpos = this.pos; var reflabel; var n; @@ -474,11 +473,10 @@ var parseLink = function(inlines) { (title = this.parseLinkTitle() || '') || true) && this.spnl() && this.match(/^\)/)) { - inlines.push({ t: 'Link', - destination: dest, - title: title, - label: parseRawLabel(rawlabel) }); - return this.pos - startpos; + return { t: 'Link', + destination: dest, + title: title, + label: parseRawLabel(rawlabel) }; } else { this.pos = startpos; return 0; @@ -502,18 +500,16 @@ var parseLink = function(inlines) { // lookup rawlabel in refmap var link = this.refmap[normalizeReference(reflabel)]; if (link) { - inlines.push({t: 'Link', - destination: link.destination, - title: link.title, - label: parseRawLabel(rawlabel) }); - return this.pos - startpos; + return {t: 'Link', + destination: link.destination, + title: link.title, + label: parseRawLabel(rawlabel) }; } else { - this.pos = startpos; - return 0; + return null; } // Nothing worked, rewind: this.pos = startpos; - return 0; + return null; }; // Attempt to parse an entity, return Entity object if successful. @@ -552,22 +548,18 @@ var parseNewline = function() { }; // Attempt to parse an image. If the opening '!' is not followed -// by a link, add a literal '!' to inlines. -var parseImage = function(inlines) { +// by a link, return a literal '!'. +var parseImage = function() { if (this.match(/^!/)) { - var n = this.parseLink(inlines); - if (n === 0) { - inlines.push({ t: 'Str', c: '!' }); - return 1; - } else if (inlines[inlines.length - 1] && - inlines[inlines.length - 1].t == 'Link') { - inlines[inlines.length - 1].t = 'Image'; - return n+1; + var link = this.parseLink(); + if (link) { + link.t = 'Image'; + return link; } else { - throw "Shouldn't happen"; + return { t: 'Str', c: '!' 
}; } } else { - return 0; + return null; } }; @@ -660,10 +652,10 @@ var parseInline = function() { res = this.parseEmphasis(); break; case '[': - res = this.parseLink(inlines); + res = this.parseLink(); break; case '!': - res = this.parseImage(inlines); + res = this.parseImage(); break; case '<': res = this.parseAutolink() || this.parseHtmlTag(); @@ -694,7 +686,7 @@ var parseInlines = function(s, refmap) { this.memo = {}; var inlines = []; var next_inline; - while (next_inline = this.parseInline(inlines)) { + while (next_inline = this.parseInline()) { inlines.push(next_inline); } return inlines; -- cgit v1.2.3 From a407869dfc062d6ec24f00482aae6019e083d8c7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 09:21:07 -0700 Subject: Fixed rewind on parseLabel. 14 test failures now, all with emphasis. IN most of all of these cases, the examples in the spec seem to be mistakes, given what the spec says. More troubling, performance is down from around 220 to 83. This needs investigation. --- js/stmd.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 8fc7f20..7d0a532 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -456,7 +456,7 @@ var parseLink = function() { n = this.parseLinkLabel(); if (n === 0) { - return 0; + return null; } var afterlabel = this.pos; var rawlabel = this.subject.substr(startpos, n); @@ -479,7 +479,7 @@ var parseLink = function() { label: parseRawLabel(rawlabel) }; } else { this.pos = startpos; - return 0; + return null; } } // If we're here, it wasn't an explicit link. Try to parse a reference link. @@ -505,6 +505,7 @@ var parseLink = function() { title: link.title, label: parseRawLabel(rawlabel) }; } else { + this.pos = startpos; return null; } // Nothing worked, rewind: -- cgit v1.2.3 From 9dde9c96a7b7fb9810a60ae65dd2623b03b83da8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 09:51:40 -0700 Subject: Fixed reMain regex for better performance. 
--- js/stmd.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/stmd.js b/js/stmd.js index 7d0a532..cfd5051 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -71,7 +71,7 @@ var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; // Matches a character with a special meaning in markdown, // or a string of non-special characters. -var reMain = /^(?: +|[\n`\[\]\\!<&*_]|[^\n `\[\]\\!<&*_]+)/m; +var reMain = /^(?:[\n`\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; // UTILITY FUNCTIONS -- cgit v1.2.3 From e829aaf75ff5feb57c9c0f1a0cd260903116752a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 15:56:04 -0700 Subject: Handle case with 4+ delimiters in a row. Spec says to skip these. --- js/stmd.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/js/stmd.js b/js/stmd.js index cfd5051..4b3d994 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -277,6 +277,11 @@ var parseEmphasis = function() { res = this.scanDelims(c); numdelims = res.numdelims; + if (numdelims >= 4) { + this.pos += numdelims; + return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; + } + if (!res.can_open || numdelims === 0) { this.pos = startpos; return null; @@ -349,7 +354,7 @@ var parseEmphasis = function() { } break; - default: + default: // shouldn't happen } return null; -- cgit v1.2.3 From 977d40f2789eb4e22ba8380e99eab77e5860c21b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 9 Sep 2014 22:23:42 -0700 Subject: Simplified parseEmphasis. 
--- js/stmd.js | 73 ++++++++++++++------------------------------------------------ 1 file changed, 16 insertions(+), 57 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 4b3d994..aa21335 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -289,73 +289,32 @@ var parseEmphasis = function() { this.pos += numdelims; - var first_close_delims = 0; var next_inline; - switch (numdelims) { - case 1: // we started with * or _ - while (true) { - res = this.scanDelims(c); - if (res.numdelims >= 1 && res.can_close) { - this.pos += 1; - return {t: 'Emph', c: inlines}; - } else if (next_inline = this.parseInline()) { - inlines.push(next_inline); - } else { - // didn't find closing delimiter - this.pos = startpos; - return null; - } - } - break; - - case 2: // We started with ** or __ - while (true) { - res = this.scanDelims(c); - if (res.numdelims >= 2 && res.can_close) { - this.pos += 2; - return {t: 'Strong', c: inlines}; - } else if (next_inline = this.parseInline()) { - inlines.push(next_inline); - } else { - // didn't find closing delimiter - this.pos = startpos; - return null; - } - } - break; - - case 3: // We started with *** or ___ - var first_delim = 0; + var delims_to_match = numdelims; while (true) { res = this.scanDelims(c); - var numdelims = res.numdelims; - var can_close = res.can_close; - this.pos += numdelims; - if (can_close && numdelims === 3 && first_delim === 0) { - return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; - } else if (can_close && numdelims === 2 && first_delim === 0) { - first_delim = 2; - inlines = [{t: 'Strong', c: inlines}]; - } else if (can_close && numdelims === 1 && first_delim === 0) { - first_delim = 1; - inlines = [{t: 'Emph', c: inlines}]; - } else if (can_close && numdelims === 2 && first_delim === 1) { - return {t: 'Strong', c: inlines}; - } else if (can_close && numdelims === 1 && first_delim === 2) { - return {t: 'Emph', c: inlines}; + if (res.can_close) { + if (res.numdelims >= 2 && delims_to_match >= 2) { + delims_to_match -= 2; + 
this.pos += 2; + inlines = [{t: 'Strong', c: inlines}]; + } else if (res.numdelims >= 1 && delims_to_match >= 1) { + delims_to_match -= 1; + this.pos += 1; + inlines = [{t: 'Emph', c: inlines}]; + } + if (delims_to_match === 0) { + return inlines[0]; + } } else if (next_inline = this.parseInline()) { inlines.push(next_inline); } else { // didn't find closing delimiter - this.pos = startpos; - return null; + this.pos = startpos + numdelims; + return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; } } - break; - - default: // shouldn't happen - } return null; }; -- cgit v1.2.3 From bd271515770a17f3c320eb394f2012ccd51a417b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 9 Sep 2014 22:30:54 -0700 Subject: spec: change nesting order of strong/emph in ***a***. --- spec.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec.txt b/spec.txt index 4a9e9fd..88c8dea 100644 --- a/spec.txt +++ b/spec.txt @@ -4392,13 +4392,13 @@ The rules are sufficient for the following nesting patterns: . ***foo bar*** . -

foo bar

+

foo bar

. . ___foo bar___ . -

foo bar

+

foo bar

. . -- cgit v1.2.3 From 905b5d4d11cf1e56137fea1e68eb503863f1b113 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 10 Sep 2014 08:42:39 -0700 Subject: Revert "spec: change nesting order of strong/emph in ***a***." This reverts commit 49a03b7666e2901d1ab2813fc0bdd23968d22979. --- spec.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec.txt b/spec.txt index 88c8dea..4a9e9fd 100644 --- a/spec.txt +++ b/spec.txt @@ -4392,13 +4392,13 @@ The rules are sufficient for the following nesting patterns: . ***foo bar*** . -

foo bar

+

foo bar

. . ___foo bar___ . -

foo bar

+

foo bar

. . -- cgit v1.2.3 From 6df247e24f2b12d6d1440001877967e2f7c90093 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 10 Sep 2014 08:45:24 -0700 Subject: Special-case ***xx*** as strong/em. --- js/stmd.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index aa21335..7c7362e 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -294,12 +294,16 @@ var parseEmphasis = function() { var delims_to_match = numdelims; while (true) { res = this.scanDelims(c); + numclosedelims = res.numdelims; if (res.can_close) { - if (res.numdelims >= 2 && delims_to_match >= 2) { + if (numclosedelims === 3 && delims_to_match === 3) { + this.pos += 3; + return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; + } else if (numclosedelims >= 2 && delims_to_match >= 2) { delims_to_match -= 2; this.pos += 2; inlines = [{t: 'Strong', c: inlines}]; - } else if (res.numdelims >= 1 && delims_to_match >= 1) { + } else if (numclosedelims >= 1 && delims_to_match >= 1) { delims_to_match -= 1; this.pos += 1; inlines = [{t: 'Emph', c: inlines}]; -- cgit v1.2.3 From e245f1a2d5ec76807633806a5af1ebe52fe5bd6d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 10 Sep 2014 08:56:20 -0700 Subject: Updated spec (but not yet examples) with new rules. These reflect the current parsing algorithm. We now get a symmetry that we lacked before: **a* b* *a *b** are both emphasis within emphasis. One asymmetry remains: **a* has no emphasis, while *a** has emphasis. Further tweaking of the algorithm could regularize this. --- spec.txt | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/spec.txt b/spec.txt index 4a9e9fd..37f92c5 100644 --- a/spec.txt +++ b/spec.txt @@ -4024,7 +4024,7 @@ for efficient parsing strategies that do not backtrack: (a) it is not part of a sequence of four or more unescaped `*`s, (b) it is not followed by whitespace, and (c) either it is not followed by a `*` character or it is - followed immediately by strong emphasis. 
+ followed immediately by emphasis or strong emphasis. 2. A single `_` character [can open emphasis](#can-open-emphasis) iff @@ -4032,7 +4032,7 @@ for efficient parsing strategies that do not backtrack: (b) it is not followed by whitespace, (c) is is not preceded by an ASCII alphanumeric character, and (d) either it is not followed by a `_` character or it is - followed immediately by strong emphasis. + followed immediately by emphasis or strong emphasis. 3. A single `*` character [can close emphasis](#can-close-emphasis) iff @@ -4088,6 +4088,11 @@ for efficient parsing strategies that do not backtrack: emphasis](#can-close-strong-emphasis), and that uses the same character (`_` or `*`) as the opening delimiter, is reached. +11. In case of ambiguity, strong emphasis takes precedence. Thus, + `**foo**` is `foo`, not `foo`, + and `***foo***` is `foo`, not + `foo` or `foo`. + These rules can be illustrated through a series of examples. Simple emphasis: -- cgit v1.2.3 From 5cd513026fe49e83cfd544a7b375bf4fa1466b21 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 10 Sep 2014 09:00:40 -0700 Subject: Updated test cases in spec to reflect last change. --- spec.txt | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/spec.txt b/spec.txt index 37f92c5..e1aa502 100644 --- a/spec.txt +++ b/spec.txt @@ -4612,17 +4612,11 @@ Note that there are some asymmetries here: **foo* bar* .

foo bar

-

**foo* bar*

+

foo bar

. More cases with mismatched delimiters: -. -**foo* bar* -. -

**foo* bar*

-. - . *bar*** . -- cgit v1.2.3 From 5f56a1988ff8edfc020c97e37dbf834b499157d6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 10 Sep 2014 09:30:23 -0700 Subject: Fixed bug. --- js/stmd.js | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 7c7362e..0cfb6b3 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -70,8 +70,9 @@ var reAllTab = /\t/g; var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; // Matches a character with a special meaning in markdown, -// or a string of non-special characters. -var reMain = /^(?:[\n`\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; +// or a string of non-special characters. Note: we match +// clumps of _ or * or `, because they need to be handled in groups. +var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; // UTILITY FUNCTIONS @@ -277,16 +278,16 @@ var parseEmphasis = function() { res = this.scanDelims(c); numdelims = res.numdelims; - if (numdelims >= 4) { - this.pos += numdelims; - return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; - } - - if (!res.can_open || numdelims === 0) { + if (numdelims === 0) { this.pos = startpos; return null; } + if (numdelims >= 4 || !res.can_open) { + this.pos += numdelims; + return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; + } + this.pos += numdelims; var next_inline; -- cgit v1.2.3 From 23c24d88401a4dbb8319c8c1fc6bbb0c44fb29cb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 10 Sep 2014 23:06:22 -0700 Subject: Added last_closer to Inline object. This helps us avoid unneeded backtracking in pathological input of the form: *a **a *a **a *a etc. If we get to position k without finding a closing delimiter, then backtrack to 1, we can assume we won't find a closing delimiter when parsing forward again. This could no doubt be polished up, e.g. by making it sensitive to the kind of delimiter. 
--- js/stmd.js | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 0cfb6b3..fdbc188 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -291,14 +291,19 @@ var parseEmphasis = function() { this.pos += numdelims; var next_inline; + var last_closer = null; - var delims_to_match = numdelims; - while (true) { + var delims_to_match = numdelims; + while (this.last_closer === null || this.last_closer >= this.pos) { res = this.scanDelims(c); numclosedelims = res.numdelims; if (res.can_close) { + if (last_closer < this.pos) { + last_closer = this.pos; + } if (numclosedelims === 3 && delims_to_match === 3) { this.pos += 3; + this.last_closer = null; return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; } else if (numclosedelims >= 2 && delims_to_match >= 2) { delims_to_match -= 2; @@ -310,18 +315,24 @@ var parseEmphasis = function() { inlines = [{t: 'Emph', c: inlines}]; } if (delims_to_match === 0) { + this.last_closer = null; return inlines[0]; } } else if (next_inline = this.parseInline()) { inlines.push(next_inline); } else { - // didn't find closing delimiter - this.pos = startpos + numdelims; - return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; + break; } } - return null; + // didn't find closing delimiter + this.pos = startpos + numdelims; + if (last_closer === null) { + this.last_closer = startpos; + } else { + this.last_closer = last_closer; + } + return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; }; // Attempt to parse link title (sans quotes), returning the string @@ -654,6 +665,7 @@ var parseInlines = function(s, refmap) { this.pos = 0; this.refmap = refmap || {}; this.memo = {}; + this.last_closer = null; var inlines = []; var next_inline; while (next_inline = this.parseInline()) { @@ -670,6 +682,7 @@ function InlineParser(){ pos: 0, refmap: {}, memo: {}, + last_closer: null, match: match, peek: peek, spnl: spnl, -- cgit v1.2.3 From 
c11c900b618f6ca48f37ff1bdd2b9602317ec177 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 11 Sep 2014 09:04:09 -0700 Subject: Renamed last_closer -> last_emphasis_closer. --- js/stmd.js | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index fdbc188..fab3a51 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -291,19 +291,20 @@ var parseEmphasis = function() { this.pos += numdelims; var next_inline; - var last_closer = null; + var last_emphasis_closer = null; var delims_to_match = numdelims; - while (this.last_closer === null || this.last_closer >= this.pos) { + while (this.last_emphasis_closer === null || + this.last_emphasis_closer >= this.pos) { res = this.scanDelims(c); numclosedelims = res.numdelims; if (res.can_close) { - if (last_closer < this.pos) { - last_closer = this.pos; + if (last_emphasis_closer < this.pos) { + last_emphasis_closer = this.pos; } if (numclosedelims === 3 && delims_to_match === 3) { this.pos += 3; - this.last_closer = null; + this.last_emphasis_closer = null; return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; } else if (numclosedelims >= 2 && delims_to_match >= 2) { delims_to_match -= 2; @@ -315,7 +316,7 @@ var parseEmphasis = function() { inlines = [{t: 'Emph', c: inlines}]; } if (delims_to_match === 0) { - this.last_closer = null; + this.last_emphasis_closer = null; return inlines[0]; } } else if (next_inline = this.parseInline()) { @@ -327,10 +328,10 @@ var parseEmphasis = function() { // didn't find closing delimiter this.pos = startpos + numdelims; - if (last_closer === null) { - this.last_closer = startpos; + if (last_emphasis_closer === null) { + this.last_emphasis_closer = startpos; } else { - this.last_closer = last_closer; + this.last_emphasis_closer = last_emphasis_closer; } return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; }; @@ -665,7 +666,7 @@ var parseInlines = function(s, refmap) { this.pos = 0; this.refmap = refmap || {}; 
this.memo = {}; - this.last_closer = null; + this.last_emphasis_closer = null; var inlines = []; var next_inline; while (next_inline = this.parseInline()) { @@ -679,10 +680,10 @@ function InlineParser(){ return { subject: '', label_nest_level: 0, // used by parseLinkLabel method + last_emphasis_closer: null, // used by parseEmphasis method pos: 0, refmap: {}, memo: {}, - last_closer: null, match: match, peek: peek, spnl: spnl, -- cgit v1.2.3 From 2fc6c0d06f4199f4e7ee6fb0e46337bfc6749d24 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 11 Sep 2014 09:10:03 -0700 Subject: Add check for null in last_emphasis_closer. --- js/stmd.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/js/stmd.js b/js/stmd.js index fab3a51..1b82fd5 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -299,7 +299,8 @@ var parseEmphasis = function() { res = this.scanDelims(c); numclosedelims = res.numdelims; if (res.can_close) { - if (last_emphasis_closer < this.pos) { + if (last_emphasis_closer === null || + last_emphasis_closer < this.pos) { last_emphasis_closer = this.pos; } if (numclosedelims === 3 && delims_to_match === 3) { -- cgit v1.2.3 From 9c218c305e175183abd577c07daec5daf230801c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 11 Sep 2014 09:24:24 -0700 Subject: Clarified code logic for last_emphasis_closer. --- js/stmd.js | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 1b82fd5..250814e 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -294,8 +294,12 @@ var parseEmphasis = function() { var last_emphasis_closer = null; var delims_to_match = numdelims; - while (this.last_emphasis_closer === null || - this.last_emphasis_closer >= this.pos) { + + // We need not look for closers if we have already recorded that + // there are no closers past this point. 
+ if (this.last_emphasis_closer === null || + this.last_emphasis_closer >= this.pos) { + while (true) { res = this.scanDelims(c); numclosedelims = res.numdelims; if (res.can_close) { @@ -325,11 +329,13 @@ var parseEmphasis = function() { } else { break; } + } } // didn't find closing delimiter this.pos = startpos + numdelims; if (last_emphasis_closer === null) { + // we know there are no closers after startpos, so: this.last_emphasis_closer = startpos; } else { this.last_emphasis_closer = last_emphasis_closer; -- cgit v1.2.3 From e6c06dbb715f59b5b9dd4ad7fb7090f83e3ad90d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 11 Sep 2014 09:26:23 -0700 Subject: Reindented source with js2-mode. --- js/stmd.js | 2936 ++++++++++++++++++++++++++++++------------------------------ 1 file changed, 1468 insertions(+), 1468 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 250814e..6cf65d4 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -11,1505 +11,1505 @@ (function(exports) { -// Some regexps used in inline parser: - -var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]'; -var ESCAPED_CHAR = '\\\\' + ESCAPABLE; -var IN_DOUBLE_QUOTES = '"(' + ESCAPED_CHAR + '|[^"\\x00])*"'; -var IN_SINGLE_QUOTES = '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\''; -var IN_PARENS = '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\)'; -var REG_CHAR = '[^\\\\()\\x00-\\x20]'; -var IN_PARENS_NOSP = '\\((' + REG_CHAR + '|' + ESCAPED_CHAR + ')*\\)'; -var TAGNAME = '[A-Za-z][A-Za-z0-9]*'; -var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)'; -var ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*'; -var UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+"; -var SINGLEQUOTEDVALUE = "'[^']*'"; -var DOUBLEQUOTEDVALUE = '"[^"]*"'; -var ATTRIBUTEVALUE = "(?:" + UNQUOTEDVALUE + "|" 
+ SINGLEQUOTEDVALUE + "|" + DOUBLEQUOTEDVALUE + ")"; -var ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")"; -var ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)"; -var OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; -var CLOSETAG = "]"; -var OPENBLOCKTAG = "<" + BLOCKTAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; -var CLOSEBLOCKTAG = "]"; -var HTMLCOMMENT = ""; -var PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"; -var DECLARATION = "]*>"; -var CDATA = "])*\\]\\]>"; -var HTMLTAG = "(?:" + OPENTAG + "|" + CLOSETAG + "|" + HTMLCOMMENT + "|" + - PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; -var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" + - "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])"; - -var reHtmlTag = new RegExp('^' + HTMLTAG, 'i'); - -var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i'); - -var reLinkTitle = new RegExp( - '^(?:"(' + ESCAPED_CHAR + '|[^"\\x00])*"' + - '|' + - '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'' + - '|' + - '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\))'); - -var reLinkDestinationBraces = new RegExp( - '^(?:[<](?:[^<>\\n\\\\\\x00]' + '|' + ESCAPED_CHAR + '|' + '\\\\)*[>])'); - -var reLinkDestination = new RegExp( - '^(?:' + REG_CHAR + '+|' + ESCAPED_CHAR + '|' + IN_PARENS_NOSP + ')*'); - -var reEscapable = new RegExp(ESCAPABLE); - -var reAllEscapedChar = new RegExp('\\\\(' + ESCAPABLE + ')', 'g'); - -var reEscapedChar = new RegExp('^\\\\(' + ESCAPABLE + ')'); - -var reAllTab = /\t/g; - -var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; - -// Matches a character with a special meaning in markdown, -// or a string of non-special characters. Note: we match -// clumps of _ or * or `, because they need to be handled in groups. -var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; - -// UTILITY FUNCTIONS - -// Replace backslash escapes with literal characters. 
-var unescape = function(s) { - return s.replace(reAllEscapedChar, '$1'); -}; - -// Returns true if string contains only space characters. -var isBlank = function(s) { - return /^\s*$/.test(s); -}; - -// Normalize reference label: collapse internal whitespace -// to single space, remove leading/trailing whitespace, case fold. -var normalizeReference = function(s) { - return s.trim() - .replace(/\s+/,' ') - .toUpperCase(); -}; - -// Attempt to match a regex in string s at offset offset. -// Return index of match or null. -var matchAt = function(re, s, offset) { - var res = s.slice(offset).match(re); - if (res) { - return offset + res.index; - } else { - return null; - } -}; - -// Convert tabs to spaces on each line using a 4-space tab stop. -var detabLine = function(text) { - if (text.indexOf('\t') == -1) { - return text; - } else { - var lastStop = 0; - return text.replace(reAllTab, function(match, offset) { - var result = ' '.slice((offset - lastStop) % 4); - lastStop = offset + 1; - return result; - }); - } -}; - -// INLINE PARSER - -// These are methods of an InlineParser object, defined below. -// An InlineParser keeps track of a subject (a string to be -// parsed) and a position in that subject. - -// If re matches at current position in the subject, advance -// position in subject and return the match; otherwise return null. -var match = function(re) { - var match = re.exec(this.subject.slice(this.pos)); - if (match) { - this.pos += match.index + match[0].length; - return match[0]; - } else { - return null; - } -}; - -// Returns the character at the current subject position, or null if -// there are no more characters. -var peek = function() { - return this.subject[this.pos] || null; -}; - -// Parse zero or more space characters, including at most one newline -var spnl = function() { - this.match(/^ *(?:\n *)?/); - return 1; -}; - -// All of the parsers below try to match something at the current position -// in the subject. 
If they succeed in matching anything, they -// return the inline matched, advancing the subject. - -// Attempt to parse backticks, returning either a backtick code span or a -// literal sequence of backticks. -var parseBackticks = function() { - var startpos = this.pos; - var ticks = this.match(/^`+/); - if (!ticks) { - return 0; - } - var afterOpenTicks = this.pos; - var foundCode = false; - var match; - while (!foundCode && (match = this.match(/`+/m))) { - if (match == ticks) { - return { t: 'Code', c: this.subject.slice(afterOpenTicks, - this.pos - ticks.length) - .replace(/[ \n]+/g,' ') - .trim() }; - } - } - // If we got here, we didn't match a closing backtick sequence. - this.pos = afterOpenTicks; - return { t: 'Str', c: ticks }; -}; - -// Parse a backslash-escaped special character, adding either the escaped -// character, a hard line break (if the backslash is followed by a newline), -// or a literal backslash to the 'inlines' list. -var parseBackslash = function() { - var subj = this.subject, - pos = this.pos; - if (subj[pos] === '\\') { - if (subj[pos + 1] === '\n') { - this.pos = this.pos + 2; - return { t: 'Hardbreak' }; - } else if (reEscapable.test(subj[pos + 1])) { - this.pos = this.pos + 2; - return { t: 'Str', c: subj[pos + 1] }; - } else { - this.pos++; - return {t: 'Str', c: '\\'}; - } - } else { - return null; - } -}; - -// Attempt to parse an autolink (URL or email in pointy brackets). 
-var parseAutolink = function() { - var m; - var dest; - if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink - dest = m.slice(1,-1); - return {t: 'Link', - label: [{ t: 'Str', c: dest }], - destination: 'mailto:' + dest }; - } else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { - dest = m.slice(1,-1); - return { t: 'Link', - label: [{ t: 'Str', c: dest }], - destination: dest }; - } else { - return null; - } -}; - -// Attempt to parse a raw HTML tag. -var parseHtmlTag = function() { - var m = this.match(reHtmlTag); - if (m) { - return { t: 'Html', c: m }; - } else { - return null; - } -}; - -// Scan a sequence of characters == c, and return information about -// the number of delimiters and whether they are positioned such that -// they can open and/or close emphasis or strong emphasis. 
A utility -// function for strong/emph parsing. -var scanDelims = function(c) { - var numdelims = 0; - var first_close_delims = 0; - var char_before, char_after; - var startpos = this.pos; - - char_before = this.pos === 0 ? '\n' : - this.subject[this.pos - 1]; - - while (this.peek() === c) { - numdelims++; - this.pos++; - } - - char_after = this.peek() || '\n'; - - var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after)); - var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before)); - if (c === '_') { - can_open = can_open && !((/[a-z0-9]/i).test(char_before)); - can_close = can_close && !((/[a-z0-9]/i).test(char_after)); - } - this.pos = startpos; - return { numdelims: numdelims, - can_open: can_open, - can_close: can_close }; -}; - -// Attempt to parse emphasis or strong emphasis. -var parseEmphasis = function() { - var startpos = this.pos; - var c ; - var first_close = 0; - var c = this.peek(); - if (!(c === '*' || c === '_')) { - return null; - } - - var numdelims; - var delimpos; - var inlines = []; - - // Get opening delimiters. - res = this.scanDelims(c); - numdelims = res.numdelims; - - if (numdelims === 0) { - this.pos = startpos; - return null; - } - - if (numdelims >= 4 || !res.can_open) { - this.pos += numdelims; - return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; - } - - this.pos += numdelims; - - var next_inline; - var last_emphasis_closer = null; - - var delims_to_match = numdelims; - - // We need not look for closers if we have already recorded that - // there are no closers past this point. 
- if (this.last_emphasis_closer === null || - this.last_emphasis_closer >= this.pos) { - while (true) { - res = this.scanDelims(c); - numclosedelims = res.numdelims; - if (res.can_close) { - if (last_emphasis_closer === null || - last_emphasis_closer < this.pos) { - last_emphasis_closer = this.pos; + // Some regexps used in inline parser: + + var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]'; + var ESCAPED_CHAR = '\\\\' + ESCAPABLE; + var IN_DOUBLE_QUOTES = '"(' + ESCAPED_CHAR + '|[^"\\x00])*"'; + var IN_SINGLE_QUOTES = '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\''; + var IN_PARENS = '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\)'; + var REG_CHAR = '[^\\\\()\\x00-\\x20]'; + var IN_PARENS_NOSP = '\\((' + REG_CHAR + '|' + ESCAPED_CHAR + ')*\\)'; + var TAGNAME = '[A-Za-z][A-Za-z0-9]*'; + var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)'; + var ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*'; + var UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+"; + var SINGLEQUOTEDVALUE = "'[^']*'"; + var DOUBLEQUOTEDVALUE = '"[^"]*"'; + var ATTRIBUTEVALUE = "(?:" + UNQUOTEDVALUE + "|" + SINGLEQUOTEDVALUE + "|" + DOUBLEQUOTEDVALUE + ")"; + var ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")"; + var ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)"; + var OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; + var CLOSETAG = "]"; + var OPENBLOCKTAG = "<" + BLOCKTAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; + var CLOSEBLOCKTAG = "]"; + var HTMLCOMMENT = ""; + var PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"; + var DECLARATION = "]*>"; + var CDATA = "])*\\]\\]>"; + var HTMLTAG = "(?:" + OPENTAG + "|" + CLOSETAG + "|" + HTMLCOMMENT + "|" + + PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; + var 
HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" + + "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])"; + + var reHtmlTag = new RegExp('^' + HTMLTAG, 'i'); + + var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i'); + + var reLinkTitle = new RegExp( + '^(?:"(' + ESCAPED_CHAR + '|[^"\\x00])*"' + + '|' + + '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'' + + '|' + + '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\))'); + + var reLinkDestinationBraces = new RegExp( + '^(?:[<](?:[^<>\\n\\\\\\x00]' + '|' + ESCAPED_CHAR + '|' + '\\\\)*[>])'); + + var reLinkDestination = new RegExp( + '^(?:' + REG_CHAR + '+|' + ESCAPED_CHAR + '|' + IN_PARENS_NOSP + ')*'); + + var reEscapable = new RegExp(ESCAPABLE); + + var reAllEscapedChar = new RegExp('\\\\(' + ESCAPABLE + ')', 'g'); + + var reEscapedChar = new RegExp('^\\\\(' + ESCAPABLE + ')'); + + var reAllTab = /\t/g; + + var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; + + // Matches a character with a special meaning in markdown, + // or a string of non-special characters. Note: we match + // clumps of _ or * or `, because they need to be handled in groups. + var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; + + // UTILITY FUNCTIONS + + // Replace backslash escapes with literal characters. + var unescape = function(s) { + return s.replace(reAllEscapedChar, '$1'); + }; + + // Returns true if string contains only space characters. + var isBlank = function(s) { + return /^\s*$/.test(s); + }; + + // Normalize reference label: collapse internal whitespace + // to single space, remove leading/trailing whitespace, case fold. + var normalizeReference = function(s) { + return s.trim() + .replace(/\s+/,' ') + .toUpperCase(); + }; + + // Attempt to match a regex in string s at offset offset. + // Return index of match or null. 
+ var matchAt = function(re, s, offset) { + var res = s.slice(offset).match(re); + if (res) { + return offset + res.index; + } else { + return null; + } + }; + + // Convert tabs to spaces on each line using a 4-space tab stop. + var detabLine = function(text) { + if (text.indexOf('\t') == -1) { + return text; + } else { + var lastStop = 0; + return text.replace(reAllTab, function(match, offset) { + var result = ' '.slice((offset - lastStop) % 4); + lastStop = offset + 1; + return result; + }); + } + }; + + // INLINE PARSER + + // These are methods of an InlineParser object, defined below. + // An InlineParser keeps track of a subject (a string to be + // parsed) and a position in that subject. + + // If re matches at current position in the subject, advance + // position in subject and return the match; otherwise return null. + var match = function(re) { + var match = re.exec(this.subject.slice(this.pos)); + if (match) { + this.pos += match.index + match[0].length; + return match[0]; + } else { + return null; + } + }; + + // Returns the character at the current subject position, or null if + // there are no more characters. + var peek = function() { + return this.subject[this.pos] || null; + }; + + // Parse zero or more space characters, including at most one newline + var spnl = function() { + this.match(/^ *(?:\n *)?/); + return 1; + }; + + // All of the parsers below try to match something at the current position + // in the subject. If they succeed in matching anything, they + // return the inline matched, advancing the subject. + + // Attempt to parse backticks, returning either a backtick code span or a + // literal sequence of backticks. 
+ var parseBackticks = function() { + var startpos = this.pos; + var ticks = this.match(/^`+/); + if (!ticks) { + return 0; + } + var afterOpenTicks = this.pos; + var foundCode = false; + var match; + while (!foundCode && (match = this.match(/`+/m))) { + if (match == ticks) { + return { t: 'Code', c: this.subject.slice(afterOpenTicks, + this.pos - ticks.length) + .replace(/[ \n]+/g,' ') + .trim() }; } - if (numclosedelims === 3 && delims_to_match === 3) { - this.pos += 3; - this.last_emphasis_closer = null; - return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; - } else if (numclosedelims >= 2 && delims_to_match >= 2) { - delims_to_match -= 2; - this.pos += 2; - inlines = [{t: 'Strong', c: inlines}]; - } else if (numclosedelims >= 1 && delims_to_match >= 1) { - delims_to_match -= 1; - this.pos += 1; - inlines = [{t: 'Emph', c: inlines}]; + } + // If we got here, we didn't match a closing backtick sequence. + this.pos = afterOpenTicks; + return { t: 'Str', c: ticks }; + }; + + // Parse a backslash-escaped special character, adding either the escaped + // character, a hard line break (if the backslash is followed by a newline), + // or a literal backslash to the 'inlines' list. + var parseBackslash = function() { + var subj = this.subject, + pos = this.pos; + if (subj[pos] === '\\') { + if (subj[pos + 1] === '\n') { + this.pos = this.pos + 2; + return { t: 'Hardbreak' }; + } else if (reEscapable.test(subj[pos + 1])) { + this.pos = this.pos + 2; + return { t: 'Str', c: subj[pos + 1] }; + } else { + this.pos++; + return {t: 'Str', c: '\\'}; } - if (delims_to_match === 0) { - this.last_emphasis_closer = null; - return inlines[0]; + } else { + return null; + } + }; + + // Attempt to parse an autolink (URL or email in pointy brackets). 
+ var parseAutolink = function() { + var m; + var dest; + if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink + dest = m.slice(1,-1); + return {t: 'Link', + label: [{ t: 'Str', c: dest }], + destination: 'mailto:' + dest }; + } else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { + dest = m.slice(1,-1); + return { t: 'Link', + label: [{ t: 'Str', c: dest }], + destination: dest }; + } else { + return null; + } + }; + + // Attempt to parse a raw HTML tag. + var parseHtmlTag = function() { + var m = this.match(reHtmlTag); + if (m) { + return { t: 'Html', c: m }; + } else { + return null; + } + }; + + // Scan a sequence of characters == c, and return information about + // the number of delimiters and whether they are positioned such that + // they can open and/or close emphasis or strong emphasis. 
A utility + // function for strong/emph parsing. + var scanDelims = function(c) { + var numdelims = 0; + var first_close_delims = 0; + var char_before, char_after; + var startpos = this.pos; + + char_before = this.pos === 0 ? '\n' : + this.subject[this.pos - 1]; + + while (this.peek() === c) { + numdelims++; + this.pos++; + } + + char_after = this.peek() || '\n'; + + var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after)); + var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before)); + if (c === '_') { + can_open = can_open && !((/[a-z0-9]/i).test(char_before)); + can_close = can_close && !((/[a-z0-9]/i).test(char_after)); + } + this.pos = startpos; + return { numdelims: numdelims, + can_open: can_open, + can_close: can_close }; + }; + + // Attempt to parse emphasis or strong emphasis. + var parseEmphasis = function() { + var startpos = this.pos; + var c ; + var first_close = 0; + var c = this.peek(); + if (!(c === '*' || c === '_')) { + return null; + } + + var numdelims; + var delimpos; + var inlines = []; + + // Get opening delimiters. + res = this.scanDelims(c); + numdelims = res.numdelims; + + if (numdelims === 0) { + this.pos = startpos; + return null; + } + + if (numdelims >= 4 || !res.can_open) { + this.pos += numdelims; + return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; + } + + this.pos += numdelims; + + var next_inline; + var last_emphasis_closer = null; + + var delims_to_match = numdelims; + + // We need not look for closers if we have already recorded that + // there are no closers past this point. 
+ if (this.last_emphasis_closer === null || + this.last_emphasis_closer >= this.pos) { + while (true) { + res = this.scanDelims(c); + numclosedelims = res.numdelims; + if (res.can_close) { + if (last_emphasis_closer === null || + last_emphasis_closer < this.pos) { + last_emphasis_closer = this.pos; + } + if (numclosedelims === 3 && delims_to_match === 3) { + this.pos += 3; + this.last_emphasis_closer = null; + return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; + } else if (numclosedelims >= 2 && delims_to_match >= 2) { + delims_to_match -= 2; + this.pos += 2; + inlines = [{t: 'Strong', c: inlines}]; + } else if (numclosedelims >= 1 && delims_to_match >= 1) { + delims_to_match -= 1; + this.pos += 1; + inlines = [{t: 'Emph', c: inlines}]; + } + if (delims_to_match === 0) { + this.last_emphasis_closer = null; + return inlines[0]; + } + } else if (next_inline = this.parseInline()) { + inlines.push(next_inline); + } else { + break; + } } - } else if (next_inline = this.parseInline()) { - inlines.push(next_inline); + } + + // didn't find closing delimiter + this.pos = startpos + numdelims; + if (last_emphasis_closer === null) { + // we know there are no closers after startpos, so: + this.last_emphasis_closer = startpos; } else { - break; + this.last_emphasis_closer = last_emphasis_closer; } - } - } + return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; + }; + + // Attempt to parse link title (sans quotes), returning the string + // or null if no match. + var parseLinkTitle = function() { + var title = this.match(reLinkTitle); + if (title) { + // chop off quotes from title and unescape: + return unescape(title.substr(1, title.length - 2)); + } else { + return null; + } + }; + + // Attempt to parse link destination, returning the string or + // null if no match. 
+ var parseLinkDestination = function() { + var res = this.match(reLinkDestinationBraces); + if (res) { // chop off surrounding <..>: + return unescape(res.substr(1, res.length - 2)); + } else { + res = this.match(reLinkDestination); + if (res !== null) { + return unescape(res); + } else { + return null; + } + } + }; - // didn't find closing delimiter - this.pos = startpos + numdelims; - if (last_emphasis_closer === null) { - // we know there are no closers after startpos, so: - this.last_emphasis_closer = startpos; - } else { - this.last_emphasis_closer = last_emphasis_closer; - } - return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; -}; - -// Attempt to parse link title (sans quotes), returning the string -// or null if no match. -var parseLinkTitle = function() { - var title = this.match(reLinkTitle); - if (title) { - // chop off quotes from title and unescape: - return unescape(title.substr(1, title.length - 2)); - } else { - return null; - } -}; - -// Attempt to parse link destination, returning the string or -// null if no match. -var parseLinkDestination = function() { - var res = this.match(reLinkDestinationBraces); - if (res) { // chop off surrounding <..>: - return unescape(res.substr(1, res.length - 2)); - } else { - res = this.match(reLinkDestination); - if (res !== null) { - return unescape(res); - } else { - return null; - } - } -}; - -// Attempt to parse a link label, returning number of characters parsed. -var parseLinkLabel = function() { - if (this.peek() != '[') { - return 0; - } - var startpos = this.pos; - var nest_level = 0; - if (this.label_nest_level > 0) { - // If we've already checked to the end of this subject - // for a label, even with a different starting [, we - // know we won't find one here and we can just return. - // This avoids lots of backtracking. 
- // Note: nest level 1 would be: [foo [bar] - // nest level 2 would be: [foo [bar [baz] - this.label_nest_level--; - return 0; - } - this.pos++; // advance past [ - var c; - while ((c = this.peek()) && (c != ']' || nest_level > 0)) { - switch (c) { - case '`': - this.parseBackticks(); - break; - case '<': - this.parseAutolink() || this.parseHtmlTag() || this.parseString(); - break; - case '[': // nested [] - nest_level++; - this.pos++; - break; - case ']': // nested [] - nest_level--; - this.pos++; - break; - case '\\': - this.parseBackslash(); - break; - default: - this.parseString(); - } - } - if (c === ']') { - this.label_nest_level = 0; - this.pos++; // advance past ] - return this.pos - startpos; - } else { - if (!c) { - this.label_nest_level = nest_level; - } - this.pos = startpos; - return 0; - } -}; - -// Parse raw link label, including surrounding [], and return -// inline contents. (Note: this is not a method of InlineParser.) -var parseRawLabel = function(s) { - // note: parse without a refmap; we don't want links to resolve - // in nested brackets! - return new InlineParser().parse(s.substr(1, s.length - 2), {}); -}; - -// Attempt to parse a link. If successful, return the link. -var parseLink = function() { - var startpos = this.pos; - var reflabel; - var n; - var dest; - var title; - - n = this.parseLinkLabel(); - if (n === 0) { - return null; - } - var afterlabel = this.pos; - var rawlabel = this.subject.substr(startpos, n); - - // if we got this far, we've parsed a label. 
- // Try to parse an explicit link: [label](url "title") - if (this.peek() == '(') { - this.pos++; - if (this.spnl() && - ((dest = this.parseLinkDestination()) !== null) && - this.spnl() && - // make sure there's a space before the title: - (/^\s/.test(this.subject[this.pos - 1]) && - (title = this.parseLinkTitle() || '') || true) && - this.spnl() && - this.match(/^\)/)) { - return { t: 'Link', - destination: dest, - title: title, - label: parseRawLabel(rawlabel) }; - } else { + // Attempt to parse a link label, returning number of characters parsed. + var parseLinkLabel = function() { + if (this.peek() != '[') { + return 0; + } + var startpos = this.pos; + var nest_level = 0; + if (this.label_nest_level > 0) { + // If we've already checked to the end of this subject + // for a label, even with a different starting [, we + // know we won't find one here and we can just return. + // This avoids lots of backtracking. + // Note: nest level 1 would be: [foo [bar] + // nest level 2 would be: [foo [bar [baz] + this.label_nest_level--; + return 0; + } + this.pos++; // advance past [ + var c; + while ((c = this.peek()) && (c != ']' || nest_level > 0)) { + switch (c) { + case '`': + this.parseBackticks(); + break; + case '<': + this.parseAutolink() || this.parseHtmlTag() || this.parseString(); + break; + case '[': // nested [] + nest_level++; + this.pos++; + break; + case ']': // nested [] + nest_level--; + this.pos++; + break; + case '\\': + this.parseBackslash(); + break; + default: + this.parseString(); + } + } + if (c === ']') { + this.label_nest_level = 0; + this.pos++; // advance past ] + return this.pos - startpos; + } else { + if (!c) { + this.label_nest_level = nest_level; + } + this.pos = startpos; + return 0; + } + }; + + // Parse raw link label, including surrounding [], and return + // inline contents. (Note: this is not a method of InlineParser.) 
+ var parseRawLabel = function(s) { + // note: parse without a refmap; we don't want links to resolve + // in nested brackets! + return new InlineParser().parse(s.substr(1, s.length - 2), {}); + }; + + // Attempt to parse a link. If successful, return the link. + var parseLink = function() { + var startpos = this.pos; + var reflabel; + var n; + var dest; + var title; + + n = this.parseLinkLabel(); + if (n === 0) { + return null; + } + var afterlabel = this.pos; + var rawlabel = this.subject.substr(startpos, n); + + // if we got this far, we've parsed a label. + // Try to parse an explicit link: [label](url "title") + if (this.peek() == '(') { + this.pos++; + if (this.spnl() && + ((dest = this.parseLinkDestination()) !== null) && + this.spnl() && + // make sure there's a space before the title: + (/^\s/.test(this.subject[this.pos - 1]) && + (title = this.parseLinkTitle() || '') || true) && + this.spnl() && + this.match(/^\)/)) { + return { t: 'Link', + destination: dest, + title: title, + label: parseRawLabel(rawlabel) }; + } else { + this.pos = startpos; + return null; + } + } + // If we're here, it wasn't an explicit link. Try to parse a reference link. + // first, see if there's another label + var savepos = this.pos; + this.spnl(); + var beforelabel = this.pos; + n = this.parseLinkLabel(); + if (n == 2) { + // empty second label + reflabel = rawlabel; + } else if (n > 0) { + reflabel = this.subject.slice(beforelabel, beforelabel + n); + } else { + this.pos = savepos; + reflabel = rawlabel; + } + // lookup rawlabel in refmap + var link = this.refmap[normalizeReference(reflabel)]; + if (link) { + return {t: 'Link', + destination: link.destination, + title: link.title, + label: parseRawLabel(rawlabel) }; + } else { + this.pos = startpos; + return null; + } + // Nothing worked, rewind: this.pos = startpos; return null; - } - } - // If we're here, it wasn't an explicit link. Try to parse a reference link. 
- // first, see if there's another label - var savepos = this.pos; - this.spnl(); - var beforelabel = this.pos; - n = this.parseLinkLabel(); - if (n == 2) { - // empty second label - reflabel = rawlabel; - } else if (n > 0) { - reflabel = this.subject.slice(beforelabel, beforelabel + n); - } else { - this.pos = savepos; - reflabel = rawlabel; - } - // lookup rawlabel in refmap - var link = this.refmap[normalizeReference(reflabel)]; - if (link) { - return {t: 'Link', - destination: link.destination, - title: link.title, - label: parseRawLabel(rawlabel) }; - } else { - this.pos = startpos; - return null; - } - // Nothing worked, rewind: - this.pos = startpos; - return null; -}; - -// Attempt to parse an entity, return Entity object if successful. -var parseEntity = function() { - var m; - if ((m = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i))) { - return { t: 'Entity', c: m }; - } else { - return null; - } -}; - -// Parse a run of ordinary characters, or a single character with -// a special meaning in markdown, as a plain string, adding to inlines. -var parseString = function() { - var m; - if (m = this.match(reMain)) { - return { t: 'Str', c: m }; - } else { - return null; - } -}; - -// Parse a newline. If it was preceded by two spaces, return a hard -// line break; otherwise a soft line break. -var parseNewline = function() { - var m = this.match(/^ *\n/); - if (m) { - if (m.length > 2) { - return { t: 'Hardbreak' }; - } else if (m.length > 0) { - return { t: 'Softbreak' }; - } - } - return null; -}; - -// Attempt to parse an image. If the opening '!' is not followed -// by a link, return a literal '!'. -var parseImage = function() { - if (this.match(/^!/)) { - var link = this.parseLink(); - if (link) { - link.t = 'Image'; - return link; - } else { - return { t: 'Str', c: '!' }; - } - } else { - return null; - } -}; - -// Attempt to parse a link reference, modifying refmap. 
-var parseReference = function(s, refmap) { - this.subject = s; - this.pos = 0; - var rawlabel; - var dest; - var title; - var matchChars; - var startpos = this.pos; - var match; - - // label: - matchChars = this.parseLinkLabel(); - if (matchChars === 0) { - return 0; - } else { - rawlabel = this.subject.substr(0, matchChars); - } - - // colon: - if (this.peek() === ':') { - this.pos++; - } else { - this.pos = startpos; - return 0; - } - - // link url - this.spnl(); - - dest = this.parseLinkDestination(); - if (dest === null || dest.length === 0) { - this.pos = startpos; - return 0; - } - - var beforetitle = this.pos; - this.spnl(); - title = this.parseLinkTitle(); - if (title === null) { - title = ''; - // rewind before spaces - this.pos = beforetitle; - } - - // make sure we're at line end: - if (this.match(/^ *(?:\n|$)/) === null) { - this.pos = startpos; - return 0; - } - - var normlabel = normalizeReference(rawlabel); - - if (!refmap[normlabel]) { - refmap[normlabel] = { destination: dest, title: title }; - } - return this.pos - startpos; -}; - -// Parse the next inline element in subject, advancing subject position -// and returning the inline parsed. 
-var parseInline = function() { - var startpos = this.pos; - var memoized = this.memo[startpos]; - if (memoized) { - this.pos = memoized.endpos; - return memoized.inline; - } - var c = this.peek(); - if (!c) { - return null; - } - var res; - switch(c) { - case '\n': - case ' ': - res = this.parseNewline(); - break; - case '\\': - res = this.parseBackslash(); - break; - case '`': - res = this.parseBackticks(); - break; - case '*': - case '_': - res = this.parseEmphasis(); - break; - case '[': - res = this.parseLink(); - break; - case '!': - res = this.parseImage(); - break; - case '<': - res = this.parseAutolink() || this.parseHtmlTag(); - break; - case '&': - res = this.parseEntity(); - break; - default: - res = this.parseString(); - break; - } - if (res === null) { - this.pos += 1; - res = {t: 'Str', c: c}; - } - if (res) { - this.memo[startpos] = { inline: res, - endpos: this.pos }; - } - return res; -}; - -// Parse s as a list of inlines, using refmap to resolve references. -var parseInlines = function(s, refmap) { - this.subject = s; - this.pos = 0; - this.refmap = refmap || {}; - this.memo = {}; - this.last_emphasis_closer = null; - var inlines = []; - var next_inline; - while (next_inline = this.parseInline()) { - inlines.push(next_inline); - } - return inlines; -}; - -// The InlineParser object. 
-function InlineParser(){ - return { - subject: '', - label_nest_level: 0, // used by parseLinkLabel method - last_emphasis_closer: null, // used by parseEmphasis method - pos: 0, - refmap: {}, - memo: {}, - match: match, - peek: peek, - spnl: spnl, - parseBackticks: parseBackticks, - parseBackslash: parseBackslash, - parseAutolink: parseAutolink, - parseHtmlTag: parseHtmlTag, - scanDelims: scanDelims, - parseEmphasis: parseEmphasis, - parseLinkTitle: parseLinkTitle, - parseLinkDestination: parseLinkDestination, - parseLinkLabel: parseLinkLabel, - parseLink: parseLink, - parseEntity: parseEntity, - parseString: parseString, - parseNewline: parseNewline, - parseImage: parseImage, - parseReference: parseReference, - parseInline: parseInline, - parse: parseInlines - }; -} - -// DOC PARSER - -// These are methods of a DocParser object, defined below. - -var makeBlock = function(tag, start_line, start_column) { - return { t: tag, - open: true, - last_line_blank: false, - start_line: start_line, - start_column: start_column, - end_line: start_line, - children: [], - parent: null, - // string_content is formed by concatenating strings, in finalize: - string_content: "", - strings: [], - inline_content: [] + }; + + // Attempt to parse an entity, return Entity object if successful. + var parseEntity = function() { + var m; + if ((m = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i))) { + return { t: 'Entity', c: m }; + } else { + return null; + } + }; + + // Parse a run of ordinary characters, or a single character with + // a special meaning in markdown, as a plain string, adding to inlines. + var parseString = function() { + var m; + if (m = this.match(reMain)) { + return { t: 'Str', c: m }; + } else { + return null; + } + }; + + // Parse a newline. If it was preceded by two spaces, return a hard + // line break; otherwise a soft line break. 
+ var parseNewline = function() { + var m = this.match(/^ *\n/); + if (m) { + if (m.length > 2) { + return { t: 'Hardbreak' }; + } else if (m.length > 0) { + return { t: 'Softbreak' }; + } + } + return null; + }; + + // Attempt to parse an image. If the opening '!' is not followed + // by a link, return a literal '!'. + var parseImage = function() { + if (this.match(/^!/)) { + var link = this.parseLink(); + if (link) { + link.t = 'Image'; + return link; + } else { + return { t: 'Str', c: '!' }; + } + } else { + return null; + } + }; + + // Attempt to parse a link reference, modifying refmap. + var parseReference = function(s, refmap) { + this.subject = s; + this.pos = 0; + var rawlabel; + var dest; + var title; + var matchChars; + var startpos = this.pos; + var match; + + // label: + matchChars = this.parseLinkLabel(); + if (matchChars === 0) { + return 0; + } else { + rawlabel = this.subject.substr(0, matchChars); + } + + // colon: + if (this.peek() === ':') { + this.pos++; + } else { + this.pos = startpos; + return 0; + } + + // link url + this.spnl(); + + dest = this.parseLinkDestination(); + if (dest === null || dest.length === 0) { + this.pos = startpos; + return 0; + } + + var beforetitle = this.pos; + this.spnl(); + title = this.parseLinkTitle(); + if (title === null) { + title = ''; + // rewind before spaces + this.pos = beforetitle; + } + + // make sure we're at line end: + if (this.match(/^ *(?:\n|$)/) === null) { + this.pos = startpos; + return 0; + } + + var normlabel = normalizeReference(rawlabel); + + if (!refmap[normlabel]) { + refmap[normlabel] = { destination: dest, title: title }; + } + return this.pos - startpos; + }; + + // Parse the next inline element in subject, advancing subject position + // and returning the inline parsed. 
+ var parseInline = function() { + var startpos = this.pos; + var memoized = this.memo[startpos]; + if (memoized) { + this.pos = memoized.endpos; + return memoized.inline; + } + var c = this.peek(); + if (!c) { + return null; + } + var res; + switch(c) { + case '\n': + case ' ': + res = this.parseNewline(); + break; + case '\\': + res = this.parseBackslash(); + break; + case '`': + res = this.parseBackticks(); + break; + case '*': + case '_': + res = this.parseEmphasis(); + break; + case '[': + res = this.parseLink(); + break; + case '!': + res = this.parseImage(); + break; + case '<': + res = this.parseAutolink() || this.parseHtmlTag(); + break; + case '&': + res = this.parseEntity(); + break; + default: + res = this.parseString(); + break; + } + if (res === null) { + this.pos += 1; + res = {t: 'Str', c: c}; + } + if (res) { + this.memo[startpos] = { inline: res, + endpos: this.pos }; + } + return res; + }; + + // Parse s as a list of inlines, using refmap to resolve references. + var parseInlines = function(s, refmap) { + this.subject = s; + this.pos = 0; + this.refmap = refmap || {}; + this.memo = {}; + this.last_emphasis_closer = null; + var inlines = []; + var next_inline; + while (next_inline = this.parseInline()) { + inlines.push(next_inline); + } + return inlines; + }; + + // The InlineParser object. 
+ function InlineParser(){ + return { + subject: '', + label_nest_level: 0, // used by parseLinkLabel method + last_emphasis_closer: null, // used by parseEmphasis method + pos: 0, + refmap: {}, + memo: {}, + match: match, + peek: peek, + spnl: spnl, + parseBackticks: parseBackticks, + parseBackslash: parseBackslash, + parseAutolink: parseAutolink, + parseHtmlTag: parseHtmlTag, + scanDelims: scanDelims, + parseEmphasis: parseEmphasis, + parseLinkTitle: parseLinkTitle, + parseLinkDestination: parseLinkDestination, + parseLinkLabel: parseLinkLabel, + parseLink: parseLink, + parseEntity: parseEntity, + parseString: parseString, + parseNewline: parseNewline, + parseImage: parseImage, + parseReference: parseReference, + parseInline: parseInline, + parse: parseInlines }; -}; - -// Returns true if parent block can contain child block. -var canContain = function(parent_type, child_type) { - return ( parent_type == 'Document' || - parent_type == 'BlockQuote' || - parent_type == 'ListItem' || - (parent_type == 'List' && child_type == 'ListItem') ); -}; - -// Returns true if block type can accept lines of text. -var acceptsLines = function(block_type) { - return ( block_type == 'Paragraph' || - block_type == 'IndentedCode' || - block_type == 'FencedCode' ); -}; - -// Returns true if block ends with a blank line, descending if needed -// into lists and sublists. -var endsWithBlankLine = function(block) { - if (block.last_line_blank) { - return true; - } - if ((block.t == 'List' || block.t == 'ListItem') && block.children.length > 0) { - return endsWithBlankLine(block.children[block.children.length - 1]); - } else { - return false; - } -}; - -// Break out of all containing lists, resetting the tip of the -// document to the parent of the highest list, and finalizing -// all the lists. (This is used to implement the "two blank lines -// break of of all lists" feature.) 
-var breakOutOfLists = function(block, line_number) { - var b = block; - var last_list = null; - do { - if (b.t === 'List') { - last_list = b; } - b = b.parent; - } while (b); - if (last_list) { - while (block != last_list) { - this.finalize(block, line_number); - block = block.parent; - } - this.finalize(last_list, line_number); - this.tip = last_list.parent; - } -}; - -// Add a line to the block at the tip. We assume the tip -// can accept lines -- that check should be done before calling this. -var addLine = function(ln, offset) { - var s = ln.slice(offset); - if (!(this.tip.open)) { - throw({ msg: "Attempted to add line (" + ln + ") to closed container." }); - } - this.tip.strings.push(s); -}; - -// Add block of type tag as a child of the tip. If the tip can't -// accept children, close and finalize it and try its parent, -// and so on til we find a block that can accept children. -var addChild = function(tag, line_number, offset) { - while (!canContain(this.tip.t, tag)) { - this.finalize(this.tip, line_number); - } - - var column_number = offset + 1; // offset 0 = column 1 - var newBlock = makeBlock(tag, line_number, column_number); - this.tip.children.push(newBlock); - newBlock.parent = this.tip; - this.tip = newBlock; - return newBlock; -}; - -// Parse a list marker and return data on the marker (type, -// start, delimiter, bullet character, padding) or null. 
-var parseListMarker = function(ln, offset) { - var rest = ln.slice(offset); - var match; - var spaces_after_marker; - var data = {}; - if (rest.match(reHrule)) { - return null; - } - if ((match = rest.match(/^[*+-]( +|$)/))) { - spaces_after_marker = match[1].length; - data.type = 'Bullet'; - data.bullet_char = match[0][0]; - - } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) { - spaces_after_marker = match[3].length; - data.type = 'Ordered'; - data.start = parseInt(match[1]); - data.delimiter = match[2]; - } else { - return null; - } - var blank_item = match[0].length === rest.length; - if (spaces_after_marker >= 5 || - spaces_after_marker < 1 || - blank_item) { - data.padding = match[0].length - spaces_after_marker + 1; - } else { - data.padding = match[0].length; - } - return data; -}; - -// Returns true if the two list items are of the same type, -// with the same delimiter and bullet character. This is used -// in agglomerating list items into lists. -var listsMatch = function(list_data, item_data) { - return (list_data.type === item_data.type && - list_data.delimiter === item_data.delimiter && - list_data.bullet_char === item_data.bullet_char); -}; - -// Analyze a line of text and update the document appropriately. -// We parse markdown text by calling this on each line of input, -// then finalizing the document. -var incorporateLine = function(ln, line_number) { - - var all_matched = true; - var last_child; - var first_nonspace; - var offset = 0; - var match; - var data; - var blank; - var indent; - var last_matched_container; - var i; - var CODE_INDENT = 4; - - var container = this.doc; - var oldtip = this.tip; - - // Convert tabs to spaces: - ln = detabLine(ln); - - // For each containing block, try to parse the associated line start. - // Bail out on failure: container will point to the last matching block. - // Set all_matched to false if not all containers match. 
- while (container.children.length > 0) { - last_child = container.children[container.children.length - 1]; - if (!last_child.open) { - break; - } - container = last_child; - - match = matchAt(/[^ ]/, ln, offset); - if (match === null) { - first_nonspace = ln.length; - blank = true; - } else { - first_nonspace = match; - blank = false; - } - indent = first_nonspace - offset; - - switch (container.t) { - case 'BlockQuote': - var matched = indent <= 3 && ln[first_nonspace] === '>'; - if (matched) { - offset = first_nonspace + 1; - if (ln[offset] === ' ') { - offset++; - } + // DOC PARSER + + // These are methods of a DocParser object, defined below. + + var makeBlock = function(tag, start_line, start_column) { + return { t: tag, + open: true, + last_line_blank: false, + start_line: start_line, + start_column: start_column, + end_line: start_line, + children: [], + parent: null, + // string_content is formed by concatenating strings, in finalize: + string_content: "", + strings: [], + inline_content: [] + }; + }; + + // Returns true if parent block can contain child block. + var canContain = function(parent_type, child_type) { + return ( parent_type == 'Document' || + parent_type == 'BlockQuote' || + parent_type == 'ListItem' || + (parent_type == 'List' && child_type == 'ListItem') ); + }; + + // Returns true if block type can accept lines of text. + var acceptsLines = function(block_type) { + return ( block_type == 'Paragraph' || + block_type == 'IndentedCode' || + block_type == 'FencedCode' ); + }; + + // Returns true if block ends with a blank line, descending if needed + // into lists and sublists. 
+ var endsWithBlankLine = function(block) { + if (block.last_line_blank) { + return true; + } + if ((block.t == 'List' || block.t == 'ListItem') && block.children.length > 0) { + return endsWithBlankLine(block.children[block.children.length - 1]); } else { - all_matched = false; + return false; } - break; + }; + + // Break out of all containing lists, resetting the tip of the + // document to the parent of the highest list, and finalizing + // all the lists. (This is used to implement the "two blank lines + // break of of all lists" feature.) + var breakOutOfLists = function(block, line_number) { + var b = block; + var last_list = null; + do { + if (b.t === 'List') { + last_list = b; + } + b = b.parent; + } while (b); - case 'ListItem': - if (indent >= container.list_data.marker_offset + - container.list_data.padding) { - offset += container.list_data.marker_offset + - container.list_data.padding; - } else if (blank) { - offset = first_nonspace; - } else { - all_matched = false; + if (last_list) { + while (block != last_list) { + this.finalize(block, line_number); + block = block.parent; + } + this.finalize(last_list, line_number); + this.tip = last_list.parent; + } + }; + + // Add a line to the block at the tip. We assume the tip + // can accept lines -- that check should be done before calling this. + var addLine = function(ln, offset) { + var s = ln.slice(offset); + if (!(this.tip.open)) { + throw({ msg: "Attempted to add line (" + ln + ") to closed container." }); + } + this.tip.strings.push(s); + }; + + // Add block of type tag as a child of the tip. If the tip can't + // accept children, close and finalize it and try its parent, + // and so on til we find a block that can accept children. 
+ var addChild = function(tag, line_number, offset) { + while (!canContain(this.tip.t, tag)) { + this.finalize(this.tip, line_number); } - break; - case 'IndentedCode': - if (indent >= CODE_INDENT) { - offset += CODE_INDENT; - } else if (blank) { - offset = first_nonspace; + var column_number = offset + 1; // offset 0 = column 1 + var newBlock = makeBlock(tag, line_number, column_number); + this.tip.children.push(newBlock); + newBlock.parent = this.tip; + this.tip = newBlock; + return newBlock; + }; + + // Parse a list marker and return data on the marker (type, + // start, delimiter, bullet character, padding) or null. + var parseListMarker = function(ln, offset) { + var rest = ln.slice(offset); + var match; + var spaces_after_marker; + var data = {}; + if (rest.match(reHrule)) { + return null; + } + if ((match = rest.match(/^[*+-]( +|$)/))) { + spaces_after_marker = match[1].length; + data.type = 'Bullet'; + data.bullet_char = match[0][0]; + + } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) { + spaces_after_marker = match[3].length; + data.type = 'Ordered'; + data.start = parseInt(match[1]); + data.delimiter = match[2]; } else { - all_matched = false; + return null; + } + var blank_item = match[0].length === rest.length; + if (spaces_after_marker >= 5 || + spaces_after_marker < 1 || + blank_item) { + data.padding = match[0].length - spaces_after_marker + 1; + } else { + data.padding = match[0].length; + } + return data; + }; + + // Returns true if the two list items are of the same type, + // with the same delimiter and bullet character. This is used + // in agglomerating list items into lists. + var listsMatch = function(list_data, item_data) { + return (list_data.type === item_data.type && + list_data.delimiter === item_data.delimiter && + list_data.bullet_char === item_data.bullet_char); + }; + + // Analyze a line of text and update the document appropriately. 
+ // We parse markdown text by calling this on each line of input, + // then finalizing the document. + var incorporateLine = function(ln, line_number) { + + var all_matched = true; + var last_child; + var first_nonspace; + var offset = 0; + var match; + var data; + var blank; + var indent; + var last_matched_container; + var i; + var CODE_INDENT = 4; + + var container = this.doc; + var oldtip = this.tip; + + // Convert tabs to spaces: + ln = detabLine(ln); + + // For each containing block, try to parse the associated line start. + // Bail out on failure: container will point to the last matching block. + // Set all_matched to false if not all containers match. + while (container.children.length > 0) { + last_child = container.children[container.children.length - 1]; + if (!last_child.open) { + break; + } + container = last_child; + + match = matchAt(/[^ ]/, ln, offset); + if (match === null) { + first_nonspace = ln.length; + blank = true; + } else { + first_nonspace = match; + blank = false; + } + indent = first_nonspace - offset; + + switch (container.t) { + case 'BlockQuote': + var matched = indent <= 3 && ln[first_nonspace] === '>'; + if (matched) { + offset = first_nonspace + 1; + if (ln[offset] === ' ') { + offset++; + } + } else { + all_matched = false; + } + break; + + case 'ListItem': + if (indent >= container.list_data.marker_offset + + container.list_data.padding) { + offset += container.list_data.marker_offset + + container.list_data.padding; + } else if (blank) { + offset = first_nonspace; + } else { + all_matched = false; + } + break; + + case 'IndentedCode': + if (indent >= CODE_INDENT) { + offset += CODE_INDENT; + } else if (blank) { + offset = first_nonspace; + } else { + all_matched = false; + } + break; + + case 'ATXHeader': + case 'SetextHeader': + case 'HorizontalRule': + // a header can never container > 1 line, so fail to match: + all_matched = false; + break; + + case 'FencedCode': + // skip optional spaces of fence offset + i = 
container.fence_offset; + while (i > 0 && ln[offset] === ' ') { + offset++; + i--; + } + break; + + case 'HtmlBlock': + if (blank) { + all_matched = false; + } + break; + + case 'Paragraph': + if (blank) { + container.last_line_blank = true; + all_matched = false; + } + break; + + default: + } + + if (!all_matched) { + container = container.parent; // back up to last matching block + break; + } } - break; - case 'ATXHeader': - case 'SetextHeader': - case 'HorizontalRule': - // a header can never container > 1 line, so fail to match: - all_matched = false; - break; + last_matched_container = container; + + // This function is used to finalize and close any unmatched + // blocks. We aren't ready to do this now, because we might + // have a lazy paragraph continuation, in which case we don't + // want to close unmatched blocks. So we store this closure for + // use later, when we have more information. + var closeUnmatchedBlocks = function(mythis) { + // finalize any blocks not matched + while (!already_done && oldtip != last_matched_container) { + mythis.finalize(oldtip, line_number); + oldtip = oldtip.parent; + } + var already_done = true; + }; - case 'FencedCode': - // skip optional spaces of fence offset - i = container.fence_offset; - while (i > 0 && ln[offset] === ' ') { - offset++; - i--; + // Check to see if we've hit 2nd blank line; if so break out of list: + if (blank && container.last_line_blank) { + this.breakOutOfLists(container, line_number); } - break; - case 'HtmlBlock': - if (blank) { - all_matched = false; + // Unless last matched container is a code block, try new container starts, + // adding children to the last matched container: + while (container.t != 'FencedCode' && + container.t != 'IndentedCode' && + container.t != 'HtmlBlock' && + // this is a little performance optimization: + matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { + + match = matchAt(/[^ ]/, ln, offset); + if (match === null) { + first_nonspace = ln.length; + blank = true; + } 
else { + first_nonspace = match; + blank = false; + } + indent = first_nonspace - offset; + + if (indent >= CODE_INDENT) { + // indented code + if (this.tip.t != 'Paragraph' && !blank) { + offset += CODE_INDENT; + closeUnmatchedBlocks(this); + container = this.addChild('IndentedCode', line_number, offset); + } else { // indent > 4 in a lazy paragraph continuation + break; + } + + } else if (ln[first_nonspace] === '>') { + // blockquote + offset = first_nonspace + 1; + // optional following space + if (ln[offset] === ' ') { + offset++; + } + closeUnmatchedBlocks(this); + container = this.addChild('BlockQuote', line_number, offset); + + } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { + // ATX header + offset = first_nonspace + match[0].length; + closeUnmatchedBlocks(this); + container = this.addChild('ATXHeader', line_number, first_nonspace); + container.level = match[0].trim().length; // number of #s + // remove trailing ###s: + container.strings = + [ln.slice(offset).replace(/(?:(\\#) *#*| *#+) *$/,'$1')]; + break; + + } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) { + // fenced code block + var fence_length = match[0].length; + closeUnmatchedBlocks(this); + container = this.addChild('FencedCode', line_number, first_nonspace); + container.fence_length = fence_length; + container.fence_char = match[0][0]; + container.fence_offset = first_nonspace - offset; + offset = first_nonspace + fence_length; + break; + + } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { + // html block + closeUnmatchedBlocks(this); + container = this.addChild('HtmlBlock', line_number, first_nonspace); + // note, we don't adjust offset because the tag is part of the text + break; + + } else if (container.t == 'Paragraph' && + container.strings.length === 1 && + ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { + // setext header line + closeUnmatchedBlocks(this); + container.t = 'SetextHeader'; // 
convert Paragraph to SetextHeader + container.level = match[0][0] === '=' ? 1 : 2; + offset = ln.length; + + } else if (matchAt(reHrule, ln, first_nonspace) !== null) { + // hrule + closeUnmatchedBlocks(this); + container = this.addChild('HorizontalRule', line_number, first_nonspace); + offset = ln.length - 1; + break; + + } else if ((data = parseListMarker(ln, first_nonspace))) { + // list item + closeUnmatchedBlocks(this); + data.marker_offset = indent; + offset = first_nonspace + data.padding; + + // add the list if needed + if (container.t !== 'List' || + !(listsMatch(container.list_data, data))) { + container = this.addChild('List', line_number, first_nonspace); + container.list_data = data; + } + + // add the list item + container = this.addChild('ListItem', line_number, first_nonspace); + container.list_data = data; + + } else { + break; + + } + + if (acceptsLines(container.t)) { + // if it's a line container, it can't contain other containers + break; + } } - break; - case 'Paragraph': - if (blank) { - container.last_line_blank = true; - all_matched = false; + // What remains at the offset is a text line. Add the text to the + // appropriate container. + + match = matchAt(/[^ ]/, ln, offset); + if (match === null) { + first_nonspace = ln.length; + blank = true; + } else { + first_nonspace = match; + blank = false; } - break; + indent = first_nonspace - offset; + + // First check for a lazy paragraph continuation: + if (this.tip !== last_matched_container && + !blank && + this.tip.t == 'Paragraph' && + this.tip.strings.length > 0) { + // lazy paragraph continuation + + this.last_line_blank = false; + this.addLine(ln, offset); + + } else { // not a lazy continuation + + // finalize any blocks not matched + closeUnmatchedBlocks(this); + + // Block quote lines are never blank as they start with > + // and we don't count blanks in fenced code for purposes of tight/loose + // lists or breaking out of lists. 
We also don't set last_line_blank + // on an empty list item. + container.last_line_blank = blank && + !(container.t == 'BlockQuote' || + container.t == 'FencedCode' || + (container.t == 'ListItem' && + container.children.length === 0 && + container.start_line == line_number)); + + var cont = container; + while (cont.parent) { + cont.parent.last_line_blank = false; + cont = cont.parent; + } - default: - } + switch (container.t) { + case 'IndentedCode': + case 'HtmlBlock': + this.addLine(ln, offset); + break; + + case 'FencedCode': + // check for closing code fence: + match = (indent <= 3 && + ln[first_nonspace] == container.fence_char && + ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/)); + if (match && match[0].length >= container.fence_length) { + // don't add closing fence to container; instead, close it: + this.finalize(container, line_number); + } else { + this.addLine(ln, offset); + } + break; + + case 'ATXHeader': + case 'SetextHeader': + case 'HorizontalRule': + // nothing to do; we already added the contents. + break; + + default: + if (acceptsLines(container.t)) { + this.addLine(ln, first_nonspace); + } else if (blank) { + // do nothing + } else if (container.t != 'HorizontalRule' && + container.t != 'SetextHeader') { + // create paragraph container for line + container = this.addChild('Paragraph', line_number, first_nonspace); + this.addLine(ln, first_nonspace); + } else { + console.log("Line " + line_number.toString() + + " with container type " + container.t + + " did not match any condition."); + + } + } + } + }; + + // Finalize a block. Close it and do any necessary postprocessing, + // e.g. creating string_content from strings, setting the 'tight' + // or 'loose' status of a list, and parsing the beginnings + // of paragraphs for reference definitions. Reset the tip to the + // parent of the closed block. 
+ var finalize = function(block, line_number) { + var pos; + // don't do anything if the block is already closed + if (!block.open) { + return 0; + } + block.open = false; + if (line_number > block.start_line) { + block.end_line = line_number - 1; + } else { + block.end_line = line_number; + } - if (!all_matched) { - container = container.parent; // back up to last matching block - break; - } - } - - last_matched_container = container; - - // This function is used to finalize and close any unmatched - // blocks. We aren't ready to do this now, because we might - // have a lazy paragraph continuation, in which case we don't - // want to close unmatched blocks. So we store this closure for - // use later, when we have more information. - var closeUnmatchedBlocks = function(mythis) { - // finalize any blocks not matched - while (!already_done && oldtip != last_matched_container) { - mythis.finalize(oldtip, line_number); - oldtip = oldtip.parent; - } - var already_done = true; - }; - - // Check to see if we've hit 2nd blank line; if so break out of list: - if (blank && container.last_line_blank) { - this.breakOutOfLists(container, line_number); - } - - // Unless last matched container is a code block, try new container starts, - // adding children to the last matched container: - while (container.t != 'FencedCode' && - container.t != 'IndentedCode' && - container.t != 'HtmlBlock' && - // this is a little performance optimization: - matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { - - match = matchAt(/[^ ]/, ln, offset); - if (match === null) { - first_nonspace = ln.length; - blank = true; - } else { - first_nonspace = match; - blank = false; - } - indent = first_nonspace - offset; - - if (indent >= CODE_INDENT) { - // indented code - if (this.tip.t != 'Paragraph' && !blank) { - offset += CODE_INDENT; - closeUnmatchedBlocks(this); - container = this.addChild('IndentedCode', line_number, offset); - } else { // indent > 4 in a lazy paragraph continuation - break; - } - 
- } else if (ln[first_nonspace] === '>') { - // blockquote - offset = first_nonspace + 1; - // optional following space - if (ln[offset] === ' ') { - offset++; - } - closeUnmatchedBlocks(this); - container = this.addChild('BlockQuote', line_number, offset); - - } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { - // ATX header - offset = first_nonspace + match[0].length; - closeUnmatchedBlocks(this); - container = this.addChild('ATXHeader', line_number, first_nonspace); - container.level = match[0].trim().length; // number of #s - // remove trailing ###s: - container.strings = - [ln.slice(offset).replace(/(?:(\\#) *#*| *#+) *$/,'$1')]; - break; - - } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) { - // fenced code block - var fence_length = match[0].length; - closeUnmatchedBlocks(this); - container = this.addChild('FencedCode', line_number, first_nonspace); - container.fence_length = fence_length; - container.fence_char = match[0][0]; - container.fence_offset = first_nonspace - offset; - offset = first_nonspace + fence_length; - break; - - } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { - // html block - closeUnmatchedBlocks(this); - container = this.addChild('HtmlBlock', line_number, first_nonspace); - // note, we don't adjust offset because the tag is part of the text - break; - - } else if (container.t == 'Paragraph' && - container.strings.length === 1 && - ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { - // setext header line - closeUnmatchedBlocks(this); - container.t = 'SetextHeader'; // convert Paragraph to SetextHeader - container.level = match[0][0] === '=' ? 
1 : 2; - offset = ln.length; - - } else if (matchAt(reHrule, ln, first_nonspace) !== null) { - // hrule - closeUnmatchedBlocks(this); - container = this.addChild('HorizontalRule', line_number, first_nonspace); - offset = ln.length - 1; - break; - - } else if ((data = parseListMarker(ln, first_nonspace))) { - // list item - closeUnmatchedBlocks(this); - data.marker_offset = indent; - offset = first_nonspace + data.padding; - - // add the list if needed - if (container.t !== 'List' || - !(listsMatch(container.list_data, data))) { - container = this.addChild('List', line_number, first_nonspace); - container.list_data = data; - } - - // add the list item - container = this.addChild('ListItem', line_number, first_nonspace); - container.list_data = data; - - } else { - break; + switch (block.t) { + case 'Paragraph': + block.string_content = block.strings.join('\n').replace(/^ */m,''); + + // try parsing the beginning as link reference definitions: + while (block.string_content[0] === '[' && + (pos = this.inlineParser.parseReference(block.string_content, + this.refmap))) { + block.string_content = block.string_content.slice(pos); + if (isBlank(block.string_content)) { + block.t = 'ReferenceDef'; + break; + } + } + break; - } + case 'ATXHeader': + case 'SetextHeader': + case 'HtmlBlock': + block.string_content = block.strings.join('\n'); + break; - if (acceptsLines(container.t)) { - // if it's a line container, it can't contain other containers - break; - } - } - - // What remains at the offset is a text line. Add the text to the - // appropriate container. 
- - match = matchAt(/[^ ]/, ln, offset); - if (match === null) { - first_nonspace = ln.length; - blank = true; - } else { - first_nonspace = match; - blank = false; - } - indent = first_nonspace - offset; - - // First check for a lazy paragraph continuation: - if (this.tip !== last_matched_container && - !blank && - this.tip.t == 'Paragraph' && - this.tip.strings.length > 0) { - // lazy paragraph continuation - - this.last_line_blank = false; - this.addLine(ln, offset); - - } else { // not a lazy continuation - - // finalize any blocks not matched - closeUnmatchedBlocks(this); - - // Block quote lines are never blank as they start with > - // and we don't count blanks in fenced code for purposes of tight/loose - // lists or breaking out of lists. We also don't set last_line_blank - // on an empty list item. - container.last_line_blank = blank && - !(container.t == 'BlockQuote' || - container.t == 'FencedCode' || - (container.t == 'ListItem' && - container.children.length === 0 && - container.start_line == line_number)); - - var cont = container; - while (cont.parent) { - cont.parent.last_line_blank = false; - cont = cont.parent; - } + case 'IndentedCode': + block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n'); + break; - switch (container.t) { - case 'IndentedCode': - case 'HtmlBlock': - this.addLine(ln, offset); - break; - - case 'FencedCode': - // check for closing code fence: - match = (indent <= 3 && - ln[first_nonspace] == container.fence_char && - ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/)); - if (match && match[0].length >= container.fence_length) { - // don't add closing fence to container; instead, close it: - this.finalize(container, line_number); - } else { - this.addLine(ln, offset); - } - break; - - case 'ATXHeader': - case 'SetextHeader': - case 'HorizontalRule': - // nothing to do; we already added the contents. 
- break; - - default: - if (acceptsLines(container.t)) { - this.addLine(ln, first_nonspace); - } else if (blank) { - // do nothing - } else if (container.t != 'HorizontalRule' && - container.t != 'SetextHeader') { - // create paragraph container for line - container = this.addChild('Paragraph', line_number, first_nonspace); - this.addLine(ln, first_nonspace); - } else { - console.log("Line " + line_number.toString() + - " with container type " + container.t + - " did not match any condition."); - - } - } - } -}; - -// Finalize a block. Close it and do any necessary postprocessing, -// e.g. creating string_content from strings, setting the 'tight' -// or 'loose' status of a list, and parsing the beginnings -// of paragraphs for reference definitions. Reset the tip to the -// parent of the closed block. -var finalize = function(block, line_number) { - var pos; - // don't do anything if the block is already closed - if (!block.open) { - return 0; - } - block.open = false; - if (line_number > block.start_line) { - block.end_line = line_number - 1; - } else { - block.end_line = line_number; - } - - switch (block.t) { - case 'Paragraph': - block.string_content = block.strings.join('\n').replace(/^ */m,''); - - // try parsing the beginning as link reference definitions: - while (block.string_content[0] === '[' && - (pos = this.inlineParser.parseReference(block.string_content, - this.refmap))) { - block.string_content = block.string_content.slice(pos); - if (isBlank(block.string_content)) { - block.t = 'ReferenceDef'; - break; - } - } - break; - - case 'ATXHeader': - case 'SetextHeader': - case 'HtmlBlock': - block.string_content = block.strings.join('\n'); - break; - - case 'IndentedCode': - block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n'); - break; - - case 'FencedCode': - // first line becomes info string - block.info = unescape(block.strings[0].trim()); - if (block.strings.length == 1) { - block.string_content = ''; - } else { - 
block.string_content = block.strings.slice(1).join('\n') + '\n'; - } - break; - - case 'List': - block.tight = true; // tight by default - - var numitems = block.children.length; - var i = 0; - while (i < numitems) { - var item = block.children[i]; - // check for non-final list item ending with blank line: - var last_item = i == numitems - 1; - if (endsWithBlankLine(item) && !last_item) { - block.tight = false; - break; - } - // recurse into children of list item, to see if there are - // spaces between any of them: - var numsubitems = item.children.length; - var j = 0; - while (j < numsubitems) { - var subitem = item.children[j]; - var last_subitem = j == numsubitems - 1; - if (endsWithBlankLine(subitem) && !(last_item && last_subitem)) { - block.tight = false; - break; - } - j++; - } - i++; - } - break; - - default: - break; - } - - this.tip = block.parent || this.top; -}; - -// Walk through a block & children recursively, parsing string content -// into inline content where appropriate. -var processInlines = function(block) { - switch(block.t) { - case 'Paragraph': - case 'SetextHeader': - case 'ATXHeader': - block.inline_content = - this.inlineParser.parse(block.string_content.trim(), this.refmap); - block.string_content = ""; - break; - default: - break; - } - - if (block.children) { - for (var i = 0; i < block.children.length; i++) { - this.processInlines(block.children[i]); - } - } - -}; - -// The main parsing function. Returns a parsed document AST. -var parse = function(input) { - this.doc = makeBlock('Document', 1, 1); - this.tip = this.doc; - this.refmap = {}; - var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/); - var len = lines.length; - for (var i = 0; i < len; i++) { - this.incorporateLine(lines[i], i+1); - } - while (this.tip) { - this.finalize(this.tip, len - 1); - } - this.processInlines(this.doc); - return this.doc; -}; - - -// The DocParser object. 
-function DocParser(){ - return { - doc: makeBlock('Document', 1, 1), - tip: this.doc, - refmap: {}, - inlineParser: new InlineParser(), - breakOutOfLists: breakOutOfLists, - addLine: addLine, - addChild: addChild, - incorporateLine: incorporateLine, - finalize: finalize, - processInlines: processInlines, - parse: parse - }; -} - -// HTML RENDERER - -// Helper function to produce content in a pair of HTML tags. -var inTags = function(tag, attribs, contents, selfclosing) { - var result = '<' + tag; - if (attribs) { - var i = 0; - var attrib; - while ((attrib = attribs[i]) !== undefined) { - result = result.concat(' ', attrib[0], '="', attrib[1], '"'); - i++; + case 'FencedCode': + // first line becomes info string + block.info = unescape(block.strings[0].trim()); + if (block.strings.length == 1) { + block.string_content = ''; + } else { + block.string_content = block.strings.slice(1).join('\n') + '\n'; + } + break; + + case 'List': + block.tight = true; // tight by default + + var numitems = block.children.length; + var i = 0; + while (i < numitems) { + var item = block.children[i]; + // check for non-final list item ending with blank line: + var last_item = i == numitems - 1; + if (endsWithBlankLine(item) && !last_item) { + block.tight = false; + break; + } + // recurse into children of list item, to see if there are + // spaces between any of them: + var numsubitems = item.children.length; + var j = 0; + while (j < numsubitems) { + var subitem = item.children[j]; + var last_subitem = j == numsubitems - 1; + if (endsWithBlankLine(subitem) && !(last_item && last_subitem)) { + block.tight = false; + break; + } + j++; + } + i++; + } + break; + + default: + break; + } + + this.tip = block.parent || this.top; + }; + + // Walk through a block & children recursively, parsing string content + // into inline content where appropriate. 
+ var processInlines = function(block) { + switch(block.t) { + case 'Paragraph': + case 'SetextHeader': + case 'ATXHeader': + block.inline_content = + this.inlineParser.parse(block.string_content.trim(), this.refmap); + block.string_content = ""; + break; + default: + break; + } + + if (block.children) { + for (var i = 0; i < block.children.length; i++) { + this.processInlines(block.children[i]); + } + } + + }; + + // The main parsing function. Returns a parsed document AST. + var parse = function(input) { + this.doc = makeBlock('Document', 1, 1); + this.tip = this.doc; + this.refmap = {}; + var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/); + var len = lines.length; + for (var i = 0; i < len; i++) { + this.incorporateLine(lines[i], i+1); + } + while (this.tip) { + this.finalize(this.tip, len - 1); + } + this.processInlines(this.doc); + return this.doc; + }; + + + // The DocParser object. + function DocParser(){ + return { + doc: makeBlock('Document', 1, 1), + tip: this.doc, + refmap: {}, + inlineParser: new InlineParser(), + breakOutOfLists: breakOutOfLists, + addLine: addLine, + addChild: addChild, + incorporateLine: incorporateLine, + finalize: finalize, + processInlines: processInlines, + parse: parse + }; } - } - if (contents) { - result = result.concat('>', contents, ''); - } else if (selfclosing) { - result = result + ' />'; - } else { - result = result.concat('>'); - } - return result; -}; - -// Render an inline element as HTML. 
-var renderInline = function(inline) { - var attrs; - switch (inline.t) { - case 'Str': - return this.escape(inline.c); - case 'Softbreak': - return this.softbreak; - case 'Hardbreak': - return inTags('br',[],"",true) + '\n'; - case 'Emph': - return inTags('em', [], this.renderInlines(inline.c)); - case 'Strong': - return inTags('strong', [], this.renderInlines(inline.c)); - case 'Html': - return inline.c; - case 'Entity': - return inline.c; - case 'Link': - attrs = [['href', this.escape(inline.destination, true)]]; - if (inline.title) { - attrs.push(['title', this.escape(inline.title, true)]); - } - return inTags('a', attrs, this.renderInlines(inline.label)); - case 'Image': - attrs = [['src', this.escape(inline.destination, true)], - ['alt', this.escape(this.renderInlines(inline.label))]]; - if (inline.title) { - attrs.push(['title', this.escape(inline.title, true)]); - } - return inTags('img', attrs, "", true); - case 'Code': - return inTags('code', [], this.escape(inline.c)); - default: - console.log("Uknown inline type " + inline.t); - return ""; - } -}; - -// Render a list of inlines. -var renderInlines = function(inlines) { - var result = ''; - for (var i=0; i < inlines.length; i++) { - result = result + this.renderInline(inlines[i]); - } - return result; -}; - -// Render a single block element. -var renderBlock = function(block, in_tight_list) { - var tag; - var attr; - var info_words; - switch (block.t) { - case 'Document': - var whole_doc = this.renderBlocks(block.children); - return (whole_doc === '' ? '' : whole_doc + '\n'); - case 'Paragraph': - if (in_tight_list) { - return this.renderInlines(block.inline_content); - } else { - return inTags('p', [], this.renderInlines(block.inline_content)); - } - break; - case 'BlockQuote': - var filling = this.renderBlocks(block.children); - return inTags('blockquote', [], filling === '' ? 
this.innersep : - this.innersep + this.renderBlocks(block.children) + this.innersep); - case 'ListItem': - return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim()); - case 'List': - tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol'; - attr = (!block.list_data.start || block.list_data.start == 1) ? - [] : [['start', block.list_data.start.toString()]]; - return inTags(tag, attr, this.innersep + - this.renderBlocks(block.children, block.tight) + - this.innersep); - case 'ATXHeader': - case 'SetextHeader': - tag = 'h' + block.level; - return inTags(tag, [], this.renderInlines(block.inline_content)); - case 'IndentedCode': - return inTags('pre', [], - inTags('code', [], this.escape(block.string_content))); - case 'FencedCode': - info_words = block.info.split(/ +/); - attr = info_words.length === 0 || info_words[0].length === 0 ? - [] : [['class','language-' + - this.escape(info_words[0],true)]]; - return inTags('pre', [], - inTags('code', attr, this.escape(block.string_content))); - case 'HtmlBlock': - return block.string_content; - case 'ReferenceDef': - return ""; - case 'HorizontalRule': - return inTags('hr',[],"",true); - default: - console.log("Uknown block type " + block.t); - return ""; - } -}; - -// Render a list of block elements, separated by this.blocksep. -var renderBlocks = function(blocks, in_tight_list) { - var result = []; - for (var i=0; i < blocks.length; i++) { - if (blocks[i].t !== 'ReferenceDef') { - result.push(this.renderBlock(blocks[i], in_tight_list)); + + // HTML RENDERER + + // Helper function to produce content in a pair of HTML tags. 
+ var inTags = function(tag, attribs, contents, selfclosing) { + var result = '<' + tag; + if (attribs) { + var i = 0; + var attrib; + while ((attrib = attribs[i]) !== undefined) { + result = result.concat(' ', attrib[0], '="', attrib[1], '"'); + i++; + } + } + if (contents) { + result = result.concat('>', contents, ''); + } else if (selfclosing) { + result = result + ' />'; + } else { + result = result.concat('>'); + } + return result; + }; + + // Render an inline element as HTML. + var renderInline = function(inline) { + var attrs; + switch (inline.t) { + case 'Str': + return this.escape(inline.c); + case 'Softbreak': + return this.softbreak; + case 'Hardbreak': + return inTags('br',[],"",true) + '\n'; + case 'Emph': + return inTags('em', [], this.renderInlines(inline.c)); + case 'Strong': + return inTags('strong', [], this.renderInlines(inline.c)); + case 'Html': + return inline.c; + case 'Entity': + return inline.c; + case 'Link': + attrs = [['href', this.escape(inline.destination, true)]]; + if (inline.title) { + attrs.push(['title', this.escape(inline.title, true)]); + } + return inTags('a', attrs, this.renderInlines(inline.label)); + case 'Image': + attrs = [['src', this.escape(inline.destination, true)], + ['alt', this.escape(this.renderInlines(inline.label))]]; + if (inline.title) { + attrs.push(['title', this.escape(inline.title, true)]); + } + return inTags('img', attrs, "", true); + case 'Code': + return inTags('code', [], this.escape(inline.c)); + default: + console.log("Uknown inline type " + inline.t); + return ""; + } + }; + + // Render a list of inlines. + var renderInlines = function(inlines) { + var result = ''; + for (var i=0; i < inlines.length; i++) { + result = result + this.renderInline(inlines[i]); + } + return result; + }; + + // Render a single block element. 
+ var renderBlock = function(block, in_tight_list) { + var tag; + var attr; + var info_words; + switch (block.t) { + case 'Document': + var whole_doc = this.renderBlocks(block.children); + return (whole_doc === '' ? '' : whole_doc + '\n'); + case 'Paragraph': + if (in_tight_list) { + return this.renderInlines(block.inline_content); + } else { + return inTags('p', [], this.renderInlines(block.inline_content)); + } + break; + case 'BlockQuote': + var filling = this.renderBlocks(block.children); + return inTags('blockquote', [], filling === '' ? this.innersep : + this.innersep + this.renderBlocks(block.children) + this.innersep); + case 'ListItem': + return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim()); + case 'List': + tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol'; + attr = (!block.list_data.start || block.list_data.start == 1) ? + [] : [['start', block.list_data.start.toString()]]; + return inTags(tag, attr, this.innersep + + this.renderBlocks(block.children, block.tight) + + this.innersep); + case 'ATXHeader': + case 'SetextHeader': + tag = 'h' + block.level; + return inTags(tag, [], this.renderInlines(block.inline_content)); + case 'IndentedCode': + return inTags('pre', [], + inTags('code', [], this.escape(block.string_content))); + case 'FencedCode': + info_words = block.info.split(/ +/); + attr = info_words.length === 0 || info_words[0].length === 0 ? + [] : [['class','language-' + + this.escape(info_words[0],true)]]; + return inTags('pre', [], + inTags('code', attr, this.escape(block.string_content))); + case 'HtmlBlock': + return block.string_content; + case 'ReferenceDef': + return ""; + case 'HorizontalRule': + return inTags('hr',[],"",true); + default: + console.log("Uknown block type " + block.t); + return ""; + } + }; + + // Render a list of block elements, separated by this.blocksep. 
+ var renderBlocks = function(blocks, in_tight_list) { + var result = []; + for (var i=0; i < blocks.length; i++) { + if (blocks[i].t !== 'ReferenceDef') { + result.push(this.renderBlock(blocks[i], in_tight_list)); + } + } + return result.join(this.blocksep); + }; + + // The HtmlRenderer object. + function HtmlRenderer(){ + return { + // default options: + blocksep: '\n', // space between blocks + innersep: '\n', // space between block container tag and contents + softbreak: '\n', // by default, soft breaks are rendered as newlines in HTML + // set to "
" to make them hard breaks + // set to " " if you want to ignore line wrapping in source + escape: function(s, preserve_entities) { + if (preserve_entities) { + return s.replace(/[&](?![#](x[a-f0-9]{1,8}|[0-9]{1,8});|[a-z][a-z0-9]{1,31};)/gi,'&') + .replace(/[<]/g,'<') + .replace(/[>]/g,'>') + .replace(/["]/g,'"'); + } else { + return s.replace(/[&]/g,'&') + .replace(/[<]/g,'<') + .replace(/[>]/g,'>') + .replace(/["]/g,'"'); + } + }, + renderInline: renderInline, + renderInlines: renderInlines, + renderBlock: renderBlock, + renderBlocks: renderBlocks, + render: renderBlock + }; } - } - return result.join(this.blocksep); -}; - -// The HtmlRenderer object. -function HtmlRenderer(){ - return { - // default options: - blocksep: '\n', // space between blocks - innersep: '\n', // space between block container tag and contents - softbreak: '\n', // by default, soft breaks are rendered as newlines in HTML - // set to "
" to make them hard breaks - // set to " " if you want to ignore line wrapping in source - escape: function(s, preserve_entities) { - if (preserve_entities) { - return s.replace(/[&](?![#](x[a-f0-9]{1,8}|[0-9]{1,8});|[a-z][a-z0-9]{1,31};)/gi,'&') - .replace(/[<]/g,'<') - .replace(/[>]/g,'>') - .replace(/["]/g,'"'); - } else { - return s.replace(/[&]/g,'&') - .replace(/[<]/g,'<') - .replace(/[>]/g,'>') - .replace(/["]/g,'"'); - } - }, - renderInline: renderInline, - renderInlines: renderInlines, - renderBlock: renderBlock, - renderBlocks: renderBlocks, - render: renderBlock - }; -} - -exports.DocParser = DocParser; -exports.HtmlRenderer = HtmlRenderer; + + exports.DocParser = DocParser; + exports.HtmlRenderer = HtmlRenderer; })(typeof exports === 'undefined' ? this.stmd = {} : exports); -- cgit v1.2.3 From 0efcb9ff947ee9fcda77f317f2bec811160dca4a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 11 Sep 2014 09:34:29 -0700 Subject: jshint improvements. --- js/stmd.js | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 6cf65d4..f7a1e4c 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -265,7 +265,7 @@ var startpos = this.pos; var c ; var first_close = 0; - var c = this.peek(); + c = this.peek(); if (!(c === '*' || c === '_')) { return null; } @@ -324,7 +324,7 @@ this.last_emphasis_closer = null; return inlines[0]; } - } else if (next_inline = this.parseInline()) { + } else if ((next_inline = this.parseInline())) { inlines.push(next_inline); } else { break; @@ -396,7 +396,9 @@ this.parseBackticks(); break; case '<': - this.parseAutolink() || this.parseHtmlTag() || this.parseString(); + if (!(this.parseAutolink())) { + this.parseHtmlTag(); + } break; case '[': // nested [] nest_level++; @@ -515,7 +517,7 @@ // a special meaning in markdown, as a plain string, adding to inlines. 
var parseString = function() { var m; - if (m = this.match(reMain)) { + if ((m = this.match(reMain))) { return { t: 'Str', c: m }; } else { return null; @@ -676,7 +678,7 @@ this.last_emphasis_closer = null; var inlines = []; var next_inline; - while (next_inline = this.parseInline()) { + while ((next_inline = this.parseInline())) { inlines.push(next_inline); } return inlines; -- cgit v1.2.3 From 026fd723dc8bc327b86096c489df5b8f8e9035ba Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 11 Sep 2014 10:39:51 -0700 Subject: Fixed typo. starting --- js/stmd.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index f7a1e4c..4b80581 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -1401,7 +1401,7 @@ case 'Code': return inTags('code', [], this.escape(inline.c)); default: - console.log("Uknown inline type " + inline.t); + console.log("Unknown inline type " + inline.t); return ""; } }; @@ -1465,7 +1465,7 @@ case 'HorizontalRule': return inTags('hr',[],"",true); default: - console.log("Uknown block type " + block.t); + console.log("Unknown block type " + block.t); return ""; } }; -- cgit v1.2.3 From 25f65e91293f1bfd74f81a78e2dac2cdbaa55e98 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 11 Sep 2014 10:53:40 -0700 Subject: Fixed performance regression from eccc23dc8d. --- js/stmd.js | 60 +++++++++++++++++++++++++++++------------------------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 4b80581..187d058 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -297,38 +297,36 @@ // We need not look for closers if we have already recorded that // there are no closers past this point. 
- if (this.last_emphasis_closer === null || - this.last_emphasis_closer >= this.pos) { - while (true) { - res = this.scanDelims(c); - numclosedelims = res.numdelims; - if (res.can_close) { - if (last_emphasis_closer === null || - last_emphasis_closer < this.pos) { - last_emphasis_closer = this.pos; - } - if (numclosedelims === 3 && delims_to_match === 3) { - this.pos += 3; - this.last_emphasis_closer = null; - return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; - } else if (numclosedelims >= 2 && delims_to_match >= 2) { - delims_to_match -= 2; - this.pos += 2; - inlines = [{t: 'Strong', c: inlines}]; - } else if (numclosedelims >= 1 && delims_to_match >= 1) { - delims_to_match -= 1; - this.pos += 1; - inlines = [{t: 'Emph', c: inlines}]; - } - if (delims_to_match === 0) { - this.last_emphasis_closer = null; - return inlines[0]; - } - } else if ((next_inline = this.parseInline())) { - inlines.push(next_inline); - } else { - break; + while (this.last_emphasis_closer === null || + this.last_emphasis_closer >= this.pos) { + res = this.scanDelims(c); + numclosedelims = res.numdelims; + if (res.can_close) { + if (last_emphasis_closer === null || + last_emphasis_closer < this.pos) { + last_emphasis_closer = this.pos; } + if (numclosedelims === 3 && delims_to_match === 3) { + this.pos += 3; + this.last_emphasis_closer = null; + return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; + } else if (numclosedelims >= 2 && delims_to_match >= 2) { + delims_to_match -= 2; + this.pos += 2; + inlines = [{t: 'Strong', c: inlines}]; + } else if (numclosedelims >= 1 && delims_to_match >= 1) { + delims_to_match -= 1; + this.pos += 1; + inlines = [{t: 'Emph', c: inlines}]; + } + if (delims_to_match === 0) { + this.last_emphasis_closer = null; + return inlines[0]; + } + } else if ((next_inline = this.parseInline())) { + inlines.push(next_inline); + } else { + break; } } -- cgit v1.2.3 From e9f5a586938b926da932a9e957f801281dde4730 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 
Sep 2014 11:42:11 -0700 Subject: New parseEmphasis algorithm. - State machine for emphasis parsing. - This would require some adjustments to the spec and spec examples. - It currently blows the stack on `tricky'. - Memoization code has been commented out. - Inline parsers return arrays. --- js/stmd.js | 293 +++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 228 insertions(+), 65 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 187d058..9c84268 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -166,15 +166,15 @@ var match; while (!foundCode && (match = this.match(/`+/m))) { if (match == ticks) { - return { t: 'Code', c: this.subject.slice(afterOpenTicks, + return [{ t: 'Code', c: this.subject.slice(afterOpenTicks, this.pos - ticks.length) .replace(/[ \n]+/g,' ') - .trim() }; + .trim() }]; } } // If we got here, we didn't match a closing backtick sequence. this.pos = afterOpenTicks; - return { t: 'Str', c: ticks }; + return [{ t: 'Str', c: ticks }]; }; // Parse a backslash-escaped special character, adding either the escaped @@ -186,13 +186,13 @@ if (subj[pos] === '\\') { if (subj[pos + 1] === '\n') { this.pos = this.pos + 2; - return { t: 'Hardbreak' }; + return [{ t: 'Hardbreak' }]; } else if (reEscapable.test(subj[pos + 1])) { this.pos = this.pos + 2; - return { t: 'Str', c: subj[pos + 1] }; + return [{ t: 'Str', c: subj[pos + 1] }]; } else { this.pos++; - return {t: 'Str', c: '\\'}; + return [{t: 'Str', c: '\\'}]; } } else { return null; @@ -205,14 +205,14 @@ var dest; if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink dest = m.slice(1,-1); - return {t: 'Link', - label: [{ t: 'Str', c: dest }], - destination: 'mailto:' + dest }; + return [{t: 'Link', + label: [{ t: 'Str', c: dest }], + destination: 'mailto:' + dest }]; } else if ((m = 
this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1,-1); - return { t: 'Link', - label: [{ t: 'Str', c: dest }], - destination: dest }; + return [{ t: 'Link', + label: [{ t: 'Str', c: dest }], + destination: dest }]; } else { return null; } @@ -222,7 +222,7 @@ var parseHtmlTag = function() { var m = this.match(reHtmlTag); if (m) { - return { t: 'Html', c: m }; + return [{ t: 'Html', c: m }]; } else { return null; } @@ -285,60 +285,219 @@ if (numdelims >= 4 || !res.can_open) { this.pos += numdelims; - return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; + return [{t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}]; } this.pos += numdelims; var next_inline; - var last_emphasis_closer = null; + var first = []; + var second = []; + var current = first; + var state = 0; - var delims_to_match = numdelims; + if (numdelims === 3) { + state = 1; + } else if (numdelims === 2) { + state = 2; + } else if (numdelims 
=== 1) { + state = 3; + } - // We need not look for closers if we have already recorded that - // there are no closers past this point. - while (this.last_emphasis_closer === null || - this.last_emphasis_closer >= this.pos) { + while (true) { res = this.scanDelims(c); - numclosedelims = res.numdelims; - if (res.can_close) { - if (last_emphasis_closer === null || - last_emphasis_closer < this.pos) { - last_emphasis_closer = this.pos; + + switch (state) { + case 1: // ***a + if (res.numdelims === 3 && res.can_close) { + this.pos += 3; + return [{t: 'Strong', c: [{t: 'Emph', c: first}]}]; + } else if (res.numdelims === 2 && res.can_close) { + this.pos += 2; + current = second; + state = res.can_open ? 4 : 6; + continue; + } else if (res.numdelims === 1 && res.can_close) { + this.pos += 1; + current = second; + state = res.can_open ? 5 : 7; + continue; + } + break; + case 2: // **a + if (res.numdelims === 2 && res.can_close) { + this.pos += 2; + return [{t: 'Strong', c: first}]; + } else if (res.numdelims === 1 && res.can_open) { + this.pos += 1; + current = second; + state = 8; + continue; } - if (numclosedelims === 3 && delims_to_match === 3) { + break; + case 3: // *a + if (res.numdelims === 1 && res.can_close) { + this.pos += 1; + return [{t: 'Emph', c: first}]; + } else if (res.numdelims === 2 && res.can_open) { + this.pos += 2; + current = second; + state = 9; + continue; + } + break; + case 4: // ***a**b + if (res.numdelims === 3 && res.can_close) { this.pos += 3; - this.last_emphasis_closer = null; - return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; - } else if (numclosedelims >= 2 && delims_to_match >= 2) { - delims_to_match -= 2; + return [{t: 'Strong', + c: [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c+c}], + second)}]}]; + } else if (res.numdelims === 2 && res.can_close) { this.pos += 2; - inlines = [{t: 'Strong', c: inlines}]; - } else if (numclosedelims >= 1 && delims_to_match >= 1) { - delims_to_match -= 1; + return [{t: 'Strong', + c: [{t: 'Str', 
c: c+c+c}].concat( + first, + [{t: 'Strong', c: second}])}]; + } else if (res.numdelims === 1 && res.can_close) { this.pos += 1; - inlines = [{t: 'Emph', c: inlines}]; + return [{t: 'Emph', + c: [{t: 'Strong', c: first}].concat(second)}]; } - if (delims_to_match === 0) { - this.last_emphasis_closer = null; - return inlines[0]; + break; + case 5: // ***a*b + if (res.numdelims === 3 && res.can_close) { + this.pos += 3; + return [{t: 'Strong', + c: [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c}], + second)}]}]; + } else if (res.numdelims === 2 && res.can_close) { + this.pos += 2; + return [{t: 'Strong', + c: [{t: 'Emph', c: first}].concat(second)}]; + } else if (res.numdelims === 1 && res.can_close) { + this.pos += 1; + return [{t: 'Strong', + c: [{t: 'Str', c: c+c+c}].concat( + first, + [{t: 'Emph', c: second}])}]; } - } else if ((next_inline = this.parseInline())) { - inlines.push(next_inline); + break; + case 6: // ***a** b + if (res.numdelims === 3 && res.can_close) { + this.pos += 3; + return [{t: 'Strong', + c: [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c+c}], + second)}]}]; + } else if (res.numdelims === 1 && res.can_close) { + this.pos += 1; + return [{t: 'Emph', + c: [{t: 'Strong', c: first}].concat(second)}]; + } + break; + case 7: // ***a* b + if (res.numdelims === 3 && res.can_close) { + this.pos += 3; + return [{t: 'Strong', + c: [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c}], + second)}]}]; + } else if (res.numdelims === 2 && res.can_close) { + this.pos += 2; + return [{t: 'Strong', + c: [{t: 'Emph', c: first}].concat(second)}]; + } + break; + case 8: // **a *b + if (res.numdelims === 3 && res.can_close) { + this.pos += 3; + return [{t: 'Strong', + c: first.concat([{t: 'Emph', + c: second}])}]; + } else if (res.numdelims === 2 && res.can_close) { + this.pos += 2; + return [{t: 'Strong', + c: first.concat( + [{t: 'Str', c: c}], + second)}]; + } else if (res.numdelims === 1 && res.can_close) { + this.pos += 1; + return [{t: 'Str', c: 
c+c}].concat( + first, + [{t: 'Emph', c: second}]); + } + break; + case 9: // *a **b + if (res.numdelims === 3 && res.can_close) { + this.pos += 3; + return [{t: 'Emph', + c: first.concat([{t: 'Strong', + c: second}])}]; + } else if (res.numdelims === 2 && res.can_close) { + this.pos += 2; + return [{t: 'Str', c: c}].concat( + first, + [{t: 'Strong', c: second}]); + } else if (res.numdelims === 1 && res.can_close) { + this.pos += 1; + return [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c+c}], + second)}]; + } + break; + default: + break; + } + + if ((next_inline = this.parseInline())) { + Array.prototype.push.apply(current, next_inline); } else { break; } + } - // didn't find closing delimiter - this.pos = startpos + numdelims; - if (last_emphasis_closer === null) { - // we know there are no closers after startpos, so: - this.last_emphasis_closer = startpos; - } else { - this.last_emphasis_closer = last_emphasis_closer; + switch (state) { + case 1: // ***a + return [{t: 'Str', c: c+c+c}].concat(first); + case 2: // **a + return [{t: 'Str', c: c+c}].concat(first); + case 3: // *a + return [{t: 'Str', c: c}].concat(first); + case 4: // ***a**b + case 6: // ***a** b + return [{t: 'Str', c: c+c+c}] + .concat(first, + [{t: 'Str', c: c+c}], + second); + case 5: // ***a*b + case 7: // ***a* b + return [{t: 'Str', c: c+c+c}] + .concat(first, + [{t: 'Str', c: c}], + second); + case 8: // **a *b + return [{t: 'Str', c: c+c}] + .concat(first, + [{t: 'Str', c: c}], + second); + case 9: // *a **b + return [{t: 'Str', c: c}] + .concat(first, + [{t: 'Str', c: c+c}], + second); + default: + console.log("Unknown state, parseEmphasis"); + // shouldn't happen } - return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; + }; // Attempt to parse link title (sans quotes), returning the string @@ -461,10 +620,10 @@ (title = this.parseLinkTitle() || '') || true) && this.spnl() && this.match(/^\)/)) { - return { t: 'Link', - destination: dest, - title: title, - label: 
parseRawLabel(rawlabel) }; + return [{ t: 'Link', + destination: dest, + title: title, + label: parseRawLabel(rawlabel) }]; } else { this.pos = startpos; return null; @@ -488,10 +647,10 @@ // lookup rawlabel in refmap var link = this.refmap[normalizeReference(reflabel)]; if (link) { - return {t: 'Link', - destination: link.destination, - title: link.title, - label: parseRawLabel(rawlabel) }; + return [{t: 'Link', + destination: link.destination, + title: link.title, + label: parseRawLabel(rawlabel) }]; } else { this.pos = startpos; return null; @@ -505,7 +664,7 @@ var parseEntity = function() { var m; if ((m = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i))) { - return { t: 'Entity', c: m }; + return [{ t: 'Entity', c: m }]; } else { return null; } @@ -516,7 +675,7 @@ var parseString = function() { var m; if ((m = this.match(reMain))) { - return { t: 'Str', c: m }; + return [{ t: 'Str', c: m }]; } else { return null; } @@ -528,9 +687,9 @@ var m = this.match(/^ *\n/); if (m) { if (m.length > 2) { - return { t: 'Hardbreak' }; + return [{ t: 'Hardbreak' }]; } else if (m.length > 0) { - return { t: 'Softbreak' }; + return [{ t: 'Softbreak' }]; } } return null; @@ -542,10 +701,10 @@ if (this.match(/^!/)) { var link = this.parseLink(); if (link) { - link.t = 'Image'; + link[0].t = 'Image'; return link; } else { - return { t: 'Str', c: '!' }; + return [{ t: 'Str', c: '!' }]; } } else { return null; @@ -615,11 +774,13 @@ // and returning the inline parsed. 
var parseInline = function() { var startpos = this.pos; + /* var memoized = this.memo[startpos]; if (memoized) { this.pos = memoized.endpos; return memoized.inline; } + */ var c = this.peek(); if (!c) { return null; @@ -658,12 +819,14 @@ } if (res === null) { this.pos += 1; - res = {t: 'Str', c: c}; + res = [{t: 'Str', c: c}]; } + /* if (res) { this.memo[startpos] = { inline: res, endpos: this.pos }; } + */ return res; }; @@ -672,12 +835,12 @@ this.subject = s; this.pos = 0; this.refmap = refmap || {}; - this.memo = {}; + // this.memo = {}; this.last_emphasis_closer = null; var inlines = []; var next_inline; while ((next_inline = this.parseInline())) { - inlines.push(next_inline); + Array.prototype.push.apply(inlines, next_inline); } return inlines; }; @@ -690,7 +853,7 @@ last_emphasis_closer: null, // used by parseEmphasis method pos: 0, refmap: {}, - memo: {}, + // memo: {}, match: match, peek: peek, spnl: spnl, -- cgit v1.2.3 From a2a6b7dd829bd7097aa52f5af7fbd66dd7e2c667 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Sep 2014 14:15:55 -0700 Subject: Fixed bug in parsing `* **a** b*` etc. 
--- js/stmd.js | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 9c84268..157fe5f 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -428,9 +428,10 @@ second)}]; } else if (res.numdelims === 1 && res.can_close) { this.pos += 1; - return [{t: 'Str', c: c+c}].concat( - first, - [{t: 'Emph', c: second}]); + first = first.concat([{t: 'Emph', c: second}]); + current = first; + state = 2; + continue; } break; case 9: // *a **b @@ -441,9 +442,10 @@ c: second}])}]; } else if (res.numdelims === 2 && res.can_close) { this.pos += 2; - return [{t: 'Str', c: c}].concat( - first, - [{t: 'Strong', c: second}]); + first = first.concat([{t: 'Strong', c: second}]); + current = first; + state = 3; + continue; } else if (res.numdelims === 1 && res.can_close) { this.pos += 1; return [{t: 'Emph', -- cgit v1.2.3 From 1ffcc1d908a4b3f8c6e0c0ca0af7cc6cc4c28331 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Sep 2014 14:29:22 -0700 Subject: Small performance tweaks. --- js/stmd.js | 303 +++++++++++++++++++++++++++++++------------------------------ 1 file changed, 155 insertions(+), 148 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 157fe5f..c5268d8 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -295,6 +295,8 @@ var second = []; var current = first; var state = 0; + var can_close = false; + var can_open = false; if (numdelims === 3) { state = 1; @@ -307,155 +309,160 @@ while (true) { res = this.scanDelims(c); - switch (state) { - case 1: // ***a - if (res.numdelims === 3 && res.can_close) { - this.pos += 3; - return [{t: 'Strong', c: [{t: 'Emph', c: first}]}]; - } else if (res.numdelims === 2 && res.can_close) { - this.pos += 2; - current = second; - state = res.can_open ? 4 : 6; - continue; - } else if (res.numdelims === 1 && res.can_close) { - this.pos += 1; - current = second; - state = res.can_open ? 
5 : 7; - continue; - } - break; - case 2: // **a - if (res.numdelims === 2 && res.can_close) { - this.pos += 2; - return [{t: 'Strong', c: first}]; - } else if (res.numdelims === 1 && res.can_open) { - this.pos += 1; - current = second; - state = 8; - continue; - } - break; - case 3: // *a - if (res.numdelims === 1 && res.can_close) { - this.pos += 1; - return [{t: 'Emph', c: first}]; - } else if (res.numdelims === 2 && res.can_open) { - this.pos += 2; - current = second; - state = 9; - continue; - } - break; - case 4: // ***a**b - if (res.numdelims === 3 && res.can_close) { - this.pos += 3; - return [{t: 'Strong', - c: [{t: 'Emph', - c: first.concat( - [{t: 'Str', c: c+c}], - second)}]}]; - } else if (res.numdelims === 2 && res.can_close) { - this.pos += 2; - return [{t: 'Strong', - c: [{t: 'Str', c: c+c+c}].concat( - first, - [{t: 'Strong', c: second}])}]; - } else if (res.numdelims === 1 && res.can_close) { - this.pos += 1; - return [{t: 'Emph', - c: [{t: 'Strong', c: first}].concat(second)}]; - } - break; - case 5: // ***a*b - if (res.numdelims === 3 && res.can_close) { - this.pos += 3; - return [{t: 'Strong', - c: [{t: 'Emph', - c: first.concat( - [{t: 'Str', c: c}], - second)}]}]; - } else if (res.numdelims === 2 && res.can_close) { - this.pos += 2; - return [{t: 'Strong', - c: [{t: 'Emph', c: first}].concat(second)}]; - } else if (res.numdelims === 1 && res.can_close) { - this.pos += 1; - return [{t: 'Strong', - c: [{t: 'Str', c: c+c+c}].concat( - first, - [{t: 'Emph', c: second}])}]; - } - break; - case 6: // ***a** b - if (res.numdelims === 3 && res.can_close) { - this.pos += 3; - return [{t: 'Strong', - c: [{t: 'Emph', - c: first.concat( - [{t: 'Str', c: c+c}], - second)}]}]; - } else if (res.numdelims === 1 && res.can_close) { - this.pos += 1; - return [{t: 'Emph', - c: [{t: 'Strong', c: first}].concat(second)}]; - } - break; - case 7: // ***a* b - if (res.numdelims === 3 && res.can_close) { - this.pos += 3; - return [{t: 'Strong', - c: [{t: 'Emph', - c: 
first.concat( - [{t: 'Str', c: c}], - second)}]}]; - } else if (res.numdelims === 2 && res.can_close) { - this.pos += 2; - return [{t: 'Strong', - c: [{t: 'Emph', c: first}].concat(second)}]; - } - break; - case 8: // **a *b - if (res.numdelims === 3 && res.can_close) { - this.pos += 3; - return [{t: 'Strong', - c: first.concat([{t: 'Emph', - c: second}])}]; - } else if (res.numdelims === 2 && res.can_close) { - this.pos += 2; - return [{t: 'Strong', - c: first.concat( - [{t: 'Str', c: c}], - second)}]; - } else if (res.numdelims === 1 && res.can_close) { - this.pos += 1; - first = first.concat([{t: 'Emph', c: second}]); - current = first; - state = 2; - continue; - } - break; - case 9: // *a **b - if (res.numdelims === 3 && res.can_close) { - this.pos += 3; - return [{t: 'Emph', - c: first.concat([{t: 'Strong', - c: second}])}]; - } else if (res.numdelims === 2 && res.can_close) { - this.pos += 2; - first = first.concat([{t: 'Strong', c: second}]); - current = first; - state = 3; - continue; - } else if (res.numdelims === 1 && res.can_close) { - this.pos += 1; - return [{t: 'Emph', - c: first.concat( - [{t: 'Str', c: c+c}], - second)}]; + if (res) { + numdelims = res.numdelims; + can_close = res.can_close; + can_open = res.can_open; + switch (state) { + case 1: // ***a + if (numdelims === 3 && can_close) { + this.pos += 3; + return [{t: 'Strong', c: [{t: 'Emph', c: first}]}]; + } else if (numdelims === 2 && can_close) { + this.pos += 2; + current = second; + state = can_open ? 4 : 6; + continue; + } else if (numdelims === 1 && can_close) { + this.pos += 1; + current = second; + state = can_open ? 
5 : 7; + continue; + } + break; + case 2: // **a + if (numdelims === 2 && can_close) { + this.pos += 2; + return [{t: 'Strong', c: first}]; + } else if (numdelims === 1 && can_open) { + this.pos += 1; + current = second; + state = 8; + continue; + } + break; + case 3: // *a + if (numdelims === 1 && can_close) { + this.pos += 1; + return [{t: 'Emph', c: first}]; + } else if (numdelims === 2 && can_open) { + this.pos += 2; + current = second; + state = 9; + continue; + } + break; + case 4: // ***a**b + if (numdelims === 3 && can_close) { + this.pos += 3; + return [{t: 'Strong', + c: [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c+c}], + second)}]}]; + } else if (numdelims === 2 && can_close) { + this.pos += 2; + return [{t: 'Strong', + c: [{t: 'Str', c: c+c+c}].concat( + first, + [{t: 'Strong', c: second}])}]; + } else if (numdelims === 1 && can_close) { + this.pos += 1; + return [{t: 'Emph', + c: [{t: 'Strong', c: first}].concat(second)}]; + } + break; + case 5: // ***a*b + if (numdelims === 3 && can_close) { + this.pos += 3; + return [{t: 'Strong', + c: [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c}], + second)}]}]; + } else if (numdelims === 2 && can_close) { + this.pos += 2; + return [{t: 'Strong', + c: [{t: 'Emph', c: first}].concat(second)}]; + } else if (numdelims === 1 && can_close) { + this.pos += 1; + return [{t: 'Strong', + c: [{t: 'Str', c: c+c+c}].concat( + first, + [{t: 'Emph', c: second}])}]; + } + break; + case 6: // ***a** b + if (numdelims === 3 && can_close) { + this.pos += 3; + return [{t: 'Strong', + c: [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c+c}], + second)}]}]; + } else if (numdelims === 1 && can_close) { + this.pos += 1; + return [{t: 'Emph', + c: [{t: 'Strong', c: first}].concat(second)}]; + } + break; + case 7: // ***a* b + if (numdelims === 3 && can_close) { + this.pos += 3; + return [{t: 'Strong', + c: [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c}], + second)}]}]; + } else if (numdelims === 2 && can_close) { + 
this.pos += 2; + return [{t: 'Strong', + c: [{t: 'Emph', c: first}].concat(second)}]; + } + break; + case 8: // **a *b + if (numdelims === 3 && can_close) { + this.pos += 3; + return [{t: 'Strong', + c: first.concat([{t: 'Emph', + c: second}])}]; + } else if (numdelims === 2 && can_close) { + this.pos += 2; + return [{t: 'Strong', + c: first.concat( + [{t: 'Str', c: c}], + second)}]; + } else if (numdelims === 1 && can_close) { + this.pos += 1; + first = first.concat([{t: 'Emph', c: second}]); + current = first; + state = 2; + continue; + } + break; + case 9: // *a **b + if (numdelims === 3 && can_close) { + this.pos += 3; + return [{t: 'Emph', + c: first.concat([{t: 'Strong', + c: second}])}]; + } else if (numdelims === 2 && can_close) { + this.pos += 2; + first = first.concat([{t: 'Strong', c: second}]); + current = first; + state = 3; + continue; + } else if (numdelims === 1 && can_close) { + this.pos += 1; + return [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c+c}], + second)}]; + } + break; + default: + break; } - break; - default: - break; } if ((next_inline = this.parseInline())) { -- cgit v1.2.3 From ac8529c9f55da7fdc1186e3f34313cf411de6e71 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Sep 2014 22:04:54 -0700 Subject: Re-added backtracking and memoization. 
Gives better results for things like **foo* --- js/stmd.js | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index c5268d8..ea72b9e 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -289,6 +289,7 @@ } this.pos += numdelims; + var delimpos = this.pos; var next_inline; var first = []; @@ -472,36 +473,31 @@ } } + this.pos = startpos; + return null; switch (state) { case 1: // ***a - return [{t: 'Str', c: c+c+c}].concat(first); + return [{t: 'Emph', c: [{t: 'Str', c: c}]}].concat(first); case 2: // **a return [{t: 'Str', c: c+c}].concat(first); case 3: // *a return [{t: 'Str', c: c}].concat(first); case 4: // ***a**b case 6: // ***a** b - return [{t: 'Str', c: c+c+c}] - .concat(first, - [{t: 'Str', c: c+c}], - second); + return [{t: 'Strong', c: + [{t: 'Str', c: c}].concat(first)}].concat(second); case 5: // ***a*b case 7: // ***a* b - return [{t: 'Str', c: c+c+c}] - .concat(first, - [{t: 'Str', c: c}], - second); + return [{t: 'Emph', c: + [{t: 'Str', c: c+c}].concat(first)}].concat(second); case 8: // **a *b return [{t: 'Str', c: c+c}] .concat(first, [{t: 'Str', c: c}], second); case 9: // *a **b - return [{t: 'Str', c: c}] - .concat(first, - [{t: 'Str', c: c+c}], - second); + return [{t: 'Emph', c: first.concat([{t: 'Str', c: c}])}].concat(second); default: console.log("Unknown state, parseEmphasis"); // shouldn't happen @@ -783,13 +779,11 @@ // and returning the inline parsed. 
var parseInline = function() { var startpos = this.pos; - /* var memoized = this.memo[startpos]; if (memoized) { this.pos = memoized.endpos; return memoized.inline; } - */ var c = this.peek(); if (!c) { return null; @@ -830,12 +824,10 @@ this.pos += 1; res = [{t: 'Str', c: c}]; } - /* if (res) { this.memo[startpos] = { inline: res, endpos: this.pos }; } - */ return res; }; @@ -844,7 +836,7 @@ this.subject = s; this.pos = 0; this.refmap = refmap || {}; - // this.memo = {}; + this.memo = {}; this.last_emphasis_closer = null; var inlines = []; var next_inline; @@ -862,7 +854,7 @@ last_emphasis_closer: null, // used by parseEmphasis method pos: 0, refmap: {}, - // memo: {}, + memo: {}, match: match, peek: peek, spnl: spnl, -- cgit v1.2.3 From 459f08896d2adf09fa3e0a8ce1d2267921b2be5b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Sep 2014 22:39:17 -0700 Subject: Revert "Re-added backtracking and memoization." This reverts commit ac8529c9f55da7fdc1186e3f34313cf411de6e71. --- js/stmd.js | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index ea72b9e..c5268d8 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -289,7 +289,6 @@ } this.pos += numdelims; - var delimpos = this.pos; var next_inline; var first = []; @@ -473,31 +472,36 @@ } } - this.pos = startpos; - return null; switch (state) { case 1: // ***a - return [{t: 'Emph', c: [{t: 'Str', c: c}]}].concat(first); + return [{t: 'Str', c: c+c+c}].concat(first); case 2: // **a return [{t: 'Str', c: c+c}].concat(first); case 3: // *a return [{t: 'Str', c: c}].concat(first); case 4: // ***a**b case 6: // ***a** b - return [{t: 'Strong', c: - [{t: 'Str', c: c}].concat(first)}].concat(second); + return [{t: 'Str', c: c+c+c}] + .concat(first, + [{t: 'Str', c: c+c}], + second); case 5: // ***a*b case 7: // ***a* b - return [{t: 'Emph', c: - [{t: 'Str', c: c+c}].concat(first)}].concat(second); + return [{t: 'Str', c: c+c+c}] + .concat(first, + [{t: 
'Str', c: c}], + second); case 8: // **a *b return [{t: 'Str', c: c+c}] .concat(first, [{t: 'Str', c: c}], second); case 9: // *a **b - return [{t: 'Emph', c: first.concat([{t: 'Str', c: c}])}].concat(second); + return [{t: 'Str', c: c}] + .concat(first, + [{t: 'Str', c: c+c}], + second); default: console.log("Unknown state, parseEmphasis"); // shouldn't happen @@ -779,11 +783,13 @@ // and returning the inline parsed. var parseInline = function() { var startpos = this.pos; + /* var memoized = this.memo[startpos]; if (memoized) { this.pos = memoized.endpos; return memoized.inline; } + */ var c = this.peek(); if (!c) { return null; @@ -824,10 +830,12 @@ this.pos += 1; res = [{t: 'Str', c: c}]; } + /* if (res) { this.memo[startpos] = { inline: res, endpos: this.pos }; } + */ return res; }; @@ -836,7 +844,7 @@ this.subject = s; this.pos = 0; this.refmap = refmap || {}; - this.memo = {}; + // this.memo = {}; this.last_emphasis_closer = null; var inlines = []; var next_inline; @@ -854,7 +862,7 @@ last_emphasis_closer: null, // used by parseEmphasis method pos: 0, refmap: {}, - memo: {}, + // memo: {}, match: match, peek: peek, spnl: spnl, -- cgit v1.2.3 From 3307a5ac1d2819ecbde0763aef3102828e13ae44 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 17 Sep 2014 13:52:08 -0700 Subject: Use helper functions to simplify code. --- js/stmd.js | 122 ++++++++++++++++++++++++------------------------------------- 1 file changed, 47 insertions(+), 75 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index c5268d8..72e0306 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -260,6 +260,18 @@ can_close: can_close }; }; + var Emph = function(ils) { + return {t: 'Emph', c: ils}; + } + + var Strong = function(ils) { + return {t: 'Strong', c: ils}; + } + + var Str = function(s) { + return {t: 'Str', c: s}; + } + // Attempt to parse emphasis or strong emphasis. 
var parseEmphasis = function() { var startpos = this.pos; @@ -285,7 +297,7 @@ if (numdelims >= 4 || !res.can_open) { this.pos += numdelims; - return [{t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}]; + return [Str(this.subject.slice(startpos, startpos + numdelims))]; } this.pos += numdelims; @@ -317,7 +329,7 @@ case 1: // ***a if (numdelims === 3 && can_close) { this.pos += 3; - return [{t: 'Strong', c: [{t: 'Emph', c: first}]}]; + return [Strong([Emph(first)])]; } else if (numdelims === 2 && can_close) { this.pos += 2; current = second; @@ -333,7 +345,7 @@ case 2: // **a if (numdelims === 2 && can_close) { this.pos += 2; - return [{t: 'Strong', c: first}]; + return [Strong(first)]; } else if (numdelims === 1 && can_open) { this.pos += 1; current = second; @@ -344,7 +356,7 @@ case 3: // *a if (numdelims === 1 && can_close) { this.pos += 1; - return [{t: 'Emph', c: first}]; + return [Emph(first)]; } else if (numdelims === 2 && can_open) { this.pos += 2; current = second; @@ -355,86 +367,59 @@ case 4: // ***a**b if (numdelims === 3 && can_close) { this.pos += 3; - return [{t: 'Strong', - c: [{t: 'Emph', - c: first.concat( - [{t: 'Str', c: c+c}], - second)}]}]; + return [Strong([Emph(first.concat([Str(c+c)], second))])]; } else if (numdelims === 2 && can_close) { this.pos += 2; - return [{t: 'Strong', - c: [{t: 'Str', c: c+c+c}].concat( - first, - [{t: 'Strong', c: second}])}]; + return [Strong([Str(c+c+c)].concat( + first, + [Strong(second)]))]; } else if (numdelims === 1 && can_close) { this.pos += 1; - return [{t: 'Emph', - c: [{t: 'Strong', c: first}].concat(second)}]; + return [Emph([Strong(first)].concat(second))]; } break; case 5: // ***a*b if (numdelims === 3 && can_close) { this.pos += 3; - return [{t: 'Strong', - c: [{t: 'Emph', - c: first.concat( - [{t: 'Str', c: c}], - second)}]}]; + return [Strong([Emph(first.concat([Str(c)], second))])]; } else if (numdelims === 2 && can_close) { this.pos += 2; - return [{t: 'Strong', - c: [{t: 'Emph', 
c: first}].concat(second)}]; + return [Strong([Emph(first)].concat(second))]; } else if (numdelims === 1 && can_close) { this.pos += 1; - return [{t: 'Strong', - c: [{t: 'Str', c: c+c+c}].concat( - first, - [{t: 'Emph', c: second}])}]; + return [Strong([Str(c+c+c)].concat( + first, + [Emph(second)]))]; } break; case 6: // ***a** b if (numdelims === 3 && can_close) { this.pos += 3; - return [{t: 'Strong', - c: [{t: 'Emph', - c: first.concat( - [{t: 'Str', c: c+c}], - second)}]}]; + return [Strong([Emph(first.concat([Str(c+c)], second))])]; } else if (numdelims === 1 && can_close) { this.pos += 1; - return [{t: 'Emph', - c: [{t: 'Strong', c: first}].concat(second)}]; + return [Emph([Strong(first)].concat(second))]; } break; case 7: // ***a* b if (numdelims === 3 && can_close) { this.pos += 3; - return [{t: 'Strong', - c: [{t: 'Emph', - c: first.concat( - [{t: 'Str', c: c}], - second)}]}]; + return [Strong([Emph(first.concat([Str(c)], second))])]; } else if (numdelims === 2 && can_close) { this.pos += 2; - return [{t: 'Strong', - c: [{t: 'Emph', c: first}].concat(second)}]; + return [Strong([Emph(first)].concat(second))]; } break; case 8: // **a *b if (numdelims === 3 && can_close) { this.pos += 3; - return [{t: 'Strong', - c: first.concat([{t: 'Emph', - c: second}])}]; + return [Strong(first.concat([Emph(second)]))]; } else if (numdelims === 2 && can_close) { this.pos += 2; - return [{t: 'Strong', - c: first.concat( - [{t: 'Str', c: c}], - second)}]; + return [Strong(first.concat([Str(c)], second))]; } else if (numdelims === 1 && can_close) { this.pos += 1; - first = first.concat([{t: 'Emph', c: second}]); + first.push(Emph(second)); current = first; state = 2; continue; @@ -443,21 +428,16 @@ case 9: // *a **b if (numdelims === 3 && can_close) { this.pos += 3; - return [{t: 'Emph', - c: first.concat([{t: 'Strong', - c: second}])}]; + return [(Emph(first.concat([Strong(second)])))]; } else if (numdelims === 2 && can_close) { this.pos += 2; - first = first.concat([{t: 
'Strong', c: second}]); + first.push(Strong(second)); current = first; state = 3; continue; } else if (numdelims === 1 && can_close) { this.pos += 1; - return [{t: 'Emph', - c: first.concat( - [{t: 'Str', c: c+c}], - second)}]; + return [Emph(first.concat([Str(c+c)], second))]; } break; default: @@ -475,33 +455,25 @@ switch (state) { case 1: // ***a - return [{t: 'Str', c: c+c+c}].concat(first); + return [Str(c+c+c)].concat(first); case 2: // **a - return [{t: 'Str', c: c+c}].concat(first); + return [Str(c+c)].concat(first); case 3: // *a - return [{t: 'Str', c: c}].concat(first); + return [Str(c)].concat(first); case 4: // ***a**b case 6: // ***a** b - return [{t: 'Str', c: c+c+c}] - .concat(first, - [{t: 'Str', c: c+c}], - second); + return [Str(c+c+c)] + .concat(first, [Str(c+c)], second); case 5: // ***a*b case 7: // ***a* b - return [{t: 'Str', c: c+c+c}] - .concat(first, - [{t: 'Str', c: c}], - second); + return [Str(c+c+c)] + .concat(first, [Str(c)], second); case 8: // **a *b - return [{t: 'Str', c: c+c}] - .concat(first, - [{t: 'Str', c: c}], - second); + return [Str(c+c)] + .concat(first, [Str(c)], second); case 9: // *a **b - return [{t: 'Str', c: c}] - .concat(first, - [{t: 'Str', c: c+c}], - second); + return [Str(c)] + .concat(first, [Str(c+c)], second); default: console.log("Unknown state, parseEmphasis"); // shouldn't happen -- cgit v1.2.3 From 518eaeca38dfc6f840907f6bcc1ce28826801888 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 24 Sep 2014 22:22:51 -0700 Subject: Makefile: Use ?= so variables can be set on command line. 
--- Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 55b6645..6abaa97 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,9 @@ -CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) -LDFLAGS=-g -O3 -Wall -Werror -SRCDIR=src -DATADIR=data +CFLAGS?=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) +LDFLAGS?=-g -O3 -Wall -Werror +SRCDIR?=src +DATADIR?=data -PROG=./stmd +PROG?=./stmd .PHONY: all oldtests test spec benchjs testjs all: $(SRCDIR)/case_fold_switch.c $(PROG) -- cgit v1.2.3 From 7f4b2f7f3949f807d5dafe2219280a0f1419b0e2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 24 Sep 2014 22:23:09 -0700 Subject: Fixed bug that causes hang on bare `<` inside link label. --- js/stmd.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 72e0306..552fe16 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -534,9 +534,8 @@ this.parseBackticks(); break; case '<': - if (!(this.parseAutolink())) { - this.parseHtmlTag(); - } + this.parseAutolink() || this.parseHtmlTag() || + this.pos++; break; case '[': // nested [] nest_level++; -- cgit v1.2.3 From de2a35a4dcb3b051df328ec2c204f08c77a5ad3d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 26 Sep 2014 10:45:51 -0700 Subject: Simple fallback if we don't match emphasis. 
The other approach led to wrong results on: *hi _there* --- js/stmd.js | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 552fe16..589ac03 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -302,6 +302,9 @@ this.pos += numdelims; + var fallbackpos = this.pos; + var fallback = Str(this.subject.slice(startpos, fallbackpos)); + var next_inline; var first = []; var second = []; @@ -453,31 +456,9 @@ } - switch (state) { - case 1: // ***a - return [Str(c+c+c)].concat(first); - case 2: // **a - return [Str(c+c)].concat(first); - case 3: // *a - return [Str(c)].concat(first); - case 4: // ***a**b - case 6: // ***a** b - return [Str(c+c+c)] - .concat(first, [Str(c+c)], second); - case 5: // ***a*b - case 7: // ***a* b - return [Str(c+c+c)] - .concat(first, [Str(c)], second); - case 8: // **a *b - return [Str(c+c)] - .concat(first, [Str(c)], second); - case 9: // *a **b - return [Str(c)] - .concat(first, [Str(c+c)], second); - default: - console.log("Unknown state, parseEmphasis"); - // shouldn't happen - } + // we didn't match emphasis: fallback + this.pos = fallbackpos; + return [fallback]; }; -- cgit v1.2.3 From 50d87813fc96ea8d5c2610f3fad134f8d4f8e286 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 26 Sep 2014 10:47:46 -0700 Subject: Removed memoization code. --- js/stmd.js | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 589ac03..5a09875 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -735,13 +735,6 @@ // and returning the inline parsed. 
var parseInline = function() { var startpos = this.pos; - /* - var memoized = this.memo[startpos]; - if (memoized) { - this.pos = memoized.endpos; - return memoized.inline; - } - */ var c = this.peek(); if (!c) { return null; @@ -782,12 +775,6 @@ this.pos += 1; res = [{t: 'Str', c: c}]; } - /* - if (res) { - this.memo[startpos] = { inline: res, - endpos: this.pos }; - } - */ return res; }; @@ -796,7 +783,6 @@ this.subject = s; this.pos = 0; this.refmap = refmap || {}; - // this.memo = {}; this.last_emphasis_closer = null; var inlines = []; var next_inline; @@ -814,7 +800,6 @@ last_emphasis_closer: null, // used by parseEmphasis method pos: 0, refmap: {}, - // memo: {}, match: match, peek: peek, spnl: spnl, -- cgit v1.2.3 From 151cb9e51b25bfd644e1920c078ca894fc9e7e9d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 26 Sep 2014 11:01:20 -0700 Subject: Used last_emphasis_closer to avoid unneeded scans for closer. This doesn't seem to help much. --- js/stmd.js | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/js/stmd.js b/js/stmd.js index 5a09875..287a0c9 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -312,6 +312,7 @@ var state = 0; var can_close = false; var can_open = false; + var last_emphasis_closer = null; if (numdelims === 3) { state = 1; @@ -322,11 +323,17 @@ } while (true) { + if (this.last_emphasis_closer[c] < this.pos) { + break; + } res = this.scanDelims(c); if (res) { numdelims = res.numdelims; can_close = res.can_close; + if (can_close) { + last_emphasis_closer = this.pos; + } can_open = res.can_open; switch (state) { case 1: // ***a @@ -458,6 +465,9 @@ // we didn't match emphasis: fallback this.pos = fallbackpos; + if (last_emphasis_closer) { + this.last_emphasis_closer[c] = last_emphasis_closer; + } return [fallback]; }; @@ -783,7 +793,7 @@ this.subject = s; this.pos = 0; this.refmap = refmap || {}; - this.last_emphasis_closer = null; + this.last_emphasis_closer = { '*': s.length, '_': s.length }; var inlines = []; var 
next_inline; while ((next_inline = this.parseInline())) { -- cgit v1.2.3 From 78ad57d6919c20831c8f6d3455a72d431afd1715 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 26 Sep 2014 11:05:10 -0700 Subject: Restored memoization code. --- js/stmd.js | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/js/stmd.js b/js/stmd.js index 287a0c9..3da719f 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -745,6 +745,13 @@ // and returning the inline parsed. var parseInline = function() { var startpos = this.pos; + + var memoized = this.memo[startpos]; + if (memoized) { + this.pos = memoized.endpos; + return memoized.inline; + } + var c = this.peek(); if (!c) { return null; @@ -785,6 +792,12 @@ this.pos += 1; res = [{t: 'Str', c: c}]; } + + if (res) { + this.memo[startpos] = { inline: res, + endpos: this.pos }; + } + return res; }; @@ -793,6 +806,7 @@ this.subject = s; this.pos = 0; this.refmap = refmap || {}; + this.memo = {}; this.last_emphasis_closer = { '*': s.length, '_': s.length }; var inlines = []; var next_inline; @@ -810,6 +824,7 @@ last_emphasis_closer: null, // used by parseEmphasis method pos: 0, refmap: {}, + memo: {}, match: match, peek: peek, spnl: spnl, -- cgit v1.2.3 From 2d43050a1c62a3e6a7ef5e0d286828adc72e4bb4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 26 Sep 2014 11:11:01 -0700 Subject: Only memoize during inline parsing. This cuts the performance hit. With memoization, we get roughly constant behavior in the fuzztest. Without it, not. --- js/stmd.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 3da719f..221dbef 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -455,7 +455,7 @@ } } - if ((next_inline = this.parseInline())) { + if ((next_inline = this.parseInline(true))) { Array.prototype.push.apply(current, next_inline); } else { break; @@ -743,10 +743,10 @@ // Parse the next inline element in subject, advancing subject position // and returning the inline parsed. 
- var parseInline = function() { + var parseInline = function(memoize) { var startpos = this.pos; - var memoized = this.memo[startpos]; + var memoized = memoize && this.memo[startpos]; if (memoized) { this.pos = memoized.endpos; return memoized.inline; @@ -793,7 +793,7 @@ res = [{t: 'Str', c: c}]; } - if (res) { + if (res && memoize) { this.memo[startpos] = { inline: res, endpos: this.pos }; } -- cgit v1.2.3 From de1e28217f0da80b928bca0ca09541c0401314ee Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 29 Sep 2014 22:58:22 -0700 Subject: Use charAt for browser compatibility. --- js/stmd.js | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 221dbef..b9ce5ee 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -140,7 +140,7 @@ // Returns the character at the current subject position, or null if // there are no more characters. var peek = function() { - return this.subject[this.pos] || null; + return this.subject.charAt(this.pos) || null; }; // Parse zero or more space characters, including at most one newline @@ -183,13 +183,13 @@ var parseBackslash = function() { var subj = this.subject, pos = this.pos; - if (subj[pos] === '\\') { - if (subj[pos + 1] === '\n') { + if (subj.charAt(pos) === '\\') { + if (subj.charAt(pos + 1) === '\n') { this.pos = this.pos + 2; return [{ t: 'Hardbreak' }]; - } else if (reEscapable.test(subj[pos + 1])) { + } else if (reEscapable.test(subj.charAt(pos + 1))) { this.pos = this.pos + 2; - return [{ t: 'Str', c: subj[pos + 1] }]; + return [{ t: 'Str', c: subj.charAt(pos + 1) }]; } else { this.pos++; return [{t: 'Str', c: '\\'}]; @@ -239,7 +239,7 @@ var startpos = this.pos; char_before = this.pos === 0 ? 
'\n' : - this.subject[this.pos - 1]; + this.subject.charAt(this.pos - 1); while (this.peek() === c) { numdelims++; @@ -587,7 +587,7 @@ ((dest = this.parseLinkDestination()) !== null) && this.spnl() && // make sure there's a space before the title: - (/^\s/.test(this.subject[this.pos - 1]) && + (/^\s/.test(this.subject.charAt(this.pos - 1)) && (title = this.parseLinkTitle() || '') || true) && this.spnl() && this.match(/^\)/)) { @@ -1034,10 +1034,10 @@ switch (container.t) { case 'BlockQuote': - var matched = indent <= 3 && ln[first_nonspace] === '>'; + var matched = indent <= 3 && ln.charAt(first_nonspace) === '>'; if (matched) { offset = first_nonspace + 1; - if (ln[offset] === ' ') { + if (ln.charAt(offset) === ' ') { offset++; } } else { @@ -1077,7 +1077,7 @@ case 'FencedCode': // skip optional spaces of fence offset i = container.fence_offset; - while (i > 0 && ln[offset] === ' ') { + while (i > 0 && ln.charAt(offset) === ' ') { offset++; i--; } @@ -1154,11 +1154,11 @@ break; } - } else if (ln[first_nonspace] === '>') { + } else if (ln.charAt(first_nonspace) === '>') { // blockquote offset = first_nonspace + 1; // optional following space - if (ln[offset] === ' ') { + if (ln.charAt(offset) === ' ') { offset++; } closeUnmatchedBlocks(this); @@ -1291,7 +1291,7 @@ case 'FencedCode': // check for closing code fence: match = (indent <= 3 && - ln[first_nonspace] == container.fence_char && + ln.charAt(first_nonspace) == container.fence_char && ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/)); if (match && match[0].length >= container.fence_length) { // don't add closing fence to container; instead, close it: @@ -1350,7 +1350,7 @@ block.string_content = block.strings.join('\n').replace(/^ */m,''); // try parsing the beginning as link reference definitions: - while (block.string_content[0] === '[' && + while (block.string_content.charAt(0) === '[' && (pos = this.inlineParser.parseReference(block.string_content, this.refmap))) { block.string_content = 
block.string_content.slice(pos); -- cgit v1.2.3 From 5e6a28c965d6b036b413500a070059585ddfdbe9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 29 Sep 2014 22:46:52 -0700 Subject: Escape URIs. --- js/stmd.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index b9ce5ee..30eceb2 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -207,12 +207,12 @@ dest = m.slice(1,-1); return [{t: 'Link', label: [{ t: 'Str', c: dest }], - destination: 'mailto:' + dest }]; + destination: 'mailto:' + encodeURI(dest) }]; } else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1,-1); return [{ t: 'Link', label: [{ t: 'Str', c: dest }], - destination: dest }]; + destination: encodeURI(dest) }]; } else { return null; } @@ -489,11 +489,11 @@ var parseLinkDestination = function() { var res = this.match(reLinkDestinationBraces); if (res) { // chop off surrounding <..>: - return unescape(res.substr(1, 
res.length - 2)); + return encodeURI(unescape(res.substr(1, res.length - 2))); } else { res = this.match(reLinkDestination); if (res !== null) { - return unescape(res); + return encodeURI(unescape(res)); } else { return null; } -- cgit v1.2.3 From 8cabf96510bb17f80d0b849f7e97ebe54c779eb7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 29 Sep 2014 23:05:02 -0700 Subject: Rename unescape -> unescapeBS to avoid confusion with built-in. --- js/stmd.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 30eceb2..97120ed 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -77,7 +77,7 @@ // UTILITY FUNCTIONS // Replace backslash escapes with literal characters. - var unescape = function(s) { + var unescapeBS = function(s) { return s.replace(reAllEscapedChar, '$1'); }; @@ -478,7 +478,7 @@ var title = this.match(reLinkTitle); if (title) { // chop off quotes from title and unescape: - return unescape(title.substr(1, title.length - 2)); + return unescapeBS(title.substr(1, title.length - 2)); } else { return null; } @@ -489,11 +489,11 @@ var parseLinkDestination = function() { var res = this.match(reLinkDestinationBraces); if (res) { // chop off surrounding <..>: - return encodeURI(unescape(res.substr(1, res.length - 2))); + return encodeURI(unescapeBS(res.substr(1, res.length - 2))); } else { res = this.match(reLinkDestination); if (res !== null) { - return encodeURI(unescape(res)); + return encodeURI(unescapeBS(res)); } else { return null; } @@ -1373,7 +1373,7 @@ case 'FencedCode': // first line becomes info string - block.info = unescape(block.strings[0].trim()); + block.info = unescapeBS(block.strings[0].trim()); if (block.strings.length == 1) { block.string_content = ''; } else { -- cgit v1.2.3 From 840a6a326f5885137922517c80bce0a1005d5c71 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 30 Sep 2014 21:34:47 -0700 Subject: Added entity decoding. 
AST now contains parses entities as Str objects with unicode characters, not as 'Entity'. (Like the new C parser.) --- js/stmd.js | 2144 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 2141 insertions(+), 3 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 97120ed..2a63d23 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -11,6 +11,2132 @@ (function(exports) { + var entities = { AAacute: 'Á', + aacute: 'á', + Abreve: 'Ă', + abreve: 'ă', + ac: '∾', + acd: '∿', + acE: '∾', + Acirc: 'Â', + acirc: 'â', + acute: '´', + Acy: 'А', + acy: 'а', + AElig: 'Æ', + aelig: 'æ', + af: '⁡', + Afr: '𝔄', + afr: '𝔞', + Agrave: 'À', + agrave: 'à', + alefsym: 'ℵ', + aleph: 'ℵ', + Alpha: 'Α', + alpha: 'α', + Amacr: 'Ā', + amacr: 'ā', + amalg: '⨿', + amp: '&', + AMP: '&', + andand: '⩕', + And: '⩓', + and: '∧', + andd: '⩜', + andslope: '⩘', + andv: '⩚', + ang: '∠', + ange: '⦤', + angle: '∠', + angmsdaa: '⦨', + angmsdab: '⦩', + angmsdac: '⦪', + angmsdad: '⦫', + angmsdae: '⦬', + angmsdaf: '⦭', + angmsdag: '⦮', + angmsdah: '⦯', + angmsd: '∡', + angrt: '∟', + angrtvb: '⊾', + angrtvbd: '⦝', + angsph: '∢', + angst: 'Å', + angzarr: '⍼', + Aogon: 'Ą', + aogon: 'ą', + Aopf: '𝔸', + aopf: '𝕒', + apacir: '⩯', + ap: '≈', + apE: '⩰', + ape: '≊', + apid: '≋', + apos: '\'', + ApplyFunction: '⁡', + approx: '≈', + approxeq: '≊', + Aring: 'Å', + aring: 'å', + Ascr: '𝒜', + ascr: '𝒶', + Assign: '≔', + ast: '*', + asymp: '≈', + asympeq: '≍', + Atilde: 'Ã', + atilde: 'ã', + Auml: 'Ä', + auml: 'ä', + awconint: '∳', + awint: '⨑', + backcong: '≌', + backepsilon: '϶', + backprime: '‵', + backsim: '∽', + backsimeq: '⋍', + Backslash: '∖', + Barv: '⫧', + barvee: '⊽', + barwed: '⌅', + Barwed: '⌆', + barwedge: '⌅', + bbrk: '⎵', + bbrktbrk: '⎶', + bcong: '≌', + Bcy: 'Б', + bcy: 'б', + bdquo: '„', + becaus: '∵', + because: '∵', + Because: '∵', + bemptyv: '⦰', + bepsi: '϶', + bernou: 'ℬ', + Bernoullis: 'ℬ', + Beta: 'Β', + beta: 'β', + beth: 'ℶ', + between: '≬', + Bfr: '𝔅', + bfr: '𝔟', + bigcap: 
'⋂', + bigcirc: '◯', + bigcup: '⋃', + bigodot: '⨀', + bigoplus: '⨁', + bigotimes: '⨂', + bigsqcup: '⨆', + bigstar: '★', + bigtriangledown: '▽', + bigtriangleup: '△', + biguplus: '⨄', + bigvee: '⋁', + bigwedge: '⋀', + bkarow: '⤍', + blacklozenge: '⧫', + blacksquare: '▪', + blacktriangle: '▴', + blacktriangledown: '▾', + blacktriangleleft: '◂', + blacktriangleright: '▸', + blank: '␣', + blk12: '▒', + blk14: '░', + blk34: '▓', + block: '█', + bne: '=', + bnequiv: '≡', + bNot: '⫭', + bnot: '⌐', + Bopf: '𝔹', + bopf: '𝕓', + bot: '⊥', + bottom: '⊥', + bowtie: '⋈', + boxbox: '⧉', + boxdl: '┐', + boxdL: '╕', + boxDl: '╖', + boxDL: '╗', + boxdr: '┌', + boxdR: '╒', + boxDr: '╓', + boxDR: '╔', + boxh: '─', + boxH: '═', + boxhd: '┬', + boxHd: '╤', + boxhD: '╥', + boxHD: '╦', + boxhu: '┴', + boxHu: '╧', + boxhU: '╨', + boxHU: '╩', + boxminus: '⊟', + boxplus: '⊞', + boxtimes: '⊠', + boxul: '┘', + boxuL: '╛', + boxUl: '╜', + boxUL: '╝', + boxur: '└', + boxuR: '╘', + boxUr: '╙', + boxUR: '╚', + boxv: '│', + boxV: '║', + boxvh: '┼', + boxvH: '╪', + boxVh: '╫', + boxVH: '╬', + boxvl: '┤', + boxvL: '╡', + boxVl: '╢', + boxVL: '╣', + boxvr: '├', + boxvR: '╞', + boxVr: '╟', + boxVR: '╠', + bprime: '‵', + breve: '˘', + Breve: '˘', + brvbar: '¦', + bscr: '𝒷', + Bscr: 'ℬ', + bsemi: '⁏', + bsim: '∽', + bsime: '⋍', + bsolb: '⧅', + bsol: '\\', + bsolhsub: '⟈', + bull: '•', + bullet: '•', + bump: '≎', + bumpE: '⪮', + bumpe: '≏', + Bumpeq: '≎', + bumpeq: '≏', + Cacute: 'Ć', + cacute: 'ć', + capand: '⩄', + capbrcup: '⩉', + capcap: '⩋', + cap: '∩', + Cap: '⋒', + capcup: '⩇', + capdot: '⩀', + CapitalDifferentialD: 'ⅅ', + caps: '∩', + caret: '⁁', + caron: 'ˇ', + Cayleys: 'ℭ', + ccaps: '⩍', + Ccaron: 'Č', + ccaron: 'č', + Ccedil: 'Ç', + ccedil: 'ç', + Ccirc: 'Ĉ', + ccirc: 'ĉ', + Cconint: '∰', + ccups: '⩌', + ccupssm: '⩐', + Cdot: 'Ċ', + cdot: 'ċ', + cedil: '¸', + Cedilla: '¸', + cemptyv: '⦲', + cent: '¢', + centerdot: '·', + CenterDot: '·', + cfr: '𝔠', + Cfr: 'ℭ', + CHcy: 'Ч', + chcy: 'ч', + check: 
'✓', + checkmark: '✓', + Chi: 'Χ', + chi: 'χ', + circ: 'ˆ', + circeq: '≗', + circlearrowleft: '↺', + circlearrowright: '↻', + circledast: '⊛', + circledcirc: '⊚', + circleddash: '⊝', + CircleDot: '⊙', + circledR: '®', + circledS: 'Ⓢ', + CircleMinus: '⊖', + CirclePlus: '⊕', + CircleTimes: '⊗', + cir: '○', + cirE: '⧃', + cire: '≗', + cirfnint: '⨐', + cirmid: '⫯', + cirscir: '⧂', + ClockwiseContourIntegral: '∲', + CloseCurlyDoubleQuote: '”', + CloseCurlyQuote: '’', + clubs: '♣', + clubsuit: '♣', + colon: ':', + Colon: '∷', + Colone: '⩴', + colone: '≔', + coloneq: '≔', + comma: ',', + commat: '@', + comp: '∁', + compfn: '∘', + complement: '∁', + complexes: 'ℂ', + cong: '≅', + congdot: '⩭', + Congruent: '≡', + conint: '∮', + Conint: '∯', + ContourIntegral: '∮', + copf: '𝕔', + Copf: 'ℂ', + coprod: '∐', + Coproduct: '∐', + copy: '©', + COPY: '©', + copysr: '℗', + CounterClockwiseContourIntegral: '∳', + crarr: '↵', + cross: '✗', + Cross: '⨯', + Cscr: '𝒞', + cscr: '𝒸', + csub: '⫏', + csube: '⫑', + csup: '⫐', + csupe: '⫒', + ctdot: '⋯', + cudarrl: '⤸', + cudarrr: '⤵', + cuepr: '⋞', + cuesc: '⋟', + cularr: '↶', + cularrp: '⤽', + cupbrcap: '⩈', + cupcap: '⩆', + CupCap: '≍', + cup: '∪', + Cup: '⋓', + cupcup: '⩊', + cupdot: '⊍', + cupor: '⩅', + cups: '∪', + curarr: '↷', + curarrm: '⤼', + curlyeqprec: '⋞', + curlyeqsucc: '⋟', + curlyvee: '⋎', + curlywedge: '⋏', + curren: '¤', + curvearrowleft: '↶', + curvearrowright: '↷', + cuvee: '⋎', + cuwed: '⋏', + cwconint: '∲', + cwint: '∱', + cylcty: '⌭', + dagger: '†', + Dagger: '‡', + daleth: 'ℸ', + darr: '↓', + Darr: '↡', + dArr: '⇓', + dash: '‐', + Dashv: '⫤', + dashv: '⊣', + dbkarow: '⤏', + dblac: '˝', + Dcaron: 'Ď', + dcaron: 'ď', + Dcy: 'Д', + dcy: 'д', + ddagger: '‡', + ddarr: '⇊', + DD: 'ⅅ', + dd: 'ⅆ', + DDotrahd: '⤑', + ddotseq: '⩷', + deg: '°', + Del: '∇', + Delta: 'Δ', + delta: 'δ', + demptyv: '⦱', + dfisht: '⥿', + Dfr: '𝔇', + dfr: '𝔡', + dHar: '⥥', + dharl: '⇃', + dharr: '⇂', + DiacriticalAcute: '´', + DiacriticalDot: '˙', + 
DiacriticalDoubleAcute: '˝', + DiacriticalGrave: '`', + DiacriticalTilde: '˜', + diam: '⋄', + diamond: '⋄', + Diamond: '⋄', + diamondsuit: '♦', + diams: '♦', + die: '¨', + DifferentialD: 'ⅆ', + digamma: 'ϝ', + disin: '⋲', + div: '÷', + divide: '÷', + divideontimes: '⋇', + divonx: '⋇', + DJcy: 'Ђ', + djcy: 'ђ', + dlcorn: '⌞', + dlcrop: '⌍', + dollar: '$', + Dopf: '𝔻', + dopf: '𝕕', + Dot: '¨', + dot: '˙', + DotDot: '⃜', + doteq: '≐', + doteqdot: '≑', + DotEqual: '≐', + dotminus: '∸', + dotplus: '∔', + dotsquare: '⊡', + doublebarwedge: '⌆', + DoubleContourIntegral: '∯', + DoubleDot: '¨', + DoubleDownArrow: '⇓', + DoubleLeftArrow: '⇐', + DoubleLeftRightArrow: '⇔', + DoubleLeftTee: '⫤', + DoubleLongLeftArrow: '⟸', + DoubleLongLeftRightArrow: '⟺', + DoubleLongRightArrow: '⟹', + DoubleRightArrow: '⇒', + DoubleRightTee: '⊨', + DoubleUpArrow: '⇑', + DoubleUpDownArrow: '⇕', + DoubleVerticalBar: '∥', + DownArrowBar: '⤓', + downarrow: '↓', + DownArrow: '↓', + Downarrow: '⇓', + DownArrowUpArrow: '⇵', + DownBreve: '̑', + downdownarrows: '⇊', + downharpoonleft: '⇃', + downharpoonright: '⇂', + DownLeftRightVector: '⥐', + DownLeftTeeVector: '⥞', + DownLeftVectorBar: '⥖', + DownLeftVector: '↽', + DownRightTeeVector: '⥟', + DownRightVectorBar: '⥗', + DownRightVector: '⇁', + DownTeeArrow: '↧', + DownTee: '⊤', + drbkarow: '⤐', + drcorn: '⌟', + drcrop: '⌌', + Dscr: '𝒟', + dscr: '𝒹', + DScy: 'Ѕ', + dscy: 'ѕ', + dsol: '⧶', + Dstrok: 'Đ', + dstrok: 'đ', + dtdot: '⋱', + dtri: '▿', + dtrif: '▾', + duarr: '⇵', + duhar: '⥯', + dwangle: '⦦', + DZcy: 'Џ', + dzcy: 'џ', + dzigrarr: '⟿', + Eacute: 'É', + eacute: 'é', + easter: '⩮', + Ecaron: 'Ě', + ecaron: 'ě', + Ecirc: 'Ê', + ecirc: 'ê', + ecir: '≖', + ecolon: '≕', + Ecy: 'Э', + ecy: 'э', + eDDot: '⩷', + Edot: 'Ė', + edot: 'ė', + eDot: '≑', + ee: 'ⅇ', + efDot: '≒', + Efr: '𝔈', + efr: '𝔢', + eg: '⪚', + Egrave: 'È', + egrave: 'è', + egs: '⪖', + egsdot: '⪘', + el: '⪙', + Element: '∈', + elinters: '⏧', + ell: 'ℓ', + els: '⪕', + elsdot: '⪗', + Emacr: 
'Ē', + emacr: 'ē', + empty: '∅', + emptyset: '∅', + EmptySmallSquare: '◻', + emptyv: '∅', + EmptyVerySmallSquare: '▫', + emsp13: ' ', + emsp14: ' ', + emsp: ' ', + ENG: 'Ŋ', + eng: 'ŋ', + ensp: ' ', + Eogon: 'Ę', + eogon: 'ę', + Eopf: '𝔼', + eopf: '𝕖', + epar: '⋕', + eparsl: '⧣', + eplus: '⩱', + epsi: 'ε', + Epsilon: 'Ε', + epsilon: 'ε', + epsiv: 'ϵ', + eqcirc: '≖', + eqcolon: '≕', + eqsim: '≂', + eqslantgtr: '⪖', + eqslantless: '⪕', + Equal: '⩵', + equals: '=', + EqualTilde: '≂', + equest: '≟', + Equilibrium: '⇌', + equiv: '≡', + equivDD: '⩸', + eqvparsl: '⧥', + erarr: '⥱', + erDot: '≓', + escr: 'ℯ', + Escr: 'ℰ', + esdot: '≐', + Esim: '⩳', + esim: '≂', + Eta: 'Η', + eta: 'η', + ETH: 'Ð', + eth: 'ð', + Euml: 'Ë', + euml: 'ë', + euro: '€', + excl: '!', + exist: '∃', + Exists: '∃', + expectation: 'ℰ', + exponentiale: 'ⅇ', + ExponentialE: 'ⅇ', + fallingdotseq: '≒', + Fcy: 'Ф', + fcy: 'ф', + female: '♀', + ffilig: 'ffi', + fflig: 'ff', + ffllig: 'ffl', + Ffr: '𝔉', + ffr: '𝔣', + filig: 'fi', + FilledSmallSquare: '◼', + FilledVerySmallSquare: '▪', + fjlig: 'f', + flat: '♭', + fllig: 'fl', + fltns: '▱', + fnof: 'ƒ', + Fopf: '𝔽', + fopf: '𝕗', + forall: '∀', + ForAll: '∀', + fork: '⋔', + forkv: '⫙', + Fouriertrf: 'ℱ', + fpartint: '⨍', + frac12: '½', + frac13: '⅓', + frac14: '¼', + frac15: '⅕', + frac16: '⅙', + frac18: '⅛', + frac23: '⅔', + frac25: '⅖', + frac34: '¾', + frac35: '⅗', + frac38: '⅜', + frac45: '⅘', + frac56: '⅚', + frac58: '⅝', + frac78: '⅞', + frasl: '⁄', + frown: '⌢', + fscr: '𝒻', + Fscr: 'ℱ', + gacute: 'ǵ', + Gamma: 'Γ', + gamma: 'γ', + Gammad: 'Ϝ', + gammad: 'ϝ', + gap: '⪆', + Gbreve: 'Ğ', + gbreve: 'ğ', + Gcedil: 'Ģ', + Gcirc: 'Ĝ', + gcirc: 'ĝ', + Gcy: 'Г', + gcy: 'г', + Gdot: 'Ġ', + gdot: 'ġ', + ge: '≥', + gE: '≧', + gEl: '⪌', + gel: '⋛', + geq: '≥', + geqq: '≧', + geqslant: '⩾', + gescc: '⪩', + ges: '⩾', + gesdot: '⪀', + gesdoto: '⪂', + gesdotol: '⪄', + gesl: '⋛', + gesles: '⪔', + Gfr: '𝔊', + gfr: '𝔤', + gg: '≫', + Gg: '⋙', + ggg: '⋙', + gimel: 'ℷ', + 
GJcy: 'Ѓ', + gjcy: 'ѓ', + gla: '⪥', + gl: '≷', + glE: '⪒', + glj: '⪤', + gnap: '⪊', + gnapprox: '⪊', + gne: '⪈', + gnE: '≩', + gneq: '⪈', + gneqq: '≩', + gnsim: '⋧', + Gopf: '𝔾', + gopf: '𝕘', + grave: '`', + GreaterEqual: '≥', + GreaterEqualLess: '⋛', + GreaterFullEqual: '≧', + GreaterGreater: '⪢', + GreaterLess: '≷', + GreaterSlantEqual: '⩾', + GreaterTilde: '≳', + Gscr: '𝒢', + gscr: 'ℊ', + gsim: '≳', + gsime: '⪎', + gsiml: '⪐', + gtcc: '⪧', + gtcir: '⩺', + gt: '>', + GT: '>', + Gt: '≫', + gtdot: '⋗', + gtlPar: '⦕', + gtquest: '⩼', + gtrapprox: '⪆', + gtrarr: '⥸', + gtrdot: '⋗', + gtreqless: '⋛', + gtreqqless: '⪌', + gtrless: '≷', + gtrsim: '≳', + gvertneqq: '≩', + gvnE: '≩', + Hacek: 'ˇ', + hairsp: ' ', + half: '½', + hamilt: 'ℋ', + HARDcy: 'Ъ', + hardcy: 'ъ', + harrcir: '⥈', + harr: '↔', + hArr: '⇔', + harrw: '↭', + Hat: '^', + hbar: 'ℏ', + Hcirc: 'Ĥ', + hcirc: 'ĥ', + hearts: '♥', + heartsuit: '♥', + hellip: '…', + hercon: '⊹', + hfr: '𝔥', + Hfr: 'ℌ', + HilbertSpace: 'ℋ', + hksearow: '⤥', + hkswarow: '⤦', + hoarr: '⇿', + homtht: '∻', + hookleftarrow: '↩', + hookrightarrow: '↪', + hopf: '𝕙', + Hopf: 'ℍ', + horbar: '―', + HorizontalLine: '─', + hscr: '𝒽', + Hscr: 'ℋ', + hslash: 'ℏ', + Hstrok: 'Ħ', + hstrok: 'ħ', + HumpDownHump: '≎', + HumpEqual: '≏', + hybull: '⁃', + hyphen: '‐', + Iacute: 'Í', + iacute: 'í', + ic: '⁣', + Icirc: 'Î', + icirc: 'î', + Icy: 'И', + icy: 'и', + Idot: 'İ', + IEcy: 'Е', + iecy: 'е', + iexcl: '¡', + iff: '⇔', + ifr: '𝔦', + Ifr: 'ℑ', + Igrave: 'Ì', + igrave: 'ì', + ii: 'ⅈ', + iiiint: '⨌', + iiint: '∭', + iinfin: '⧜', + iiota: '℩', + IJlig: 'IJ', + ijlig: 'ij', + Imacr: 'Ī', + imacr: 'ī', + image: 'ℑ', + ImaginaryI: 'ⅈ', + imagline: 'ℐ', + imagpart: 'ℑ', + imath: 'ı', + Im: 'ℑ', + imof: '⊷', + imped: 'Ƶ', + Implies: '⇒', + incare: '℅', + in: '∈', + infin: '∞', + infintie: '⧝', + inodot: 'ı', + intcal: '⊺', + int: '∫', + Int: '∬', + integers: 'ℤ', + Integral: '∫', + intercal: '⊺', + Intersection: '⋂', + intlarhk: '⨗', + intprod: '⨼', + 
InvisibleComma: '⁣', + InvisibleTimes: '⁢', + IOcy: 'Ё', + iocy: 'ё', + Iogon: 'Į', + iogon: 'į', + Iopf: '𝕀', + iopf: '𝕚', + Iota: 'Ι', + iota: 'ι', + iprod: '⨼', + iquest: '¿', + iscr: '𝒾', + Iscr: 'ℐ', + isin: '∈', + isindot: '⋵', + isinE: '⋹', + isins: '⋴', + isinsv: '⋳', + isinv: '∈', + it: '⁢', + Itilde: 'Ĩ', + itilde: 'ĩ', + Iukcy: 'І', + iukcy: 'і', + Iuml: 'Ï', + iuml: 'ï', + Jcirc: 'Ĵ', + jcirc: 'ĵ', + Jcy: 'Й', + jcy: 'й', + Jfr: '𝔍', + jfr: '𝔧', + jmath: 'ȷ', + Jopf: '𝕁', + jopf: '𝕛', + Jscr: '𝒥', + jscr: '𝒿', + Jsercy: 'Ј', + jsercy: 'ј', + Jukcy: 'Є', + jukcy: 'є', + Kappa: 'Κ', + kappa: 'κ', + kappav: 'ϰ', + Kcedil: 'Ķ', + kcedil: 'ķ', + Kcy: 'К', + kcy: 'к', + Kfr: '𝔎', + kfr: '𝔨', + kgreen: 'ĸ', + KHcy: 'Х', + khcy: 'х', + KJcy: 'Ќ', + kjcy: 'ќ', + Kopf: '𝕂', + kopf: '𝕜', + Kscr: '𝒦', + kscr: '𝓀', + lAarr: '⇚', + Lacute: 'Ĺ', + lacute: 'ĺ', + laemptyv: '⦴', + lagran: 'ℒ', + Lambda: 'Λ', + lambda: 'λ', + lang: '⟨', + Lang: '⟪', + langd: '⦑', + langle: '⟨', + lap: '⪅', + Laplacetrf: 'ℒ', + laquo: '«', + larrb: '⇤', + larrbfs: '⤟', + larr: '←', + Larr: '↞', + lArr: '⇐', + larrfs: '⤝', + larrhk: '↩', + larrlp: '↫', + larrpl: '⤹', + larrsim: '⥳', + larrtl: '↢', + latail: '⤙', + lAtail: '⤛', + lat: '⪫', + late: '⪭', + lates: '⪭', + lbarr: '⤌', + lBarr: '⤎', + lbbrk: '❲', + lbrace: '{', + lbrack: '[', + lbrke: '⦋', + lbrksld: '⦏', + lbrkslu: '⦍', + Lcaron: 'Ľ', + lcaron: 'ľ', + Lcedil: 'Ļ', + lcedil: 'ļ', + lceil: '⌈', + lcub: '{', + Lcy: 'Л', + lcy: 'л', + ldca: '⤶', + ldquo: '“', + ldquor: '„', + ldrdhar: '⥧', + ldrushar: '⥋', + ldsh: '↲', + le: '≤', + lE: '≦', + LeftAngleBracket: '⟨', + LeftArrowBar: '⇤', + leftarrow: '←', + LeftArrow: '←', + Leftarrow: '⇐', + LeftArrowRightArrow: '⇆', + leftarrowtail: '↢', + LeftCeiling: '⌈', + LeftDoubleBracket: '⟦', + LeftDownTeeVector: '⥡', + LeftDownVectorBar: '⥙', + LeftDownVector: '⇃', + LeftFloor: '⌊', + leftharpoondown: '↽', + leftharpoonup: '↼', + leftleftarrows: '⇇', + leftrightarrow: '↔', + LeftRightArrow: 
'↔', + Leftrightarrow: '⇔', + leftrightarrows: '⇆', + leftrightharpoons: '⇋', + leftrightsquigarrow: '↭', + LeftRightVector: '⥎', + LeftTeeArrow: '↤', + LeftTee: '⊣', + LeftTeeVector: '⥚', + leftthreetimes: '⋋', + LeftTriangleBar: '⧏', + LeftTriangle: '⊲', + LeftTriangleEqual: '⊴', + LeftUpDownVector: '⥑', + LeftUpTeeVector: '⥠', + LeftUpVectorBar: '⥘', + LeftUpVector: '↿', + LeftVectorBar: '⥒', + LeftVector: '↼', + lEg: '⪋', + leg: '⋚', + leq: '≤', + leqq: '≦', + leqslant: '⩽', + lescc: '⪨', + les: '⩽', + lesdot: '⩿', + lesdoto: '⪁', + lesdotor: '⪃', + lesg: '⋚', + lesges: '⪓', + lessapprox: '⪅', + lessdot: '⋖', + lesseqgtr: '⋚', + lesseqqgtr: '⪋', + LessEqualGreater: '⋚', + LessFullEqual: '≦', + LessGreater: '≶', + lessgtr: '≶', + LessLess: '⪡', + lesssim: '≲', + LessSlantEqual: '⩽', + LessTilde: '≲', + lfisht: '⥼', + lfloor: '⌊', + Lfr: '𝔏', + lfr: '𝔩', + lg: '≶', + lgE: '⪑', + lHar: '⥢', + lhard: '↽', + lharu: '↼', + lharul: '⥪', + lhblk: '▄', + LJcy: 'Љ', + ljcy: 'љ', + llarr: '⇇', + ll: '≪', + Ll: '⋘', + llcorner: '⌞', + Lleftarrow: '⇚', + llhard: '⥫', + lltri: '◺', + Lmidot: 'Ŀ', + lmidot: 'ŀ', + lmoustache: '⎰', + lmoust: '⎰', + lnap: '⪉', + lnapprox: '⪉', + lne: '⪇', + lnE: '≨', + lneq: '⪇', + lneqq: '≨', + lnsim: '⋦', + loang: '⟬', + loarr: '⇽', + lobrk: '⟦', + longleftarrow: '⟵', + LongLeftArrow: '⟵', + Longleftarrow: '⟸', + longleftrightarrow: '⟷', + LongLeftRightArrow: '⟷', + Longleftrightarrow: '⟺', + longmapsto: '⟼', + longrightarrow: '⟶', + LongRightArrow: '⟶', + Longrightarrow: '⟹', + looparrowleft: '↫', + looparrowright: '↬', + lopar: '⦅', + Lopf: '𝕃', + lopf: '𝕝', + loplus: '⨭', + lotimes: '⨴', + lowast: '∗', + lowbar: '_', + LowerLeftArrow: '↙', + LowerRightArrow: '↘', + loz: '◊', + lozenge: '◊', + lozf: '⧫', + lpar: '(', + lparlt: '⦓', + lrarr: '⇆', + lrcorner: '⌟', + lrhar: '⇋', + lrhard: '⥭', + lrm: '‎', + lrtri: '⊿', + lsaquo: '‹', + lscr: '𝓁', + Lscr: 'ℒ', + lsh: '↰', + Lsh: '↰', + lsim: '≲', + lsime: '⪍', + lsimg: '⪏', + lsqb: '[', + 
lsquo: '‘', + lsquor: '‚', + Lstrok: 'Ł', + lstrok: 'ł', + ltcc: '⪦', + ltcir: '⩹', + lt: '<', + LT: '<', + Lt: '≪', + ltdot: '⋖', + lthree: '⋋', + ltimes: '⋉', + ltlarr: '⥶', + ltquest: '⩻', + ltri: '◃', + ltrie: '⊴', + ltrif: '◂', + ltrPar: '⦖', + lurdshar: '⥊', + luruhar: '⥦', + lvertneqq: '≨', + lvnE: '≨', + macr: '¯', + male: '♂', + malt: '✠', + maltese: '✠', + Map: '⤅', + map: '↦', + mapsto: '↦', + mapstodown: '↧', + mapstoleft: '↤', + mapstoup: '↥', + marker: '▮', + mcomma: '⨩', + Mcy: 'М', + mcy: 'м', + mdash: '—', + mDDot: '∺', + measuredangle: '∡', + MediumSpace: ' ', + Mellintrf: 'ℳ', + Mfr: '𝔐', + mfr: '𝔪', + mho: '℧', + micro: 'µ', + midast: '*', + midcir: '⫰', + mid: '∣', + middot: '·', + minusb: '⊟', + minus: '−', + minusd: '∸', + minusdu: '⨪', + MinusPlus: '∓', + mlcp: '⫛', + mldr: '…', + mnplus: '∓', + models: '⊧', + Mopf: '𝕄', + mopf: '𝕞', + mp: '∓', + mscr: '𝓂', + Mscr: 'ℳ', + mstpos: '∾', + Mu: 'Μ', + mu: 'μ', + multimap: '⊸', + mumap: '⊸', + nabla: '∇', + Nacute: 'Ń', + nacute: 'ń', + nang: '∠', + nap: '≉', + napE: '⩰', + napid: '≋', + napos: 'ʼn', + napprox: '≉', + natural: '♮', + naturals: 'ℕ', + natur: '♮', + nbsp: ' ', + nbump: '≎', + nbumpe: '≏', + ncap: '⩃', + Ncaron: 'Ň', + ncaron: 'ň', + Ncedil: 'Ņ', + ncedil: 'ņ', + ncong: '≇', + ncongdot: '⩭', + ncup: '⩂', + Ncy: 'Н', + ncy: 'н', + ndash: '–', + nearhk: '⤤', + nearr: '↗', + neArr: '⇗', + nearrow: '↗', + ne: '≠', + nedot: '≐', + NegativeMediumSpace: '​', + NegativeThickSpace: '​', + NegativeThinSpace: '​', + NegativeVeryThinSpace: '​', + nequiv: '≢', + nesear: '⤨', + nesim: '≂', + NestedGreaterGreater: '≫', + NestedLessLess: '≪', + NewLine: '\n', + nexist: '∄', + nexists: '∄', + Nfr: '𝔑', + nfr: '𝔫', + ngE: '≧', + nge: '≱', + ngeq: '≱', + ngeqq: '≧', + ngeqslant: '⩾', + nges: '⩾', + nGg: '⋙', + ngsim: '≵', + nGt: '≫', + ngt: '≯', + ngtr: '≯', + nGtv: '≫', + nharr: '↮', + nhArr: '⇎', + nhpar: '⫲', + ni: '∋', + nis: '⋼', + nisd: '⋺', + niv: '∋', + NJcy: 'Њ', + njcy: 'њ', + nlarr: '↚', + 
nlArr: '⇍', + nldr: '‥', + nlE: '≦', + nle: '≰', + nleftarrow: '↚', + nLeftarrow: '⇍', + nleftrightarrow: '↮', + nLeftrightarrow: '⇎', + nleq: '≰', + nleqq: '≦', + nleqslant: '⩽', + nles: '⩽', + nless: '≮', + nLl: '⋘', + nlsim: '≴', + nLt: '≪', + nlt: '≮', + nltri: '⋪', + nltrie: '⋬', + nLtv: '≪', + nmid: '∤', + NoBreak: '⁠', + NonBreakingSpace: ' ', + nopf: '𝕟', + Nopf: 'ℕ', + Not: '⫬', + not: '¬', + NotCongruent: '≢', + NotCupCap: '≭', + NotDoubleVerticalBar: '∦', + NotElement: '∉', + NotEqual: '≠', + NotEqualTilde: '≂', + NotExists: '∄', + NotGreater: '≯', + NotGreaterEqual: '≱', + NotGreaterFullEqual: '≧', + NotGreaterGreater: '≫', + NotGreaterLess: '≹', + NotGreaterSlantEqual: '⩾', + NotGreaterTilde: '≵', + NotHumpDownHump: '≎', + NotHumpEqual: '≏', + notin: '∉', + notindot: '⋵', + notinE: '⋹', + notinva: '∉', + notinvb: '⋷', + notinvc: '⋶', + NotLeftTriangleBar: '⧏', + NotLeftTriangle: '⋪', + NotLeftTriangleEqual: '⋬', + NotLess: '≮', + NotLessEqual: '≰', + NotLessGreater: '≸', + NotLessLess: '≪', + NotLessSlantEqual: '⩽', + NotLessTilde: '≴', + NotNestedGreaterGreater: '⪢', + NotNestedLessLess: '⪡', + notni: '∌', + notniva: '∌', + notnivb: '⋾', + notnivc: '⋽', + NotPrecedes: '⊀', + NotPrecedesEqual: '⪯', + NotPrecedesSlantEqual: '⋠', + NotReverseElement: '∌', + NotRightTriangleBar: '⧐', + NotRightTriangle: '⋫', + NotRightTriangleEqual: '⋭', + NotSquareSubset: '⊏', + NotSquareSubsetEqual: '⋢', + NotSquareSuperset: '⊐', + NotSquareSupersetEqual: '⋣', + NotSubset: '⊂', + NotSubsetEqual: '⊈', + NotSucceeds: '⊁', + NotSucceedsEqual: '⪰', + NotSucceedsSlantEqual: '⋡', + NotSucceedsTilde: '≿', + NotSuperset: '⊃', + NotSupersetEqual: '⊉', + NotTilde: '≁', + NotTildeEqual: '≄', + NotTildeFullEqual: '≇', + NotTildeTilde: '≉', + NotVerticalBar: '∤', + nparallel: '∦', + npar: '∦', + nparsl: '⫽', + npart: '∂', + npolint: '⨔', + npr: '⊀', + nprcue: '⋠', + nprec: '⊀', + npreceq: '⪯', + npre: '⪯', + nrarrc: '⤳', + nrarr: '↛', + nrArr: '⇏', + nrarrw: '↝', + nrightarrow: '↛', 
+ nRightarrow: '⇏', + nrtri: '⋫', + nrtrie: '⋭', + nsc: '⊁', + nsccue: '⋡', + nsce: '⪰', + Nscr: '𝒩', + nscr: '𝓃', + nshortmid: '∤', + nshortparallel: '∦', + nsim: '≁', + nsime: '≄', + nsimeq: '≄', + nsmid: '∤', + nspar: '∦', + nsqsube: '⋢', + nsqsupe: '⋣', + nsub: '⊄', + nsubE: '⫅', + nsube: '⊈', + nsubset: '⊂', + nsubseteq: '⊈', + nsubseteqq: '⫅', + nsucc: '⊁', + nsucceq: '⪰', + nsup: '⊅', + nsupE: '⫆', + nsupe: '⊉', + nsupset: '⊃', + nsupseteq: '⊉', + nsupseteqq: '⫆', + ntgl: '≹', + Ntilde: 'Ñ', + ntilde: 'ñ', + ntlg: '≸', + ntriangleleft: '⋪', + ntrianglelefteq: '⋬', + ntriangleright: '⋫', + ntrianglerighteq: '⋭', + Nu: 'Ν', + nu: 'ν', + num: '#', + numero: '№', + numsp: ' ', + nvap: '≍', + nvdash: '⊬', + nvDash: '⊭', + nVdash: '⊮', + nVDash: '⊯', + nvge: '≥', + nvgt: '>', + nvHarr: '⤄', + nvinfin: '⧞', + nvlArr: '⤂', + nvle: '≤', + nvlt: '>', + nvltrie: '⊴', + nvrArr: '⤃', + nvrtrie: '⊵', + nvsim: '∼', + nwarhk: '⤣', + nwarr: '↖', + nwArr: '⇖', + nwarrow: '↖', + nwnear: '⤧', + Oacute: 'Ó', + oacute: 'ó', + oast: '⊛', + Ocirc: 'Ô', + ocirc: 'ô', + ocir: '⊚', + Ocy: 'О', + ocy: 'о', + odash: '⊝', + Odblac: 'Ő', + odblac: 'ő', + odiv: '⨸', + odot: '⊙', + odsold: '⦼', + OElig: 'Œ', + oelig: 'œ', + ofcir: '⦿', + Ofr: '𝔒', + ofr: '𝔬', + ogon: '˛', + Ograve: 'Ò', + ograve: 'ò', + ogt: '⧁', + ohbar: '⦵', + ohm: 'Ω', + oint: '∮', + olarr: '↺', + olcir: '⦾', + olcross: '⦻', + oline: '‾', + olt: '⧀', + Omacr: 'Ō', + omacr: 'ō', + Omega: 'Ω', + omega: 'ω', + Omicron: 'Ο', + omicron: 'ο', + omid: '⦶', + ominus: '⊖', + Oopf: '𝕆', + oopf: '𝕠', + opar: '⦷', + OpenCurlyDoubleQuote: '“', + OpenCurlyQuote: '‘', + operp: '⦹', + oplus: '⊕', + orarr: '↻', + Or: '⩔', + or: '∨', + ord: '⩝', + order: 'ℴ', + orderof: 'ℴ', + ordf: 'ª', + ordm: 'º', + origof: '⊶', + oror: '⩖', + orslope: '⩗', + orv: '⩛', + oS: 'Ⓢ', + Oscr: '𝒪', + oscr: 'ℴ', + Oslash: 'Ø', + oslash: 'ø', + osol: '⊘', + Otilde: 'Õ', + otilde: 'õ', + otimesas: '⨶', + Otimes: '⨷', + otimes: '⊗', + Ouml: 'Ö', + ouml: 'ö', + 
ovbar: '⌽', + OverBar: '‾', + OverBrace: '⏞', + OverBracket: '⎴', + OverParenthesis: '⏜', + para: '¶', + parallel: '∥', + par: '∥', + parsim: '⫳', + parsl: '⫽', + part: '∂', + PartialD: '∂', + Pcy: 'П', + pcy: 'п', + percnt: '%', + period: '.', + permil: '‰', + perp: '⊥', + pertenk: '‱', + Pfr: '𝔓', + pfr: '𝔭', + Phi: 'Φ', + phi: 'φ', + phiv: 'ϕ', + phmmat: 'ℳ', + phone: '☎', + Pi: 'Π', + pi: 'π', + pitchfork: '⋔', + piv: 'ϖ', + planck: 'ℏ', + planckh: 'ℎ', + plankv: 'ℏ', + plusacir: '⨣', + plusb: '⊞', + pluscir: '⨢', + plus: '+', + plusdo: '∔', + plusdu: '⨥', + pluse: '⩲', + PlusMinus: '±', + plusmn: '±', + plussim: '⨦', + plustwo: '⨧', + pm: '±', + Poincareplane: 'ℌ', + pointint: '⨕', + popf: '𝕡', + Popf: 'ℙ', + pound: '£', + prap: '⪷', + Pr: '⪻', + pr: '≺', + prcue: '≼', + precapprox: '⪷', + prec: '≺', + preccurlyeq: '≼', + Precedes: '≺', + PrecedesEqual: '⪯', + PrecedesSlantEqual: '≼', + PrecedesTilde: '≾', + preceq: '⪯', + precnapprox: '⪹', + precneqq: '⪵', + precnsim: '⋨', + pre: '⪯', + prE: '⪳', + precsim: '≾', + prime: '′', + Prime: '″', + primes: 'ℙ', + prnap: '⪹', + prnE: '⪵', + prnsim: '⋨', + prod: '∏', + Product: '∏', + profalar: '⌮', + profline: '⌒', + profsurf: '⌓', + prop: '∝', + Proportional: '∝', + Proportion: '∷', + propto: '∝', + prsim: '≾', + prurel: '⊰', + Pscr: '𝒫', + pscr: '𝓅', + Psi: 'Ψ', + psi: 'ψ', + puncsp: ' ', + Qfr: '𝔔', + qfr: '𝔮', + qint: '⨌', + qopf: '𝕢', + Qopf: 'ℚ', + qprime: '⁗', + Qscr: '𝒬', + qscr: '𝓆', + quaternions: 'ℍ', + quatint: '⨖', + quest: '?', + questeq: '≟', + quot: '"', + QUOT: '"', + rAarr: '⇛', + race: '∽', + Racute: 'Ŕ', + racute: 'ŕ', + radic: '√', + raemptyv: '⦳', + rang: '⟩', + Rang: '⟫', + rangd: '⦒', + range: '⦥', + rangle: '⟩', + raquo: '»', + rarrap: '⥵', + rarrb: '⇥', + rarrbfs: '⤠', + rarrc: '⤳', + rarr: '→', + Rarr: '↠', + rArr: '⇒', + rarrfs: '⤞', + rarrhk: '↪', + rarrlp: '↬', + rarrpl: '⥅', + rarrsim: '⥴', + Rarrtl: '⤖', + rarrtl: '↣', + rarrw: '↝', + ratail: '⤚', + rAtail: '⤜', + ratio: '∶', + 
rationals: 'ℚ', + rbarr: '⤍', + rBarr: '⤏', + RBarr: '⤐', + rbbrk: '❳', + rbrace: '}', + rbrack: ']', + rbrke: '⦌', + rbrksld: '⦎', + rbrkslu: '⦐', + Rcaron: 'Ř', + rcaron: 'ř', + Rcedil: 'Ŗ', + rcedil: 'ŗ', + rceil: '⌉', + rcub: '}', + Rcy: 'Р', + rcy: 'р', + rdca: '⤷', + rdldhar: '⥩', + rdquo: '”', + rdquor: '”', + rdsh: '↳', + real: 'ℜ', + realine: 'ℛ', + realpart: 'ℜ', + reals: 'ℝ', + Re: 'ℜ', + rect: '▭', + reg: '®', + REG: '®', + ReverseElement: '∋', + ReverseEquilibrium: '⇋', + ReverseUpEquilibrium: '⥯', + rfisht: '⥽', + rfloor: '⌋', + rfr: '𝔯', + Rfr: 'ℜ', + rHar: '⥤', + rhard: '⇁', + rharu: '⇀', + rharul: '⥬', + Rho: 'Ρ', + rho: 'ρ', + rhov: 'ϱ', + RightAngleBracket: '⟩', + RightArrowBar: '⇥', + rightarrow: '→', + RightArrow: '→', + Rightarrow: '⇒', + RightArrowLeftArrow: '⇄', + rightarrowtail: '↣', + RightCeiling: '⌉', + RightDoubleBracket: '⟧', + RightDownTeeVector: '⥝', + RightDownVectorBar: '⥕', + RightDownVector: '⇂', + RightFloor: '⌋', + rightharpoondown: '⇁', + rightharpoonup: '⇀', + rightleftarrows: '⇄', + rightleftharpoons: '⇌', + rightrightarrows: '⇉', + rightsquigarrow: '↝', + RightTeeArrow: '↦', + RightTee: '⊢', + RightTeeVector: '⥛', + rightthreetimes: '⋌', + RightTriangleBar: '⧐', + RightTriangle: '⊳', + RightTriangleEqual: '⊵', + RightUpDownVector: '⥏', + RightUpTeeVector: '⥜', + RightUpVectorBar: '⥔', + RightUpVector: '↾', + RightVectorBar: '⥓', + RightVector: '⇀', + ring: '˚', + risingdotseq: '≓', + rlarr: '⇄', + rlhar: '⇌', + rlm: '‏', + rmoustache: '⎱', + rmoust: '⎱', + rnmid: '⫮', + roang: '⟭', + roarr: '⇾', + robrk: '⟧', + ropar: '⦆', + ropf: '𝕣', + Ropf: 'ℝ', + roplus: '⨮', + rotimes: '⨵', + RoundImplies: '⥰', + rpar: ')', + rpargt: '⦔', + rppolint: '⨒', + rrarr: '⇉', + Rrightarrow: '⇛', + rsaquo: '›', + rscr: '𝓇', + Rscr: 'ℛ', + rsh: '↱', + Rsh: '↱', + rsqb: ']', + rsquo: '’', + rsquor: '’', + rthree: '⋌', + rtimes: '⋊', + rtri: '▹', + rtrie: '⊵', + rtrif: '▸', + rtriltri: '⧎', + RuleDelayed: '⧴', + ruluhar: '⥨', + rx: '℞', + Sacute: 
'Ś', + sacute: 'ś', + sbquo: '‚', + scap: '⪸', + Scaron: 'Š', + scaron: 'š', + Sc: '⪼', + sc: '≻', + sccue: '≽', + sce: '⪰', + scE: '⪴', + Scedil: 'Ş', + scedil: 'ş', + Scirc: 'Ŝ', + scirc: 'ŝ', + scnap: '⪺', + scnE: '⪶', + scnsim: '⋩', + scpolint: '⨓', + scsim: '≿', + Scy: 'С', + scy: 'с', + sdotb: '⊡', + sdot: '⋅', + sdote: '⩦', + searhk: '⤥', + searr: '↘', + seArr: '⇘', + searrow: '↘', + sect: '§', + semi: ';', + seswar: '⤩', + setminus: '∖', + setmn: '∖', + sext: '✶', + Sfr: '𝔖', + sfr: '𝔰', + sfrown: '⌢', + sharp: '♯', + SHCHcy: 'Щ', + shchcy: 'щ', + SHcy: 'Ш', + shcy: 'ш', + ShortDownArrow: '↓', + ShortLeftArrow: '←', + shortmid: '∣', + shortparallel: '∥', + ShortRightArrow: '→', + ShortUpArrow: '↑', + shy: '­', + Sigma: 'Σ', + sigma: 'σ', + sigmaf: 'ς', + sigmav: 'ς', + sim: '∼', + simdot: '⩪', + sime: '≃', + simeq: '≃', + simg: '⪞', + simgE: '⪠', + siml: '⪝', + simlE: '⪟', + simne: '≆', + simplus: '⨤', + simrarr: '⥲', + slarr: '←', + SmallCircle: '∘', + smallsetminus: '∖', + smashp: '⨳', + smeparsl: '⧤', + smid: '∣', + smile: '⌣', + smt: '⪪', + smte: '⪬', + smtes: '⪬', + SOFTcy: 'Ь', + softcy: 'ь', + solbar: '⌿', + solb: '⧄', + sol: '/', + Sopf: '𝕊', + sopf: '𝕤', + spades: '♠', + spadesuit: '♠', + spar: '∥', + sqcap: '⊓', + sqcaps: '⊓', + sqcup: '⊔', + sqcups: '⊔', + Sqrt: '√', + sqsub: '⊏', + sqsube: '⊑', + sqsubset: '⊏', + sqsubseteq: '⊑', + sqsup: '⊐', + sqsupe: '⊒', + sqsupset: '⊐', + sqsupseteq: '⊒', + square: '□', + Square: '□', + SquareIntersection: '⊓', + SquareSubset: '⊏', + SquareSubsetEqual: '⊑', + SquareSuperset: '⊐', + SquareSupersetEqual: '⊒', + SquareUnion: '⊔', + squarf: '▪', + squ: '□', + squf: '▪', + srarr: '→', + Sscr: '𝒮', + sscr: '𝓈', + ssetmn: '∖', + ssmile: '⌣', + sstarf: '⋆', + Star: '⋆', + star: '☆', + starf: '★', + straightepsilon: 'ϵ', + straightphi: 'ϕ', + strns: '¯', + sub: '⊂', + Sub: '⋐', + subdot: '⪽', + subE: '⫅', + sube: '⊆', + subedot: '⫃', + submult: '⫁', + subnE: '⫋', + subne: '⊊', + subplus: '⪿', + subrarr: '⥹', + 
subset: '⊂', + Subset: '⋐', + subseteq: '⊆', + subseteqq: '⫅', + SubsetEqual: '⊆', + subsetneq: '⊊', + subsetneqq: '⫋', + subsim: '⫇', + subsub: '⫕', + subsup: '⫓', + succapprox: '⪸', + succ: '≻', + succcurlyeq: '≽', + Succeeds: '≻', + SucceedsEqual: '⪰', + SucceedsSlantEqual: '≽', + SucceedsTilde: '≿', + succeq: '⪰', + succnapprox: '⪺', + succneqq: '⪶', + succnsim: '⋩', + succsim: '≿', + SuchThat: '∋', + sum: '∑', + Sum: '∑', + sung: '♪', + sup1: '¹', + sup2: '²', + sup3: '³', + sup: '⊃', + Sup: '⋑', + supdot: '⪾', + supdsub: '⫘', + supE: '⫆', + supe: '⊇', + supedot: '⫄', + Superset: '⊃', + SupersetEqual: '⊇', + suphsol: '⟉', + suphsub: '⫗', + suplarr: '⥻', + supmult: '⫂', + supnE: '⫌', + supne: '⊋', + supplus: '⫀', + supset: '⊃', + Supset: '⋑', + supseteq: '⊇', + supseteqq: '⫆', + supsetneq: '⊋', + supsetneqq: '⫌', + supsim: '⫈', + supsub: '⫔', + supsup: '⫖', + swarhk: '⤦', + swarr: '↙', + swArr: '⇙', + swarrow: '↙', + swnwar: '⤪', + szlig: 'ß', + Tab: ' ', + target: '⌖', + Tau: 'Τ', + tau: 'τ', + tbrk: '⎴', + Tcaron: 'Ť', + tcaron: 'ť', + Tcedil: 'Ţ', + tcedil: 'ţ', + Tcy: 'Т', + tcy: 'т', + tdot: '⃛', + telrec: '⌕', + Tfr: '𝔗', + tfr: '𝔱', + there4: '∴', + therefore: '∴', + Therefore: '∴', + Theta: 'Θ', + theta: 'θ', + thetasym: 'ϑ', + thetav: 'ϑ', + thickapprox: '≈', + thicksim: '∼', + ThickSpace: ' ', + ThinSpace: ' ', + thinsp: ' ', + thkap: '≈', + thksim: '∼', + THORN: 'Þ', + thorn: 'þ', + tilde: '˜', + Tilde: '∼', + TildeEqual: '≃', + TildeFullEqual: '≅', + TildeTilde: '≈', + timesbar: '⨱', + timesb: '⊠', + times: '×', + timesd: '⨰', + tint: '∭', + toea: '⤨', + topbot: '⌶', + topcir: '⫱', + top: '⊤', + Topf: '𝕋', + topf: '𝕥', + topfork: '⫚', + tosa: '⤩', + tprime: '‴', + trade: '™', + TRADE: '™', + triangle: '▵', + triangledown: '▿', + triangleleft: '◃', + trianglelefteq: '⊴', + triangleq: '≜', + triangleright: '▹', + trianglerighteq: '⊵', + tridot: '◬', + trie: '≜', + triminus: '⨺', + TripleDot: '⃛', + triplus: '⨹', + trisb: '⧍', + tritime: '⨻', + 
trpezium: '⏢', + Tscr: '𝒯', + tscr: '𝓉', + TScy: 'Ц', + tscy: 'ц', + TSHcy: 'Ћ', + tshcy: 'ћ', + Tstrok: 'Ŧ', + tstrok: 'ŧ', + twixt: '≬', + twoheadleftarrow: '↞', + twoheadrightarrow: '↠', + Uacute: 'Ú', + uacute: 'ú', + uarr: '↑', + Uarr: '↟', + uArr: '⇑', + Uarrocir: '⥉', + Ubrcy: 'Ў', + ubrcy: 'ў', + Ubreve: 'Ŭ', + ubreve: 'ŭ', + Ucirc: 'Û', + ucirc: 'û', + Ucy: 'У', + ucy: 'у', + udarr: '⇅', + Udblac: 'Ű', + udblac: 'ű', + udhar: '⥮', + ufisht: '⥾', + Ufr: '𝔘', + ufr: '𝔲', + Ugrave: 'Ù', + ugrave: 'ù', + uHar: '⥣', + uharl: '↿', + uharr: '↾', + uhblk: '▀', + ulcorn: '⌜', + ulcorner: '⌜', + ulcrop: '⌏', + ultri: '◸', + Umacr: 'Ū', + umacr: 'ū', + uml: '¨', + UnderBar: '_', + UnderBrace: '⏟', + UnderBracket: '⎵', + UnderParenthesis: '⏝', + Union: '⋃', + UnionPlus: '⊎', + Uogon: 'Ų', + uogon: 'ų', + Uopf: '𝕌', + uopf: '𝕦', + UpArrowBar: '⤒', + uparrow: '↑', + UpArrow: '↑', + Uparrow: '⇑', + UpArrowDownArrow: '⇅', + updownarrow: '↕', + UpDownArrow: '↕', + Updownarrow: '⇕', + UpEquilibrium: '⥮', + upharpoonleft: '↿', + upharpoonright: '↾', + uplus: '⊎', + UpperLeftArrow: '↖', + UpperRightArrow: '↗', + upsi: 'υ', + Upsi: 'ϒ', + upsih: 'ϒ', + Upsilon: 'Υ', + upsilon: 'υ', + UpTeeArrow: '↥', + UpTee: '⊥', + upuparrows: '⇈', + urcorn: '⌝', + urcorner: '⌝', + urcrop: '⌎', + Uring: 'Ů', + uring: 'ů', + urtri: '◹', + Uscr: '𝒰', + uscr: '𝓊', + utdot: '⋰', + Utilde: 'Ũ', + utilde: 'ũ', + utri: '▵', + utrif: '▴', + uuarr: '⇈', + Uuml: 'Ü', + uuml: 'ü', + uwangle: '⦧', + vangrt: '⦜', + varepsilon: 'ϵ', + varkappa: 'ϰ', + varnothing: '∅', + varphi: 'ϕ', + varpi: 'ϖ', + varpropto: '∝', + varr: '↕', + vArr: '⇕', + varrho: 'ϱ', + varsigma: 'ς', + varsubsetneq: '⊊', + varsubsetneqq: '⫋', + varsupsetneq: '⊋', + varsupsetneqq: '⫌', + vartheta: 'ϑ', + vartriangleleft: '⊲', + vartriangleright: '⊳', + vBar: '⫨', + Vbar: '⫫', + vBarv: '⫩', + Vcy: 'В', + vcy: 'в', + vdash: '⊢', + vDash: '⊨', + Vdash: '⊩', + VDash: '⊫', + Vdashl: '⫦', + veebar: '⊻', + vee: '∨', + Vee: '⋁', + veeeq: '≚', + 
vellip: '⋮', + verbar: '|', + Verbar: '‖', + vert: '|', + Vert: '‖', + VerticalBar: '∣', + VerticalLine: '|', + VerticalSeparator: '❘', + VerticalTilde: '≀', + VeryThinSpace: ' ', + Vfr: '𝔙', + vfr: '𝔳', + vltri: '⊲', + vnsub: '⊂', + vnsup: '⊃', + Vopf: '𝕍', + vopf: '𝕧', + vprop: '∝', + vrtri: '⊳', + Vscr: '𝒱', + vscr: '𝓋', + vsubnE: '⫋', + vsubne: '⊊', + vsupnE: '⫌', + vsupne: '⊋', + Vvdash: '⊪', + vzigzag: '⦚', + Wcirc: 'Ŵ', + wcirc: 'ŵ', + wedbar: '⩟', + wedge: '∧', + Wedge: '⋀', + wedgeq: '≙', + weierp: '℘', + Wfr: '𝔚', + wfr: '𝔴', + Wopf: '𝕎', + wopf: '𝕨', + wp: '℘', + wr: '≀', + wreath: '≀', + Wscr: '𝒲', + wscr: '𝓌', + xcap: '⋂', + xcirc: '◯', + xcup: '⋃', + xdtri: '▽', + Xfr: '𝔛', + xfr: '𝔵', + xharr: '⟷', + xhArr: '⟺', + Xi: 'Ξ', + xi: 'ξ', + xlarr: '⟵', + xlArr: '⟸', + xmap: '⟼', + xnis: '⋻', + xodot: '⨀', + Xopf: '𝕏', + xopf: '𝕩', + xoplus: '⨁', + xotime: '⨂', + xrarr: '⟶', + xrArr: '⟹', + Xscr: '𝒳', + xscr: '𝓍', + xsqcup: '⨆', + xuplus: '⨄', + xutri: '△', + xvee: '⋁', + xwedge: '⋀', + Yacute: 'Ý', + yacute: 'ý', + YAcy: 'Я', + yacy: 'я', + Ycirc: 'Ŷ', + ycirc: 'ŷ', + Ycy: 'Ы', + ycy: 'ы', + yen: '¥', + Yfr: '𝔜', + yfr: '𝔶', + YIcy: 'Ї', + yicy: 'ї', + Yopf: '𝕐', + yopf: '𝕪', + Yscr: '𝒴', + yscr: '𝓎', + YUcy: 'Ю', + yucy: 'ю', + yuml: 'ÿ', + Yuml: 'Ÿ', + Zacute: 'Ź', + zacute: 'ź', + Zcaron: 'Ž', + zcaron: 'ž', + Zcy: 'З', + zcy: 'з', + Zdot: 'Ż', + zdot: 'ż', + zeetrf: 'ℨ', + ZeroWidthSpace: '​', + Zeta: 'Ζ', + zeta: 'ζ', + zfr: '𝔷', + Zfr: 'ℨ', + ZHcy: 'Ж', + zhcy: 'ж', + zigrarr: '⇝', + zopf: '𝕫', + Zopf: 'ℤ', + Zscr: '𝒵', + zscr: '𝓏', + zwj: '‍', + zwnj: '‌' }; + // Some regexps used in inline parser: var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]'; @@ -635,7 +2761,21 @@ var parseEntity = function() { var m; if ((m = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i))) { - return [{ t: 'Entity', c: m }]; + var isNumeric = /^&#/.test(m); + var isHex = /^&#[Xx]/.test(m); + var uchar; + if (isNumeric) { + var num; + if 
(isHex) { + num = parseInt(m.slice(3,-1), 16); + } else { + num = parseInt(m.slice(2,-1), 10); + } + uchar = String.fromCharCode(num); + } else { + uchar = entities[m.slice(1,-1)]; + } + return [{ t: 'Str', c: uchar || m }]; } else { return null; } @@ -1515,8 +3655,6 @@ return inTags('strong', [], this.renderInlines(inline.c)); case 'Html': return inline.c; - case 'Entity': - return inline.c; case 'Link': attrs = [['href', this.escape(inline.destination, true)]]; if (inline.title) { -- cgit v1.2.3 From 40f5a3d6f904b6b9558d51b0133f6a406eafc21a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 30 Sep 2014 21:39:57 -0700 Subject: unescape URI before escaping. If we already have %-encoded characters in the URI, we want to preserve them. --- js/stmd.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 2a63d23..e113794 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2333,12 +2333,12 @@ dest = m.slice(1,-1); return [{t: 'Link', label: [{ t: 'Str', c: dest }], - destination: 'mailto:' + encodeURI(dest) }]; + destination: 'mailto:' + encodeURI(unescape(dest)) }]; } else if ((m = 
this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1,-1); return [{ t: 'Link', label: [{ t: 'Str', c: dest }], - destination: encodeURI(dest) }]; + destination: encodeURI(unescape(dest)) }]; } else { return null; } @@ -2615,11 +2615,11 @@ var parseLinkDestination = function() { var res = this.match(reLinkDestinationBraces); if (res) { // chop off surrounding <..>: - return encodeURI(unescapeBS(res.substr(1, res.length - 2))); + return encodeURI(unescape(unescapeBS(res.substr(1, res.length - 2)))); } else { res = this.match(reLinkDestination); if (res !== null) { - return encodeURI(unescapeBS(res)); + return encodeURI(unescape(unescapeBS(res))); } else { return null; } -- cgit v1.2.3 From 669ea14fdbf12c25693706502f8dae6b1cf4e033 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 30 Sep 2014 21:51:31 -0700 Subject: Unescape entities as well as backslashes in titles, URLs. 
This way URLs with entities will be properly percent encoded as in the C implementation. --- js/stmd.js | 55 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index e113794..04d7360 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2167,6 +2167,7 @@ PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" + "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])"; + var ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});" var reHtmlTag = new RegExp('^' + HTMLTAG, 'i'); @@ -2195,16 +2196,38 @@ var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; + var reEntityHere = new RegExp('^' + ENTITY, 'i'); + + var reEntity = new RegExp(ENTITY, 'gi'); + // Matches a character with a special meaning in markdown, // or a string of non-special characters. Note: we match // clumps of _ or * or `, because they need to be handled in groups. var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; // UTILITY FUNCTIONS + var entityToChar = function(m) { + var isNumeric = /^&#/.test(m); + var isHex = /^&#[Xx]/.test(m); + var uchar; + if (isNumeric) { + var num; + if (isHex) { + num = parseInt(m.slice(3,-1), 16); + } else { + num = parseInt(m.slice(2,-1), 10); + } + uchar = String.fromCharCode(num); + } else { + uchar = entities[m.slice(1,-1)]; + } + return (uchar || m); + } - // Replace backslash escapes with literal characters. - var unescapeBS = function(s) { - return s.replace(reAllEscapedChar, '$1'); + // Replace entities and backslash escapes with literal characters. + var unescapeEntBS = function(s) { + return s.replace(reAllEscapedChar, '$1') + .replace(reEntity, entityToChar);; }; // Returns true if string contains only space characters. 
@@ -2604,7 +2627,7 @@ var title = this.match(reLinkTitle); if (title) { // chop off quotes from title and unescape: - return unescapeBS(title.substr(1, title.length - 2)); + return unescapeEntBS(title.substr(1, title.length - 2)); } else { return null; } @@ -2615,11 +2638,11 @@ var parseLinkDestination = function() { var res = this.match(reLinkDestinationBraces); if (res) { // chop off surrounding <..>: - return encodeURI(unescape(unescapeBS(res.substr(1, res.length - 2)))); + return encodeURI(unescape(unescapeEntBS(res.substr(1, res.length - 2)))); } else { res = this.match(reLinkDestination); if (res !== null) { - return encodeURI(unescape(unescapeBS(res))); + return encodeURI(unescape(unescapeEntBS(res))); } else { return null; } @@ -2760,22 +2783,8 @@ // Attempt to parse an entity, return Entity object if successful. var parseEntity = function() { var m; - if ((m = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i))) { - var isNumeric = /^&#/.test(m); - var isHex = /^&#[Xx]/.test(m); - var uchar; - if (isNumeric) { - var num; - if (isHex) { - num = parseInt(m.slice(3,-1), 16); - } else { - num = parseInt(m.slice(2,-1), 10); - } - uchar = String.fromCharCode(num); - } else { - uchar = entities[m.slice(1,-1)]; - } - return [{ t: 'Str', c: uchar || m }]; + if ((m = this.match(reEntityHere))) { + return [{ t: 'Str', c: entityToChar(m) }]; } else { return null; } @@ -3513,7 +3522,7 @@ case 'FencedCode': // first line becomes info string - block.info = unescapeBS(block.strings[0].trim()); + block.info = unescapeEntBS(block.strings[0].trim()); if (block.strings.length == 1) { block.string_content = ''; } else { -- cgit v1.2.3 From fb0c0cc2741120e3706c7698b15a510c40fc71c0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 2 Oct 2014 10:33:49 -0700 Subject: Changed peek() to return char code. Test char codes instead of strings. Small optimization (about 1% speed boost). 
--- js/stmd.js | 106 +++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 64 insertions(+), 42 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 04d7360..788809b 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2137,6 +2137,22 @@ zwj: '‍', zwnj: '‌' }; + // Constants for character codes: + + var C_NEWLINE = 10; + var C_SPACE = 32; + var C_ASTERISK = 42; + var C_UNDERSCORE = 95; + var C_BACKTICK = 96; + var C_OPEN_BRACKET = 91; + var C_CLOSE_BRACKET = 93; + var C_LESSTHAN = 60; + var C_BANG = 33; + var C_BACKSLASH = 92; + var C_AMPERSAND = 38; + var C_OPEN_PAREN = 40; + var C_COLON = 58; + // Some regexps used in inline parser: var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]'; @@ -2286,10 +2302,14 @@ } }; - // Returns the character at the current subject position, or null if + // Returns the code for the character at the current subject position, or -1 // there are no more characters. var peek = function() { - return this.subject.charAt(this.pos) || null; + if (this.pos < this.subject.length) { + return this.subject.charCodeAt(this.pos); + } else { + return -1; + } }; // Parse zero or more space characters, including at most one newline @@ -2377,29 +2397,34 @@ } }; - // Scan a sequence of characters == c, and return information about + // Scan a sequence of characters with code cc, and return information about // the number of delimiters and whether they are positioned such that // they can open and/or close emphasis or strong emphasis. A utility // function for strong/emph parsing. - var scanDelims = function(c) { + var scanDelims = function(cc) { var numdelims = 0; var first_close_delims = 0; - var char_before, char_after; + var char_before, char_after, cc_after; var startpos = this.pos; char_before = this.pos === 0 ? 
'\n' : this.subject.charAt(this.pos - 1); - while (this.peek() === c) { + while (this.peek() === cc) { numdelims++; this.pos++; } - char_after = this.peek() || '\n'; + cc_after = this.peek(); + if (cc_after === -1) { + char_after = '\n'; + } else { + char_after = String.fromCharCode(cc_after); + } var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after)); var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before)); - if (c === '_') { + if (cc === C_UNDERSCORE) { can_open = can_open && !((/[a-z0-9]/i).test(char_before)); can_close = can_close && !((/[a-z0-9]/i).test(char_after)); } @@ -2422,21 +2447,18 @@ } // Attempt to parse emphasis or strong emphasis. - var parseEmphasis = function() { + var parseEmphasis = function(cc) { var startpos = this.pos; var c ; var first_close = 0; - c = this.peek(); - if (!(c === '*' || c === '_')) { - return null; - } + c = String.fromCharCode(cc); var numdelims; var delimpos; var inlines = []; // Get opening delimiters. - res = this.scanDelims(c); + res = this.scanDelims(cc); numdelims = res.numdelims; if (numdelims === 0) { @@ -2472,10 +2494,10 @@ } while (true) { - if (this.last_emphasis_closer[c] < this.pos) { + if (this.last_emphasis_closer[cc] < this.pos) { break; } - res = this.scanDelims(c); + res = this.scanDelims(cc); if (res) { numdelims = res.numdelims; @@ -2615,7 +2637,7 @@ // we didn't match emphasis: fallback this.pos = fallbackpos; if (last_emphasis_closer) { - this.last_emphasis_closer[c] = last_emphasis_closer; + this.last_emphasis_closer[cc] = last_emphasis_closer; } return [fallback]; @@ -2651,7 +2673,7 @@ // Attempt to parse a link label, returning number of characters parsed. 
var parseLinkLabel = function() { - if (this.peek() != '[') { + if (this.peek() != C_OPEN_BRACKET) { return 0; } var startpos = this.pos; @@ -2668,36 +2690,36 @@ } this.pos++; // advance past [ var c; - while ((c = this.peek()) && (c != ']' || nest_level > 0)) { + while ((c = this.peek()) && c != -1 && (c != C_CLOSE_BRACKET || nest_level > 0)) { switch (c) { - case '`': + case C_BACKTICK: this.parseBackticks(); break; - case '<': + case C_LESSTHAN: this.parseAutolink() || this.parseHtmlTag() || this.pos++; break; - case '[': // nested [] + case C_OPEN_BRACKET: // nested [] nest_level++; this.pos++; break; - case ']': // nested [] + case C_CLOSE_BRACKET: // nested [] nest_level--; this.pos++; break; - case '\\': + case C_BACKSLASH: this.parseBackslash(); break; default: this.parseString(); } } - if (c === ']') { + if (c === C_CLOSE_BRACKET) { this.label_nest_level = 0; this.pos++; // advance past ] return this.pos - startpos; } else { - if (!c) { + if (c === -1) { this.label_nest_level = nest_level; } this.pos = startpos; @@ -2730,7 +2752,7 @@ // if we got this far, we've parsed a label. 
// Try to parse an explicit link: [label](url "title") - if (this.peek() == '(') { + if (this.peek() == C_OPEN_PAREN) { this.pos++; if (this.spnl() && ((dest = this.parseLinkDestination()) !== null) && @@ -2851,7 +2873,7 @@ } // colon: - if (this.peek() === ':') { + if (this.peek() === C_COLON) { this.pos++; } else { this.pos = startpos; @@ -2902,35 +2924,35 @@ } var c = this.peek(); - if (!c) { + if (c === -1) { return null; } var res; switch(c) { - case '\n': - case ' ': + case C_NEWLINE: + case C_SPACE: res = this.parseNewline(); break; - case '\\': + case C_BACKSLASH: res = this.parseBackslash(); break; - case '`': + case C_BACKTICK: res = this.parseBackticks(); break; - case '*': - case '_': - res = this.parseEmphasis(); + case C_ASTERISK: + case C_UNDERSCORE: + res = this.parseEmphasis(c); break; - case '[': + case C_OPEN_BRACKET: res = this.parseLink(); break; - case '!': + case C_BANG: res = this.parseImage(); break; - case '<': + case C_LESSTHAN: res = this.parseAutolink() || this.parseHtmlTag(); break; - case '&': + case C_AMPERSAND: res = this.parseEntity(); break; default: @@ -2939,7 +2961,7 @@ } if (res === null) { this.pos += 1; - res = [{t: 'Str', c: c}]; + res = [{t: 'Str', c: String.fromCharCode(c)}]; } if (res && memoize) { @@ -2956,7 +2978,7 @@ this.pos = 0; this.refmap = refmap || {}; this.memo = {}; - this.last_emphasis_closer = { '*': s.length, '_': s.length }; + this.last_emphasis_closer = { C_ASTERISK: s.length, C_UNDERSCORE: s.length }; var inlines = []; var next_inline; while ((next_inline = this.parseInline())) { -- cgit v1.2.3 From 189685f5a0527e90f4ff31623d219415e2735fac Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 2 Oct 2014 11:23:51 -0700 Subject: Eliminated unnecessary variable. 
--- js/stmd.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 788809b..f4ccdf4 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -3205,8 +3205,7 @@ switch (container.t) { case 'BlockQuote': - var matched = indent <= 3 && ln.charAt(first_nonspace) === '>'; - if (matched) { + if (indent <= 3 && ln.charAt(first_nonspace) === '>') { offset = first_nonspace + 1; if (ln.charAt(offset) === ' ') { offset++; -- cgit v1.2.3 From 67e76295cbc15e258c6ac579b082e410b4aaca6a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 2 Oct 2014 11:28:18 -0700 Subject: Char code optimizations in block parsers. --- js/stmd.js | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index f4ccdf4..fc8d4a7 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2147,6 +2147,7 @@ var C_OPEN_BRACKET = 91; var C_CLOSE_BRACKET = 93; var C_LESSTHAN = 60; + var C_GREATERTHAN = 62; var C_BANG = 33; var C_BACKSLASH = 92; var C_AMPERSAND = 38; @@ -2352,7 +2353,7 @@ var parseBackslash = function() { var subj = this.subject, pos = this.pos; - if (subj.charAt(pos) === '\\') { + if (subj.charCodeAt(pos) === C_BACKSLASH) { if (subj.charAt(pos + 1) === '\n') { this.pos = this.pos + 2; return [{ t: 'Hardbreak' }]; @@ -3205,9 +3206,9 @@ switch (container.t) { case 'BlockQuote': - if (indent <= 3 && ln.charAt(first_nonspace) === '>') { + if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { offset = first_nonspace + 1; - if (ln.charAt(offset) === ' ') { + if (ln.charCodeAt(offset) === C_SPACE) { offset++; } } else { @@ -3247,7 +3248,7 @@ case 'FencedCode': // skip optional spaces of fence offset i = container.fence_offset; - while (i > 0 && ln.charAt(offset) === ' ') { + while (i > 0 && ln.charCodeAt(offset) === C_SPACE) { offset++; i--; } @@ -3324,11 +3325,11 @@ break; } - } else if (ln.charAt(first_nonspace) === '>') { + } else if (ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { // blockquote offset = 
first_nonspace + 1; // optional following space - if (ln.charAt(offset) === ' ') { + if (ln.charCodeAt(offset) === C_SPACE) { offset++; } closeUnmatchedBlocks(this); @@ -3520,7 +3521,7 @@ block.string_content = block.strings.join('\n').replace(/^ */m,''); // try parsing the beginning as link reference definitions: - while (block.string_content.charAt(0) === '[' && + while (block.string_content.charCodeAt(0) === C_OPEN_BRACKET && (pos = this.inlineParser.parseReference(block.string_content, this.refmap))) { block.string_content = block.string_content.slice(pos); -- cgit v1.2.3 From 9c0b2f51a2e560a3932bb060ecfbfb50879548de Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 2 Oct 2014 12:59:13 -0700 Subject: Fixed rendering bug for blockquotes. --- js/stmd.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/stmd.js b/js/stmd.js index fc8d4a7..4ca38cc 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -3735,7 +3735,7 @@ case 'BlockQuote': var filling = this.renderBlocks(block.children); return inTags('blockquote', [], filling === '' ? this.innersep : - this.innersep + this.renderBlocks(block.children) + this.innersep); + this.innersep + filling + this.innersep); case 'ListItem': return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim()); case 'List': -- cgit v1.2.3 From 3c9ce6fa7434d3ffc1ea8d988e7f77d98d4cc3a2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 2 Oct 2014 13:45:37 -0700 Subject: Changed inline parsers to be monomorphic and modify inlines param. They all return true or false now, instead of the inlines parsed. Performance optimization. --- js/stmd.js | 233 ++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 131 insertions(+), 102 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 4ca38cc..efccad8 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2325,7 +2325,7 @@ // Attempt to parse backticks, returning either a backtick code span or a // literal sequence of backticks. 
- var parseBackticks = function() { + var parseBackticks = function(inlines) { var startpos = this.pos; var ticks = this.match(/^`+/); if (!ticks) { @@ -2336,65 +2336,73 @@ var match; while (!foundCode && (match = this.match(/`+/m))) { if (match == ticks) { - return [{ t: 'Code', c: this.subject.slice(afterOpenTicks, + inlines.push({ t: 'Code', c: this.subject.slice(afterOpenTicks, this.pos - ticks.length) .replace(/[ \n]+/g,' ') - .trim() }]; + .trim() }); + return true; } } // If we got here, we didn't match a closing backtick sequence. this.pos = afterOpenTicks; - return [{ t: 'Str', c: ticks }]; + inlines.push({ t: 'Str', c: ticks }); + return true; }; // Parse a backslash-escaped special character, adding either the escaped // character, a hard line break (if the backslash is followed by a newline), // or a literal backslash to the 'inlines' list. - var parseBackslash = function() { + var parseBackslash = function(inlines) { var subj = this.subject, pos = this.pos; if (subj.charCodeAt(pos) === C_BACKSLASH) { if (subj.charAt(pos + 1) === '\n') { this.pos = this.pos + 2; - return [{ t: 'Hardbreak' }]; + inlines.push({ t: 'Hardbreak' }); } else if (reEscapable.test(subj.charAt(pos + 1))) { this.pos = this.pos + 2; - return [{ t: 'Str', c: subj.charAt(pos + 1) }]; + inlines.push({ t: 'Str', c: subj.charAt(pos + 1) }); } else { this.pos++; - return [{t: 'Str', c: '\\'}]; + inlines.push({t: 'Str', c: '\\'}); } + return true; } else { - return null; + return false; } }; // Attempt to parse an autolink (URL or email in pointy brackets). 
- var parseAutolink = function() { + var parseAutolink = function(inlines) { var m; var dest; if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink dest = m.slice(1,-1); - return [{t: 'Link', + inlines.push( + {t: 'Link', label: [{ t: 'Str', c: dest }], - destination: 'mailto:' + encodeURI(unescape(dest)) }]; + destination: 'mailto:' + encodeURI(unescape(dest)) }); + return true; } else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1,-1); - return [{ t: 'Link', + inlines.push({ + t: 'Link', label: [{ t: 'Str', c: dest }], - destination: encodeURI(unescape(dest)) }]; + destination: encodeURI(unescape(dest)) }); + return true; } else { - return null; + return false; } }; // Attempt to parse a raw HTML tag. 
- var parseHtmlTag = function() { + var parseHtmlTag = function(inlines) { var m = this.match(reHtmlTag); if (m) { - return [{ t: 'Html', c: m }]; + inlines.push({ t: 'Html', c: m }); + return true; } else { - return null; + return false; } }; @@ -2448,7 +2456,7 @@ } // Attempt to parse emphasis or strong emphasis. - var parseEmphasis = function(cc) { + var parseEmphasis = function(cc,inlines) { var startpos = this.pos; var c ; var first_close = 0; @@ -2456,7 +2464,6 @@ var numdelims; var delimpos; - var inlines = []; // Get opening delimiters. res = this.scanDelims(cc); @@ -2464,18 +2471,18 @@ if (numdelims === 0) { this.pos = startpos; - return null; + return false; } if (numdelims >= 4 || !res.can_open) { this.pos += numdelims; - return [Str(this.subject.slice(startpos, startpos + numdelims))]; + inlines.push(Str(this.subject.slice(startpos, startpos + numdelims))); + return true; } this.pos += numdelims; var fallbackpos = this.pos; - var fallback = Str(this.subject.slice(startpos, fallbackpos)); var next_inline; var first = []; @@ -2495,7 +2502,7 @@ } while (true) { - if (this.last_emphasis_closer[cc] < this.pos) { + if (this.last_emphasis_closer[c] < this.pos) { break; } res = this.scanDelims(cc); @@ -2511,7 +2518,8 @@ case 1: // ***a if (numdelims === 3 && can_close) { this.pos += 3; - return [Strong([Emph(first)])]; + inlines.push(Strong([Emph(first)])); + return true; } else if (numdelims === 2 && can_close) { this.pos += 2; current = second; @@ -2527,7 +2535,8 @@ case 2: // **a if (numdelims === 2 && can_close) { this.pos += 2; - return [Strong(first)]; + inlines.push(Strong(first)); + return true; } else if (numdelims === 1 && can_open) { this.pos += 1; current = second; @@ -2538,7 +2547,8 @@ case 3: // *a if (numdelims === 1 && can_close) { this.pos += 1; - return [Emph(first)]; + inlines.push(Emph(first)); + return true; } else if (numdelims === 2 && can_open) { this.pos += 2; current = second; @@ -2549,56 +2559,68 @@ case 4: // ***a**b if (numdelims 
=== 3 && can_close) { this.pos += 3; - return [Strong([Emph(first.concat([Str(c+c)], second))])]; + inlines.push(Strong([Emph(first.concat([Str(c+c)], second))])); + return true; } else if (numdelims === 2 && can_close) { this.pos += 2; - return [Strong([Str(c+c+c)].concat( + inlines.push(Strong([Str(c+c+c)].concat( first, - [Strong(second)]))]; + [Strong(second)]))); + return true; } else if (numdelims === 1 && can_close) { this.pos += 1; - return [Emph([Strong(first)].concat(second))]; + inlines.push(Emph([Strong(first)].concat(second))); + return true; } break; case 5: // ***a*b if (numdelims === 3 && can_close) { this.pos += 3; - return [Strong([Emph(first.concat([Str(c)], second))])]; + inlines.push(Strong([Emph(first.concat([Str(c)], second))])); + return true; } else if (numdelims === 2 && can_close) { this.pos += 2; - return [Strong([Emph(first)].concat(second))]; + inlines.push(Strong([Emph(first)].concat(second))); + return true; } else if (numdelims === 1 && can_close) { this.pos += 1; - return [Strong([Str(c+c+c)].concat( + inlines.push(Strong([Str(c+c+c)].concat( first, - [Emph(second)]))]; + [Emph(second)]))); + return true; } break; case 6: // ***a** b if (numdelims === 3 && can_close) { this.pos += 3; - return [Strong([Emph(first.concat([Str(c+c)], second))])]; + inlines.push(Strong([Emph(first.concat([Str(c+c)], second))])); + return true; } else if (numdelims === 1 && can_close) { this.pos += 1; - return [Emph([Strong(first)].concat(second))]; + inlines.push(Emph([Strong(first)].concat(second))); + return true; } break; case 7: // ***a* b if (numdelims === 3 && can_close) { this.pos += 3; - return [Strong([Emph(first.concat([Str(c)], second))])]; + inlines.push(Strong([Emph(first.concat([Str(c)], second))])); + return true; } else if (numdelims === 2 && can_close) { this.pos += 2; - return [Strong([Emph(first)].concat(second))]; + inlines.push(Strong([Emph(first)].concat(second))); + return true; } break; case 8: // **a *b if (numdelims === 3 && 
can_close) { this.pos += 3; - return [Strong(first.concat([Emph(second)]))]; + inlines.push(Strong(first.concat([Emph(second)]))); + return true; } else if (numdelims === 2 && can_close) { this.pos += 2; - return [Strong(first.concat([Str(c)], second))]; + inlines.push(Strong(first.concat([Str(c)], second))); + return true; } else if (numdelims === 1 && can_close) { this.pos += 1; first.push(Emph(second)); @@ -2610,7 +2632,8 @@ case 9: // *a **b if (numdelims === 3 && can_close) { this.pos += 3; - return [(Emph(first.concat([Strong(second)])))]; + inlines.push(Emph(first.concat([Strong(second)]))); + return true; } else if (numdelims === 2 && can_close) { this.pos += 2; first.push(Strong(second)); @@ -2619,7 +2642,8 @@ continue; } else if (numdelims === 1 && can_close) { this.pos += 1; - return [Emph(first.concat([Str(c+c)], second))]; + inlines.push(Emph(first.concat([Str(c+c)], second))); + return true; } break; default: @@ -2627,9 +2651,7 @@ } } - if ((next_inline = this.parseInline(true))) { - Array.prototype.push.apply(current, next_inline); - } else { + if (!(this.parseInline(current,true))) { break; } @@ -2638,9 +2660,10 @@ // we didn't match emphasis: fallback this.pos = fallbackpos; if (last_emphasis_closer) { - this.last_emphasis_closer[cc] = last_emphasis_closer; + this.last_emphasis_closer[c] = last_emphasis_closer; } - return [fallback]; + inlines.push(Str(this.subject.slice(startpos, fallbackpos))); + return true; }; @@ -2694,10 +2717,10 @@ while ((c = this.peek()) && c != -1 && (c != C_CLOSE_BRACKET || nest_level > 0)) { switch (c) { case C_BACKTICK: - this.parseBackticks(); + this.parseBackticks([]); break; case C_LESSTHAN: - this.parseAutolink() || this.parseHtmlTag() || + this.parseAutolink([]) || this.parseHtmlTag([]) || this.pos++; break; case C_OPEN_BRACKET: // nested [] @@ -2709,10 +2732,10 @@ this.pos++; break; case C_BACKSLASH: - this.parseBackslash(); + this.parseBackslash([]); break; default: - this.parseString(); + this.parseString([]); } 
} if (c === C_CLOSE_BRACKET) { @@ -2737,7 +2760,7 @@ }; // Attempt to parse a link. If successful, return the link. - var parseLink = function() { + var parseLink = function(inlines) { var startpos = this.pos; var reflabel; var n; @@ -2746,7 +2769,7 @@ n = this.parseLinkLabel(); if (n === 0) { - return null; + return false; } var afterlabel = this.pos; var rawlabel = this.subject.substr(startpos, n); @@ -2763,13 +2786,14 @@ (title = this.parseLinkTitle() || '') || true) && this.spnl() && this.match(/^\)/)) { - return [{ t: 'Link', + inlines.push({ t: 'Link', destination: dest, title: title, - label: parseRawLabel(rawlabel) }]; + label: parseRawLabel(rawlabel) }); + return true; } else { this.pos = startpos; - return null; + return false; } } // If we're here, it wasn't an explicit link. Try to parse a reference link. @@ -2790,67 +2814,72 @@ // lookup rawlabel in refmap var link = this.refmap[normalizeReference(reflabel)]; if (link) { - return [{t: 'Link', + inlines.push({t: 'Link', destination: link.destination, title: link.title, - label: parseRawLabel(rawlabel) }]; + label: parseRawLabel(rawlabel) }); + return true; } else { this.pos = startpos; - return null; + return false; } // Nothing worked, rewind: this.pos = startpos; - return null; + return false; }; // Attempt to parse an entity, return Entity object if successful. - var parseEntity = function() { + var parseEntity = function(inlines) { var m; if ((m = this.match(reEntityHere))) { - return [{ t: 'Str', c: entityToChar(m) }]; + inlines.push({ t: 'Str', c: entityToChar(m) }); + return true; } else { - return null; + return false; } }; // Parse a run of ordinary characters, or a single character with // a special meaning in markdown, as a plain string, adding to inlines. 
- var parseString = function() { + var parseString = function(inlines) { var m; if ((m = this.match(reMain))) { - return [{ t: 'Str', c: m }]; + inlines.push({ t: 'Str', c: m }); + return true; } else { - return null; + return false; } }; // Parse a newline. If it was preceded by two spaces, return a hard // line break; otherwise a soft line break. - var parseNewline = function() { + var parseNewline = function(inlines) { var m = this.match(/^ *\n/); if (m) { if (m.length > 2) { - return [{ t: 'Hardbreak' }]; + inlines.push({ t: 'Hardbreak' }); } else if (m.length > 0) { - return [{ t: 'Softbreak' }]; + inlines.push({ t: 'Softbreak' }); } + return true; } - return null; + return false; }; // Attempt to parse an image. If the opening '!' is not followed // by a link, return a literal '!'. - var parseImage = function() { + var parseImage = function(inlines) { if (this.match(/^!/)) { - var link = this.parseLink(); + var link = this.parseLink(inlines); if (link) { - link[0].t = 'Image'; - return link; + inlines[inlines.length - 1].t = 'Image'; + return true; } else { - return [{ t: 'Str', c: '!' }]; + inlines.push({ t: 'Str', c: '!' }); + return true; } } else { - return null; + return false; } }; @@ -2913,64 +2942,66 @@ return this.pos - startpos; }; - // Parse the next inline element in subject, advancing subject position - // and returning the inline parsed. - var parseInline = function(memoize) { + // Parse the next inline element in subject, advancing subject position. + // If memoize is set, memoize the result. + // On success, add the result to the inlines list, and return true. + // On failure, return false. 
+ var parseInline = function(inlines, memoize) { var startpos = this.pos; - + var origlen = inlines.length; var memoized = memoize && this.memo[startpos]; if (memoized) { this.pos = memoized.endpos; - return memoized.inline; + Array.prototype.push.apply(inlines, memoized.inline); + return true; } var c = this.peek(); if (c === -1) { - return null; + return false; } var res; switch(c) { case C_NEWLINE: case C_SPACE: - res = this.parseNewline(); + res = this.parseNewline(inlines); break; case C_BACKSLASH: - res = this.parseBackslash(); + res = this.parseBackslash(inlines); break; case C_BACKTICK: - res = this.parseBackticks(); + res = this.parseBackticks(inlines); break; case C_ASTERISK: case C_UNDERSCORE: - res = this.parseEmphasis(c); + res = this.parseEmphasis(c, inlines); break; case C_OPEN_BRACKET: - res = this.parseLink(); + res = this.parseLink(inlines); break; case C_BANG: - res = this.parseImage(); + res = this.parseImage(inlines); break; case C_LESSTHAN: - res = this.parseAutolink() || this.parseHtmlTag(); + res = this.parseAutolink(inlines) || this.parseHtmlTag(inlines); break; case C_AMPERSAND: - res = this.parseEntity(); + res = this.parseEntity(inlines); break; default: - res = this.parseString(); + res = this.parseString(inlines); break; } - if (res === null) { + if (!res) { this.pos += 1; - res = [{t: 'Str', c: String.fromCharCode(c)}]; + inlines.push({t: 'Str', c: String.fromCharCode(c)}); } - if (res && memoize) { - this.memo[startpos] = { inline: res, + if (memoize) { + this.memo[startpos] = { inline: inlines.slice(origlen), endpos: this.pos }; } - - return res; + return true; }; // Parse s as a list of inlines, using refmap to resolve references. 
@@ -2979,11 +3010,9 @@ this.pos = 0; this.refmap = refmap || {}; this.memo = {}; - this.last_emphasis_closer = { C_ASTERISK: s.length, C_UNDERSCORE: s.length }; + this.last_emphasis_closer = { '*': s.length, '_': s.length }; var inlines = []; - var next_inline; - while ((next_inline = this.parseInline())) { - Array.prototype.push.apply(inlines, next_inline); + while (this.parseInline(inlines, false)) { } return inlines; }; -- cgit v1.2.3 From ac611d51c9de9aa719b42b9463e6f28d6e7d74a4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 2 Oct 2014 16:00:13 -0700 Subject: Use integers instead of strings for tags. Use === whenever possible to compare them. --- js/stmd.js | 238 ++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 131 insertions(+), 107 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index efccad8..23caf31 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2137,6 +2137,30 @@ zwj: '‍', zwnj: '‌' }; + // Constants for inline and block types: + + var I_STR = 1; + var I_SOFT_BREAK = 2; + var I_HARD_BREAK = 3; + var I_EMPH = 4; + var I_STRONG = 5; + var I_HTML = 6; + var I_LINK = 7; + var I_IMAGE = 8; + var I_CODE = 9; + var B_DOCUMENT = 10; + var B_PARAGRAPH = 11; + var B_BLOCK_QUOTE = 12; + var B_LIST_ITEM = 13; + var B_LIST = 14; + var B_ATX_HEADER = 15; + var B_SETEXT_HEADER = 16; + var B_INDENTED_CODE = 17; + var B_FENCED_CODE = 18; + var B_HTML_BLOCK = 19; + var B_REFERENCE_DEF = 20; + var B_HORIZONTAL_RULE = 21; + // Constants for character codes: var C_NEWLINE = 10; @@ -2273,7 +2297,7 @@ // Convert tabs to spaces on each line using a 4-space tab stop. 
var detabLine = function(text) { - if (text.indexOf('\t') == -1) { + if (text.indexOf('\t') === -1) { return text; } else { var lastStop = 0; @@ -2335,8 +2359,8 @@ var foundCode = false; var match; while (!foundCode && (match = this.match(/`+/m))) { - if (match == ticks) { - inlines.push({ t: 'Code', c: this.subject.slice(afterOpenTicks, + if (match === ticks) { + inlines.push({ t: I_CODE, c: this.subject.slice(afterOpenTicks, this.pos - ticks.length) .replace(/[ \n]+/g,' ') .trim() }); @@ -2345,7 +2369,7 @@ } // If we got here, we didn't match a closing backtick sequence. this.pos = afterOpenTicks; - inlines.push({ t: 'Str', c: ticks }); + inlines.push({ t: I_STR, c: ticks }); return true; }; @@ -2358,13 +2382,13 @@ if (subj.charCodeAt(pos) === C_BACKSLASH) { if (subj.charAt(pos + 1) === '\n') { this.pos = this.pos + 2; - inlines.push({ t: 'Hardbreak' }); + inlines.push({ t: I_HARD_BREAK }); } else if (reEscapable.test(subj.charAt(pos + 1))) { this.pos = this.pos + 2; - inlines.push({ t: 'Str', c: subj.charAt(pos + 1) }); + inlines.push({ t: I_STR, c: subj.charAt(pos + 1) }); } else { this.pos++; - inlines.push({t: 'Str', c: '\\'}); + inlines.push({t: I_STR, c: '\\'}); } return true; } else { @@ -2379,15 +2403,15 @@ if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink dest = m.slice(1,-1); inlines.push( - {t: 'Link', - label: [{ t: 'Str', c: dest }], + {t: I_LINK, + label: [{ t: I_STR, c: dest }], destination: 'mailto:' + encodeURI(unescape(dest)) }); return true; } else if ((m = 
this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1,-1); inlines.push({ - t: 'Link', - label: [{ t: 'Str', c: dest }], + t: I_LINK, + label: [{ t: I_STR, c: dest }], destination: encodeURI(unescape(dest)) }); return true; } else { @@ -2399,7 +2423,7 @@ var parseHtmlTag = function(inlines) { var m = this.match(reHtmlTag); if (m) { - inlines.push({ t: 'Html', c: m }); + inlines.push({ t: I_HTML, c: m }); return true; } else { return false; @@ -2444,15 +2468,15 @@ }; var Emph = function(ils) { - return {t: 'Emph', c: ils}; + return {t: I_EMPH, c: ils}; } var Strong = function(ils) { - return {t: 'Strong', c: ils}; + return {t: I_STRONG, c: ils}; } var Str = function(s) { - return {t: 'Str', c: s}; + return {t: I_STR, c: s}; } // Attempt to parse emphasis or strong emphasis. @@ -2776,7 +2800,7 @@ // if we got this far, we've parsed a label. 
// Try to parse an explicit link: [label](url "title") - if (this.peek() == C_OPEN_PAREN) { + if (this.peek() === C_OPEN_PAREN) { this.pos++; if (this.spnl() && ((dest = this.parseLinkDestination()) !== null) && @@ -2786,7 +2810,7 @@ (title = this.parseLinkTitle() || '') || true) && this.spnl() && this.match(/^\)/)) { - inlines.push({ t: 'Link', + inlines.push({ t: I_LINK, destination: dest, title: title, label: parseRawLabel(rawlabel) }); @@ -2802,7 +2826,7 @@ this.spnl(); var beforelabel = this.pos; n = this.parseLinkLabel(); - if (n == 2) { + if (n === 2) { // empty second label reflabel = rawlabel; } else if (n > 0) { @@ -2814,7 +2838,7 @@ // lookup rawlabel in refmap var link = this.refmap[normalizeReference(reflabel)]; if (link) { - inlines.push({t: 'Link', + inlines.push({t: I_LINK, destination: link.destination, title: link.title, label: parseRawLabel(rawlabel) }); @@ -2832,7 +2856,7 @@ var parseEntity = function(inlines) { var m; if ((m = this.match(reEntityHere))) { - inlines.push({ t: 'Str', c: entityToChar(m) }); + inlines.push({ t: I_STR, c: entityToChar(m) }); return true; } else { return false; @@ -2844,7 +2868,7 @@ var parseString = function(inlines) { var m; if ((m = this.match(reMain))) { - inlines.push({ t: 'Str', c: m }); + inlines.push({ t: I_STR, c: m }); return true; } else { return false; @@ -2857,9 +2881,9 @@ var m = this.match(/^ *\n/); if (m) { if (m.length > 2) { - inlines.push({ t: 'Hardbreak' }); + inlines.push({ t: I_HARD_BREAK }); } else if (m.length > 0) { - inlines.push({ t: 'Softbreak' }); + inlines.push({ t: I_SOFT_BREAK }); } return true; } @@ -2872,10 +2896,10 @@ if (this.match(/^!/)) { var link = this.parseLink(inlines); if (link) { - inlines[inlines.length - 1].t = 'Image'; + inlines[inlines.length - 1].t = I_IMAGE; return true; } else { - inlines.push({ t: 'Str', c: '!' }); + inlines.push({ t: I_STR, c: '!' 
}); return true; } } else { @@ -2994,7 +3018,7 @@ } if (!res) { this.pos += 1; - inlines.push({t: 'Str', c: String.fromCharCode(c)}); + inlines.push({t: I_STR, c: String.fromCharCode(c)}); } if (memoize) { @@ -3071,17 +3095,17 @@ // Returns true if parent block can contain child block. var canContain = function(parent_type, child_type) { - return ( parent_type == 'Document' || - parent_type == 'BlockQuote' || - parent_type == 'ListItem' || - (parent_type == 'List' && child_type == 'ListItem') ); + return ( parent_type === B_DOCUMENT || + parent_type === B_BLOCK_QUOTE || + parent_type === B_LIST_ITEM || + (parent_type === B_LIST && child_type === B_LIST_ITEM) ); }; // Returns true if block type can accept lines of text. var acceptsLines = function(block_type) { - return ( block_type == 'Paragraph' || - block_type == 'IndentedCode' || - block_type == 'FencedCode' ); + return ( block_type === B_PARAGRAPH || + block_type === B_INDENTED_CODE || + block_type === B_FENCED_CODE ); }; // Returns true if block ends with a blank line, descending if needed @@ -3090,7 +3114,7 @@ if (block.last_line_blank) { return true; } - if ((block.t == 'List' || block.t == 'ListItem') && block.children.length > 0) { + if ((block.t === B_LIST || block.t === B_LIST_ITEM) && block.children.length > 0) { return endsWithBlankLine(block.children[block.children.length - 1]); } else { return false; @@ -3105,7 +3129,7 @@ var b = block; var last_list = null; do { - if (b.t === 'List') { + if (b.t === B_LIST) { last_list = b; } b = b.parent; @@ -3234,7 +3258,7 @@ indent = first_nonspace - offset; switch (container.t) { - case 'BlockQuote': + case B_BLOCK_QUOTE: if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { offset = first_nonspace + 1; if (ln.charCodeAt(offset) === C_SPACE) { @@ -3245,7 +3269,7 @@ } break; - case 'ListItem': + case B_LIST_ITEM: if (indent >= container.list_data.marker_offset + container.list_data.padding) { offset += container.list_data.marker_offset + @@ 
-3257,7 +3281,7 @@ } break; - case 'IndentedCode': + case B_INDENTED_CODE: if (indent >= CODE_INDENT) { offset += CODE_INDENT; } else if (blank) { @@ -3267,14 +3291,14 @@ } break; - case 'ATXHeader': - case 'SetextHeader': - case 'HorizontalRule': + case B_ATX_HEADER: + case B_SETEXT_HEADER: + case B_HORIZONTAL_RULE: // a header can never container > 1 line, so fail to match: all_matched = false; break; - case 'FencedCode': + case B_FENCED_CODE: // skip optional spaces of fence offset i = container.fence_offset; while (i > 0 && ln.charCodeAt(offset) === C_SPACE) { @@ -3283,13 +3307,13 @@ } break; - case 'HtmlBlock': + case B_HTML_BLOCK: if (blank) { all_matched = false; } break; - case 'Paragraph': + case B_PARAGRAPH: if (blank) { container.last_line_blank = true; all_matched = false; @@ -3328,9 +3352,9 @@ // Unless last matched container is a code block, try new container starts, // adding children to the last matched container: - while (container.t != 'FencedCode' && - container.t != 'IndentedCode' && - container.t != 'HtmlBlock' && + while (container.t != B_FENCED_CODE && + container.t != B_INDENTED_CODE && + container.t != B_HTML_BLOCK && // this is a little performance optimization: matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { @@ -3346,10 +3370,10 @@ if (indent >= CODE_INDENT) { // indented code - if (this.tip.t != 'Paragraph' && !blank) { + if (this.tip.t != B_PARAGRAPH && !blank) { offset += CODE_INDENT; closeUnmatchedBlocks(this); - container = this.addChild('IndentedCode', line_number, offset); + container = this.addChild(B_INDENTED_CODE, line_number, offset); } else { // indent > 4 in a lazy paragraph continuation break; } @@ -3362,13 +3386,13 @@ offset++; } closeUnmatchedBlocks(this); - container = this.addChild('BlockQuote', line_number, offset); + container = this.addChild(B_BLOCK_QUOTE, line_number, offset); } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { // ATX header offset = first_nonspace + match[0].length; 
closeUnmatchedBlocks(this); - container = this.addChild('ATXHeader', line_number, first_nonspace); + container = this.addChild(B_ATX_HEADER, line_number, first_nonspace); container.level = match[0].trim().length; // number of #s // remove trailing ###s: container.strings = @@ -3379,7 +3403,7 @@ // fenced code block var fence_length = match[0].length; closeUnmatchedBlocks(this); - container = this.addChild('FencedCode', line_number, first_nonspace); + container = this.addChild(B_FENCED_CODE, line_number, first_nonspace); container.fence_length = fence_length; container.fence_char = match[0][0]; container.fence_offset = first_nonspace - offset; @@ -3389,23 +3413,23 @@ } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { // html block closeUnmatchedBlocks(this); - container = this.addChild('HtmlBlock', line_number, first_nonspace); + container = this.addChild(B_HTML_BLOCK, line_number, first_nonspace); // note, we don't adjust offset because the tag is part of the text break; - } else if (container.t == 'Paragraph' && + } else if (container.t == B_PARAGRAPH && container.strings.length === 1 && ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { // setext header line closeUnmatchedBlocks(this); - container.t = 'SetextHeader'; // convert Paragraph to SetextHeader + container.t = B_SETEXT_HEADER; // convert Paragraph to SetextHeader container.level = match[0][0] === '=' ? 
1 : 2; offset = ln.length; } else if (matchAt(reHrule, ln, first_nonspace) !== null) { // hrule closeUnmatchedBlocks(this); - container = this.addChild('HorizontalRule', line_number, first_nonspace); + container = this.addChild(B_HORIZONTAL_RULE, line_number, first_nonspace); offset = ln.length - 1; break; @@ -3416,14 +3440,14 @@ offset = first_nonspace + data.padding; // add the list if needed - if (container.t !== 'List' || + if (container.t !== B_LIST || !(listsMatch(container.list_data, data))) { - container = this.addChild('List', line_number, first_nonspace); + container = this.addChild(B_LIST, line_number, first_nonspace); container.list_data = data; } // add the list item - container = this.addChild('ListItem', line_number, first_nonspace); + container = this.addChild(B_LIST_ITEM, line_number, first_nonspace); container.list_data = data; } else { @@ -3453,7 +3477,7 @@ // First check for a lazy paragraph continuation: if (this.tip !== last_matched_container && !blank && - this.tip.t == 'Paragraph' && + this.tip.t == B_PARAGRAPH && this.tip.strings.length > 0) { // lazy paragraph continuation @@ -3470,9 +3494,9 @@ // lists or breaking out of lists. We also don't set last_line_blank // on an empty list item. 
container.last_line_blank = blank && - !(container.t == 'BlockQuote' || - container.t == 'FencedCode' || - (container.t == 'ListItem' && + !(container.t == B_BLOCK_QUOTE || + container.t == B_FENCED_CODE || + (container.t == B_LIST_ITEM && container.children.length === 0 && container.start_line == line_number)); @@ -3483,12 +3507,12 @@ } switch (container.t) { - case 'IndentedCode': - case 'HtmlBlock': + case B_INDENTED_CODE: + case B_HTML_BLOCK: this.addLine(ln, offset); break; - case 'FencedCode': + case B_FENCED_CODE: // check for closing code fence: match = (indent <= 3 && ln.charAt(first_nonspace) == container.fence_char && @@ -3501,9 +3525,9 @@ } break; - case 'ATXHeader': - case 'SetextHeader': - case 'HorizontalRule': + case B_ATX_HEADER: + case B_SETEXT_HEADER: + case B_HORIZONTAL_RULE: // nothing to do; we already added the contents. break; @@ -3512,10 +3536,10 @@ this.addLine(ln, first_nonspace); } else if (blank) { // do nothing - } else if (container.t != 'HorizontalRule' && - container.t != 'SetextHeader') { + } else if (container.t != B_HORIZONTAL_RULE && + container.t != B_SETEXT_HEADER) { // create paragraph container for line - container = this.addChild('Paragraph', line_number, first_nonspace); + container = this.addChild(B_PARAGRAPH, line_number, first_nonspace); this.addLine(ln, first_nonspace); } else { console.log("Line " + line_number.toString() + @@ -3546,7 +3570,7 @@ } switch (block.t) { - case 'Paragraph': + case B_PARAGRAPH: block.string_content = block.strings.join('\n').replace(/^ */m,''); // try parsing the beginning as link reference definitions: @@ -3555,23 +3579,23 @@ this.refmap))) { block.string_content = block.string_content.slice(pos); if (isBlank(block.string_content)) { - block.t = 'ReferenceDef'; + block.t = B_REFERENCE_DEF; break; } } break; - case 'ATXHeader': - case 'SetextHeader': - case 'HtmlBlock': + case B_ATX_HEADER: + case B_SETEXT_HEADER: + case B_HTML_BLOCK: block.string_content = block.strings.join('\n'); break; 
- case 'IndentedCode': + case B_INDENTED_CODE: block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n'); break; - case 'FencedCode': + case B_FENCED_CODE: // first line becomes info string block.info = unescapeEntBS(block.strings[0].trim()); if (block.strings.length == 1) { @@ -3581,7 +3605,7 @@ } break; - case 'List': + case B_LIST: block.tight = true; // tight by default var numitems = block.children.length; @@ -3622,9 +3646,9 @@ // into inline content where appropriate. var processInlines = function(block) { switch(block.t) { - case 'Paragraph': - case 'SetextHeader': - case 'ATXHeader': + case B_PARAGRAPH: + case B_SETEXT_HEADER: + case B_ATX_HEADER: block.inline_content = this.inlineParser.parse(block.string_content.trim(), this.refmap); block.string_content = ""; @@ -3643,7 +3667,7 @@ // The main parsing function. Returns a parsed document AST. var parse = function(input) { - this.doc = makeBlock('Document', 1, 1); + this.doc = makeBlock(B_DOCUMENT, 1, 1); this.tip = this.doc; this.refmap = {}; var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/); @@ -3662,7 +3686,7 @@ // The DocParser object. 
function DocParser(){ return { - doc: makeBlock('Document', 1, 1), + doc: makeBlock(B_DOCUMENT, 1, 1), tip: this.doc, refmap: {}, inlineParser: new InlineParser(), @@ -3703,32 +3727,32 @@ var renderInline = function(inline) { var attrs; switch (inline.t) { - case 'Str': + case I_STR: return this.escape(inline.c); - case 'Softbreak': + case I_SOFT_BREAK: return this.softbreak; - case 'Hardbreak': + case I_HARD_BREAK: return inTags('br',[],"",true) + '\n'; - case 'Emph': + case I_EMPH: return inTags('em', [], this.renderInlines(inline.c)); - case 'Strong': + case I_STRONG: return inTags('strong', [], this.renderInlines(inline.c)); - case 'Html': + case I_HTML: return inline.c; - case 'Link': + case I_LINK: attrs = [['href', this.escape(inline.destination, true)]]; if (inline.title) { attrs.push(['title', this.escape(inline.title, true)]); } return inTags('a', attrs, this.renderInlines(inline.label)); - case 'Image': + case I_IMAGE: attrs = [['src', this.escape(inline.destination, true)], ['alt', this.escape(this.renderInlines(inline.label))]]; if (inline.title) { attrs.push(['title', this.escape(inline.title, true)]); } return inTags('img', attrs, "", true); - case 'Code': + case I_CODE: return inTags('code', [], this.escape(inline.c)); default: console.log("Unknown inline type " + inline.t); @@ -3751,48 +3775,48 @@ var attr; var info_words; switch (block.t) { - case 'Document': + case B_DOCUMENT: var whole_doc = this.renderBlocks(block.children); return (whole_doc === '' ? '' : whole_doc + '\n'); - case 'Paragraph': + case B_PARAGRAPH: if (in_tight_list) { return this.renderInlines(block.inline_content); } else { return inTags('p', [], this.renderInlines(block.inline_content)); } break; - case 'BlockQuote': + case B_BLOCK_QUOTE: var filling = this.renderBlocks(block.children); return inTags('blockquote', [], filling === '' ? 
this.innersep : this.innersep + filling + this.innersep); - case 'ListItem': + case B_LIST_ITEM: return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim()); - case 'List': + case B_LIST: tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol'; attr = (!block.list_data.start || block.list_data.start == 1) ? [] : [['start', block.list_data.start.toString()]]; return inTags(tag, attr, this.innersep + this.renderBlocks(block.children, block.tight) + this.innersep); - case 'ATXHeader': - case 'SetextHeader': + case B_ATX_HEADER: + case B_SETEXT_HEADER: tag = 'h' + block.level; return inTags(tag, [], this.renderInlines(block.inline_content)); - case 'IndentedCode': + case B_INDENTED_CODE: return inTags('pre', [], inTags('code', [], this.escape(block.string_content))); - case 'FencedCode': + case B_FENCED_CODE: info_words = block.info.split(/ +/); attr = info_words.length === 0 || info_words[0].length === 0 ? [] : [['class','language-' + this.escape(info_words[0],true)]]; return inTags('pre', [], inTags('code', attr, this.escape(block.string_content))); - case 'HtmlBlock': + case B_HTML_BLOCK: return block.string_content; - case 'ReferenceDef': + case B_REFERENCE_DEF: return ""; - case 'HorizontalRule': + case B_HORIZONTAL_RULE: return inTags('hr',[],"",true); default: console.log("Unknown block type " + block.t); @@ -3804,7 +3828,7 @@ var renderBlocks = function(blocks, in_tight_list) { var result = []; for (var i=0; i < blocks.length; i++) { - if (blocks[i].t !== 'ReferenceDef') { + if (blocks[i].t !== B_REFERENCE_DEF) { result.push(this.renderBlock(blocks[i], in_tight_list)); } } -- cgit v1.2.3 From db25de09f5dc931c0e2b31ce0ccdb58052f3105f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 2 Oct 2014 17:53:53 -0700 Subject: Use numerical constants. Performance optimization, but at cost of code clarity. 
--- js/stmd.js | 270 ++++++++++++++++++++++++++++++------------------------------- 1 file changed, 135 insertions(+), 135 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 23caf31..3c4eab0 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2137,8 +2137,7 @@ zwj: '‍', zwnj: '‌' }; - // Constants for inline and block types: - + /* Constants for inline and block types var I_STR = 1; var I_SOFT_BREAK = 2; var I_HARD_BREAK = 3; @@ -2160,9 +2159,9 @@ var B_HTML_BLOCK = 19; var B_REFERENCE_DEF = 20; var B_HORIZONTAL_RULE = 21; + */ - // Constants for character codes: - + /* Constants for character codes: var C_NEWLINE = 10; var C_SPACE = 32; var C_ASTERISK = 42; @@ -2177,6 +2176,7 @@ var C_AMPERSAND = 38; var C_OPEN_PAREN = 40; var C_COLON = 58; + */ // Some regexps used in inline parser: @@ -2360,7 +2360,7 @@ var match; while (!foundCode && (match = this.match(/`+/m))) { if (match === ticks) { - inlines.push({ t: I_CODE, c: this.subject.slice(afterOpenTicks, + inlines.push({ t: 9, c: this.subject.slice(afterOpenTicks, this.pos - ticks.length) .replace(/[ \n]+/g,' ') .trim() }); @@ -2369,7 +2369,7 @@ } // If we got here, we didn't match a closing backtick sequence. 
this.pos = afterOpenTicks; - inlines.push({ t: I_STR, c: ticks }); + inlines.push({ t: 1, c: ticks }); return true; }; @@ -2379,16 +2379,16 @@ var parseBackslash = function(inlines) { var subj = this.subject, pos = this.pos; - if (subj.charCodeAt(pos) === C_BACKSLASH) { + if (subj.charCodeAt(pos) === 92) { if (subj.charAt(pos + 1) === '\n') { this.pos = this.pos + 2; - inlines.push({ t: I_HARD_BREAK }); + inlines.push({ t: 3 }); } else if (reEscapable.test(subj.charAt(pos + 1))) { this.pos = this.pos + 2; - inlines.push({ t: I_STR, c: subj.charAt(pos + 1) }); + inlines.push({ t: 1, c: subj.charAt(pos + 1) }); } else { this.pos++; - inlines.push({t: I_STR, c: '\\'}); + inlines.push({t: 1, c: '\\'}); } return true; } else { @@ -2403,15 +2403,15 @@ if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink dest = m.slice(1,-1); inlines.push( - {t: I_LINK, - label: [{ t: I_STR, c: dest }], + {t: 7, + label: [{ t: 1, c: dest }], destination: 'mailto:' + encodeURI(unescape(dest)) }); return true; } else if ((m = 
this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1,-1); inlines.push({ - t: I_LINK, - label: [{ t: I_STR, c: dest }], + t: 7, + label: [{ t: 1, c: dest }], destination: encodeURI(unescape(dest)) }); return true; } else { @@ -2423,7 +2423,7 @@ var parseHtmlTag = function(inlines) { var m = this.match(reHtmlTag); if (m) { - inlines.push({ t: I_HTML, c: m }); + inlines.push({ t: 6, c: m }); return true; } else { return false; @@ -2457,7 +2457,7 @@ var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after)); var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before)); - if (cc === C_UNDERSCORE) { + if (cc === 95) { can_open = can_open && !((/[a-z0-9]/i).test(char_before)); can_close = can_close && !((/[a-z0-9]/i).test(char_after)); } @@ -2468,15 +2468,15 @@ }; var Emph = function(ils) { - return {t: I_EMPH, c: ils}; + return {t: 4, c: ils}; } var Strong = function(ils) { - return {t: I_STRONG, c: ils}; + return 
{t: 5, c: ils}; } var Str = function(s) { - return {t: I_STR, c: s}; + return {t: 1, c: s}; } // Attempt to parse emphasis or strong emphasis. @@ -2721,7 +2721,7 @@ // Attempt to parse a link label, returning number of characters parsed. var parseLinkLabel = function() { - if (this.peek() != C_OPEN_BRACKET) { + if (this.peek() != 91) { return 0; } var startpos = this.pos; @@ -2738,31 +2738,31 @@ } this.pos++; // advance past [ var c; - while ((c = this.peek()) && c != -1 && (c != C_CLOSE_BRACKET || nest_level > 0)) { + while ((c = this.peek()) && c != -1 && (c != 93 || nest_level > 0)) { switch (c) { - case C_BACKTICK: + case 96: this.parseBackticks([]); break; - case C_LESSTHAN: + case 60: this.parseAutolink([]) || this.parseHtmlTag([]) || this.pos++; break; - case C_OPEN_BRACKET: // nested [] + case 91: // nested [] nest_level++; this.pos++; break; - case C_CLOSE_BRACKET: // nested [] + case 93: // nested [] nest_level--; this.pos++; break; - case C_BACKSLASH: + case 92: this.parseBackslash([]); break; default: this.parseString([]); } } - if (c === C_CLOSE_BRACKET) { + if (c === 93) { this.label_nest_level = 0; this.pos++; // advance past ] return this.pos - startpos; @@ -2800,7 +2800,7 @@ // if we got this far, we've parsed a label. 
// Try to parse an explicit link: [label](url "title") - if (this.peek() === C_OPEN_PAREN) { + if (this.peek() === 40) { this.pos++; if (this.spnl() && ((dest = this.parseLinkDestination()) !== null) && @@ -2810,7 +2810,7 @@ (title = this.parseLinkTitle() || '') || true) && this.spnl() && this.match(/^\)/)) { - inlines.push({ t: I_LINK, + inlines.push({ t: 7, destination: dest, title: title, label: parseRawLabel(rawlabel) }); @@ -2838,7 +2838,7 @@ // lookup rawlabel in refmap var link = this.refmap[normalizeReference(reflabel)]; if (link) { - inlines.push({t: I_LINK, + inlines.push({t: 7, destination: link.destination, title: link.title, label: parseRawLabel(rawlabel) }); @@ -2856,7 +2856,7 @@ var parseEntity = function(inlines) { var m; if ((m = this.match(reEntityHere))) { - inlines.push({ t: I_STR, c: entityToChar(m) }); + inlines.push({ t: 1, c: entityToChar(m) }); return true; } else { return false; @@ -2868,7 +2868,7 @@ var parseString = function(inlines) { var m; if ((m = this.match(reMain))) { - inlines.push({ t: I_STR, c: m }); + inlines.push({ t: 1, c: m }); return true; } else { return false; @@ -2881,9 +2881,9 @@ var m = this.match(/^ *\n/); if (m) { if (m.length > 2) { - inlines.push({ t: I_HARD_BREAK }); + inlines.push({ t: 3 }); } else if (m.length > 0) { - inlines.push({ t: I_SOFT_BREAK }); + inlines.push({ t: 2 }); } return true; } @@ -2896,10 +2896,10 @@ if (this.match(/^!/)) { var link = this.parseLink(inlines); if (link) { - inlines[inlines.length - 1].t = I_IMAGE; + inlines[inlines.length - 1].t = 8; return true; } else { - inlines.push({ t: I_STR, c: '!' }); + inlines.push({ t: 1, c: '!' 
}); return true; } } else { @@ -2927,7 +2927,7 @@ } // colon: - if (this.peek() === C_COLON) { + if (this.peek() === 58) { this.pos++; } else { this.pos = startpos; @@ -2986,30 +2986,30 @@ } var res; switch(c) { - case C_NEWLINE: - case C_SPACE: + case 10: + case 32: res = this.parseNewline(inlines); break; - case C_BACKSLASH: + case 92: res = this.parseBackslash(inlines); break; - case C_BACKTICK: + case 96: res = this.parseBackticks(inlines); break; - case C_ASTERISK: - case C_UNDERSCORE: + case 42: + case 95: res = this.parseEmphasis(c, inlines); break; - case C_OPEN_BRACKET: + case 91: res = this.parseLink(inlines); break; - case C_BANG: + case 33: res = this.parseImage(inlines); break; - case C_LESSTHAN: + case 60: res = this.parseAutolink(inlines) || this.parseHtmlTag(inlines); break; - case C_AMPERSAND: + case 38: res = this.parseEntity(inlines); break; default: @@ -3018,7 +3018,7 @@ } if (!res) { this.pos += 1; - inlines.push({t: I_STR, c: String.fromCharCode(c)}); + inlines.push({t: 1, c: String.fromCharCode(c)}); } if (memoize) { @@ -3095,17 +3095,17 @@ // Returns true if parent block can contain child block. var canContain = function(parent_type, child_type) { - return ( parent_type === B_DOCUMENT || - parent_type === B_BLOCK_QUOTE || - parent_type === B_LIST_ITEM || - (parent_type === B_LIST && child_type === B_LIST_ITEM) ); + return ( parent_type === 10 || + parent_type === 12 || + parent_type === 13 || + (parent_type === 14 && child_type === 13) ); }; // Returns true if block type can accept lines of text. 
var acceptsLines = function(block_type) { - return ( block_type === B_PARAGRAPH || - block_type === B_INDENTED_CODE || - block_type === B_FENCED_CODE ); + return ( block_type === 11 || + block_type === 17 || + block_type === 18 ); }; // Returns true if block ends with a blank line, descending if needed @@ -3114,7 +3114,7 @@ if (block.last_line_blank) { return true; } - if ((block.t === B_LIST || block.t === B_LIST_ITEM) && block.children.length > 0) { + if ((block.t === 14 || block.t === 13) && block.children.length > 0) { return endsWithBlankLine(block.children[block.children.length - 1]); } else { return false; @@ -3129,7 +3129,7 @@ var b = block; var last_list = null; do { - if (b.t === B_LIST) { + if (b.t === 14) { last_list = b; } b = b.parent; @@ -3258,10 +3258,10 @@ indent = first_nonspace - offset; switch (container.t) { - case B_BLOCK_QUOTE: - if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { + case 12: + if (indent <= 3 && ln.charCodeAt(first_nonspace) === 62) { offset = first_nonspace + 1; - if (ln.charCodeAt(offset) === C_SPACE) { + if (ln.charCodeAt(offset) === 32) { offset++; } } else { @@ -3269,7 +3269,7 @@ } break; - case B_LIST_ITEM: + case 13: if (indent >= container.list_data.marker_offset + container.list_data.padding) { offset += container.list_data.marker_offset + @@ -3281,7 +3281,7 @@ } break; - case B_INDENTED_CODE: + case 17: if (indent >= CODE_INDENT) { offset += CODE_INDENT; } else if (blank) { @@ -3291,29 +3291,29 @@ } break; - case B_ATX_HEADER: - case B_SETEXT_HEADER: - case B_HORIZONTAL_RULE: + case 15: + case 16: + case 21: // a header can never container > 1 line, so fail to match: all_matched = false; break; - case B_FENCED_CODE: + case 18: // skip optional spaces of fence offset i = container.fence_offset; - while (i > 0 && ln.charCodeAt(offset) === C_SPACE) { + while (i > 0 && ln.charCodeAt(offset) === 32) { offset++; i--; } break; - case B_HTML_BLOCK: + case 19: if (blank) { all_matched = false; } break; - 
case B_PARAGRAPH: + case 11: if (blank) { container.last_line_blank = true; all_matched = false; @@ -3352,9 +3352,9 @@ // Unless last matched container is a code block, try new container starts, // adding children to the last matched container: - while (container.t != B_FENCED_CODE && - container.t != B_INDENTED_CODE && - container.t != B_HTML_BLOCK && + while (container.t != 18 && + container.t != 17 && + container.t != 19 && // this is a little performance optimization: matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { @@ -3370,29 +3370,29 @@ if (indent >= CODE_INDENT) { // indented code - if (this.tip.t != B_PARAGRAPH && !blank) { + if (this.tip.t != 11 && !blank) { offset += CODE_INDENT; closeUnmatchedBlocks(this); - container = this.addChild(B_INDENTED_CODE, line_number, offset); + container = this.addChild(17, line_number, offset); } else { // indent > 4 in a lazy paragraph continuation break; } - } else if (ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { + } else if (ln.charCodeAt(first_nonspace) === 62) { // blockquote offset = first_nonspace + 1; // optional following space - if (ln.charCodeAt(offset) === C_SPACE) { + if (ln.charCodeAt(offset) === 32) { offset++; } closeUnmatchedBlocks(this); - container = this.addChild(B_BLOCK_QUOTE, line_number, offset); + container = this.addChild(12, line_number, offset); } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { // ATX header offset = first_nonspace + match[0].length; closeUnmatchedBlocks(this); - container = this.addChild(B_ATX_HEADER, line_number, first_nonspace); + container = this.addChild(15, line_number, first_nonspace); container.level = match[0].trim().length; // number of #s // remove trailing ###s: container.strings = @@ -3403,7 +3403,7 @@ // fenced code block var fence_length = match[0].length; closeUnmatchedBlocks(this); - container = this.addChild(B_FENCED_CODE, line_number, first_nonspace); + container = this.addChild(18, line_number, first_nonspace); 
container.fence_length = fence_length; container.fence_char = match[0][0]; container.fence_offset = first_nonspace - offset; @@ -3413,23 +3413,23 @@ } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { // html block closeUnmatchedBlocks(this); - container = this.addChild(B_HTML_BLOCK, line_number, first_nonspace); + container = this.addChild(19, line_number, first_nonspace); // note, we don't adjust offset because the tag is part of the text break; - } else if (container.t == B_PARAGRAPH && + } else if (container.t == 11 && container.strings.length === 1 && ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { // setext header line closeUnmatchedBlocks(this); - container.t = B_SETEXT_HEADER; // convert Paragraph to SetextHeader + container.t = 16; // convert Paragraph to SetextHeader container.level = match[0][0] === '=' ? 1 : 2; offset = ln.length; } else if (matchAt(reHrule, ln, first_nonspace) !== null) { // hrule closeUnmatchedBlocks(this); - container = this.addChild(B_HORIZONTAL_RULE, line_number, first_nonspace); + container = this.addChild(21, line_number, first_nonspace); offset = ln.length - 1; break; @@ -3440,14 +3440,14 @@ offset = first_nonspace + data.padding; // add the list if needed - if (container.t !== B_LIST || + if (container.t !== 14 || !(listsMatch(container.list_data, data))) { - container = this.addChild(B_LIST, line_number, first_nonspace); + container = this.addChild(14, line_number, first_nonspace); container.list_data = data; } // add the list item - container = this.addChild(B_LIST_ITEM, line_number, first_nonspace); + container = this.addChild(13, line_number, first_nonspace); container.list_data = data; } else { @@ -3477,7 +3477,7 @@ // First check for a lazy paragraph continuation: if (this.tip !== last_matched_container && !blank && - this.tip.t == B_PARAGRAPH && + this.tip.t == 11 && this.tip.strings.length > 0) { // lazy paragraph continuation @@ -3494,9 +3494,9 @@ // lists or breaking out of lists. 
We also don't set last_line_blank // on an empty list item. container.last_line_blank = blank && - !(container.t == B_BLOCK_QUOTE || - container.t == B_FENCED_CODE || - (container.t == B_LIST_ITEM && + !(container.t == 12 || + container.t == 18 || + (container.t == 13 && container.children.length === 0 && container.start_line == line_number)); @@ -3507,12 +3507,12 @@ } switch (container.t) { - case B_INDENTED_CODE: - case B_HTML_BLOCK: + case 17: + case 19: this.addLine(ln, offset); break; - case B_FENCED_CODE: + case 18: // check for closing code fence: match = (indent <= 3 && ln.charAt(first_nonspace) == container.fence_char && @@ -3525,9 +3525,9 @@ } break; - case B_ATX_HEADER: - case B_SETEXT_HEADER: - case B_HORIZONTAL_RULE: + case 15: + case 16: + case 21: // nothing to do; we already added the contents. break; @@ -3536,10 +3536,10 @@ this.addLine(ln, first_nonspace); } else if (blank) { // do nothing - } else if (container.t != B_HORIZONTAL_RULE && - container.t != B_SETEXT_HEADER) { + } else if (container.t != 21 && + container.t != 16) { // create paragraph container for line - container = this.addChild(B_PARAGRAPH, line_number, first_nonspace); + container = this.addChild(11, line_number, first_nonspace); this.addLine(ln, first_nonspace); } else { console.log("Line " + line_number.toString() + @@ -3570,32 +3570,32 @@ } switch (block.t) { - case B_PARAGRAPH: + case 11: block.string_content = block.strings.join('\n').replace(/^ */m,''); // try parsing the beginning as link reference definitions: - while (block.string_content.charCodeAt(0) === C_OPEN_BRACKET && + while (block.string_content.charCodeAt(0) === 91 && (pos = this.inlineParser.parseReference(block.string_content, this.refmap))) { block.string_content = block.string_content.slice(pos); if (isBlank(block.string_content)) { - block.t = B_REFERENCE_DEF; + block.t = 20; break; } } break; - case B_ATX_HEADER: - case B_SETEXT_HEADER: - case B_HTML_BLOCK: + case 15: + case 16: + case 19: 
block.string_content = block.strings.join('\n'); break; - case B_INDENTED_CODE: + case 17: block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n'); break; - case B_FENCED_CODE: + case 18: // first line becomes info string block.info = unescapeEntBS(block.strings[0].trim()); if (block.strings.length == 1) { @@ -3605,7 +3605,7 @@ } break; - case B_LIST: + case 14: block.tight = true; // tight by default var numitems = block.children.length; @@ -3646,9 +3646,9 @@ // into inline content where appropriate. var processInlines = function(block) { switch(block.t) { - case B_PARAGRAPH: - case B_SETEXT_HEADER: - case B_ATX_HEADER: + case 11: + case 16: + case 15: block.inline_content = this.inlineParser.parse(block.string_content.trim(), this.refmap); block.string_content = ""; @@ -3667,7 +3667,7 @@ // The main parsing function. Returns a parsed document AST. var parse = function(input) { - this.doc = makeBlock(B_DOCUMENT, 1, 1); + this.doc = makeBlock(10, 1, 1); this.tip = this.doc; this.refmap = {}; var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/); @@ -3686,7 +3686,7 @@ // The DocParser object. 
function DocParser(){ return { - doc: makeBlock(B_DOCUMENT, 1, 1), + doc: makeBlock(10, 1, 1), tip: this.doc, refmap: {}, inlineParser: new InlineParser(), @@ -3727,32 +3727,32 @@ var renderInline = function(inline) { var attrs; switch (inline.t) { - case I_STR: + case 1: return this.escape(inline.c); - case I_SOFT_BREAK: + case 2: return this.softbreak; - case I_HARD_BREAK: + case 3: return inTags('br',[],"",true) + '\n'; - case I_EMPH: + case 4: return inTags('em', [], this.renderInlines(inline.c)); - case I_STRONG: + case 5: return inTags('strong', [], this.renderInlines(inline.c)); - case I_HTML: + case 6: return inline.c; - case I_LINK: + case 7: attrs = [['href', this.escape(inline.destination, true)]]; if (inline.title) { attrs.push(['title', this.escape(inline.title, true)]); } return inTags('a', attrs, this.renderInlines(inline.label)); - case I_IMAGE: + case 8: attrs = [['src', this.escape(inline.destination, true)], ['alt', this.escape(this.renderInlines(inline.label))]]; if (inline.title) { attrs.push(['title', this.escape(inline.title, true)]); } return inTags('img', attrs, "", true); - case I_CODE: + case 9: return inTags('code', [], this.escape(inline.c)); default: console.log("Unknown inline type " + inline.t); @@ -3775,48 +3775,48 @@ var attr; var info_words; switch (block.t) { - case B_DOCUMENT: + case 10: var whole_doc = this.renderBlocks(block.children); return (whole_doc === '' ? '' : whole_doc + '\n'); - case B_PARAGRAPH: + case 11: if (in_tight_list) { return this.renderInlines(block.inline_content); } else { return inTags('p', [], this.renderInlines(block.inline_content)); } break; - case B_BLOCK_QUOTE: + case 12: var filling = this.renderBlocks(block.children); return inTags('blockquote', [], filling === '' ? 
this.innersep : this.innersep + filling + this.innersep); - case B_LIST_ITEM: + case 13: return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim()); - case B_LIST: + case 14: tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol'; attr = (!block.list_data.start || block.list_data.start == 1) ? [] : [['start', block.list_data.start.toString()]]; return inTags(tag, attr, this.innersep + this.renderBlocks(block.children, block.tight) + this.innersep); - case B_ATX_HEADER: - case B_SETEXT_HEADER: + case 15: + case 16: tag = 'h' + block.level; return inTags(tag, [], this.renderInlines(block.inline_content)); - case B_INDENTED_CODE: + case 17: return inTags('pre', [], inTags('code', [], this.escape(block.string_content))); - case B_FENCED_CODE: + case 18: info_words = block.info.split(/ +/); attr = info_words.length === 0 || info_words[0].length === 0 ? [] : [['class','language-' + this.escape(info_words[0],true)]]; return inTags('pre', [], inTags('code', attr, this.escape(block.string_content))); - case B_HTML_BLOCK: + case 19: return block.string_content; - case B_REFERENCE_DEF: + case 20: return ""; - case B_HORIZONTAL_RULE: + case 21: return inTags('hr',[],"",true); default: console.log("Unknown block type " + block.t); @@ -3828,7 +3828,7 @@ var renderBlocks = function(blocks, in_tight_list) { var result = []; for (var i=0; i < blocks.length; i++) { - if (blocks[i].t !== B_REFERENCE_DEF) { + if (blocks[i].t !== 20) { result.push(this.renderBlock(blocks[i], in_tight_list)); } } -- cgit v1.2.3 From 6dfc19a529c64d17ec673196d2d549acc809bd54 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 2 Oct 2014 17:54:14 -0700 Subject: Revert "Use numerical constants." This reverts commit db25de09f5dc931c0e2b31ce0ccdb58052f3105f. 
--- js/stmd.js | 270 ++++++++++++++++++++++++++++++------------------------------- 1 file changed, 135 insertions(+), 135 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 3c4eab0..23caf31 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2137,7 +2137,8 @@ zwj: '‍', zwnj: '‌' }; - /* Constants for inline and block types + // Constants for inline and block types: + var I_STR = 1; var I_SOFT_BREAK = 2; var I_HARD_BREAK = 3; @@ -2159,9 +2160,9 @@ var B_HTML_BLOCK = 19; var B_REFERENCE_DEF = 20; var B_HORIZONTAL_RULE = 21; - */ - /* Constants for character codes: + // Constants for character codes: + var C_NEWLINE = 10; var C_SPACE = 32; var C_ASTERISK = 42; @@ -2176,7 +2177,6 @@ var C_AMPERSAND = 38; var C_OPEN_PAREN = 40; var C_COLON = 58; - */ // Some regexps used in inline parser: @@ -2360,7 +2360,7 @@ var match; while (!foundCode && (match = this.match(/`+/m))) { if (match === ticks) { - inlines.push({ t: 9, c: this.subject.slice(afterOpenTicks, + inlines.push({ t: I_CODE, c: this.subject.slice(afterOpenTicks, this.pos - ticks.length) .replace(/[ \n]+/g,' ') .trim() }); @@ -2369,7 +2369,7 @@ } // If we got here, we didn't match a closing backtick sequence. 
this.pos = afterOpenTicks; - inlines.push({ t: 1, c: ticks }); + inlines.push({ t: I_STR, c: ticks }); return true; }; @@ -2379,16 +2379,16 @@ var parseBackslash = function(inlines) { var subj = this.subject, pos = this.pos; - if (subj.charCodeAt(pos) === 92) { + if (subj.charCodeAt(pos) === C_BACKSLASH) { if (subj.charAt(pos + 1) === '\n') { this.pos = this.pos + 2; - inlines.push({ t: 3 }); + inlines.push({ t: I_HARD_BREAK }); } else if (reEscapable.test(subj.charAt(pos + 1))) { this.pos = this.pos + 2; - inlines.push({ t: 1, c: subj.charAt(pos + 1) }); + inlines.push({ t: I_STR, c: subj.charAt(pos + 1) }); } else { this.pos++; - inlines.push({t: 1, c: '\\'}); + inlines.push({t: I_STR, c: '\\'}); } return true; } else { @@ -2403,15 +2403,15 @@ if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink dest = m.slice(1,-1); inlines.push( - {t: 7, - label: [{ t: 1, c: dest }], + {t: I_LINK, + label: [{ t: I_STR, c: dest }], destination: 'mailto:' + encodeURI(unescape(dest)) }); return true; } else if ((m = 
this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1,-1); inlines.push({ - t: 7, - label: [{ t: 1, c: dest }], + t: I_LINK, + label: [{ t: I_STR, c: dest }], destination: encodeURI(unescape(dest)) }); return true; } else { @@ -2423,7 +2423,7 @@ var parseHtmlTag = function(inlines) { var m = this.match(reHtmlTag); if (m) { - inlines.push({ t: 6, c: m }); + inlines.push({ t: I_HTML, c: m }); return true; } else { return false; @@ -2457,7 +2457,7 @@ var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after)); var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before)); - if (cc === 95) { + if (cc === C_UNDERSCORE) { can_open = can_open && !((/[a-z0-9]/i).test(char_before)); can_close = can_close && !((/[a-z0-9]/i).test(char_after)); } @@ -2468,15 +2468,15 @@ }; var Emph = function(ils) { - return {t: 4, c: ils}; + return {t: I_EMPH, c: ils}; } var Strong = function(ils) { - return {t: 5, c: ils}; + return {t: 
I_STRONG, c: ils}; } var Str = function(s) { - return {t: 1, c: s}; + return {t: I_STR, c: s}; } // Attempt to parse emphasis or strong emphasis. @@ -2721,7 +2721,7 @@ // Attempt to parse a link label, returning number of characters parsed. var parseLinkLabel = function() { - if (this.peek() != 91) { + if (this.peek() != C_OPEN_BRACKET) { return 0; } var startpos = this.pos; @@ -2738,31 +2738,31 @@ } this.pos++; // advance past [ var c; - while ((c = this.peek()) && c != -1 && (c != 93 || nest_level > 0)) { + while ((c = this.peek()) && c != -1 && (c != C_CLOSE_BRACKET || nest_level > 0)) { switch (c) { - case 96: + case C_BACKTICK: this.parseBackticks([]); break; - case 60: + case C_LESSTHAN: this.parseAutolink([]) || this.parseHtmlTag([]) || this.pos++; break; - case 91: // nested [] + case C_OPEN_BRACKET: // nested [] nest_level++; this.pos++; break; - case 93: // nested [] + case C_CLOSE_BRACKET: // nested [] nest_level--; this.pos++; break; - case 92: + case C_BACKSLASH: this.parseBackslash([]); break; default: this.parseString([]); } } - if (c === 93) { + if (c === C_CLOSE_BRACKET) { this.label_nest_level = 0; this.pos++; // advance past ] return this.pos - startpos; @@ -2800,7 +2800,7 @@ // if we got this far, we've parsed a label. 
// Try to parse an explicit link: [label](url "title") - if (this.peek() === 40) { + if (this.peek() === C_OPEN_PAREN) { this.pos++; if (this.spnl() && ((dest = this.parseLinkDestination()) !== null) && @@ -2810,7 +2810,7 @@ (title = this.parseLinkTitle() || '') || true) && this.spnl() && this.match(/^\)/)) { - inlines.push({ t: 7, + inlines.push({ t: I_LINK, destination: dest, title: title, label: parseRawLabel(rawlabel) }); @@ -2838,7 +2838,7 @@ // lookup rawlabel in refmap var link = this.refmap[normalizeReference(reflabel)]; if (link) { - inlines.push({t: 7, + inlines.push({t: I_LINK, destination: link.destination, title: link.title, label: parseRawLabel(rawlabel) }); @@ -2856,7 +2856,7 @@ var parseEntity = function(inlines) { var m; if ((m = this.match(reEntityHere))) { - inlines.push({ t: 1, c: entityToChar(m) }); + inlines.push({ t: I_STR, c: entityToChar(m) }); return true; } else { return false; @@ -2868,7 +2868,7 @@ var parseString = function(inlines) { var m; if ((m = this.match(reMain))) { - inlines.push({ t: 1, c: m }); + inlines.push({ t: I_STR, c: m }); return true; } else { return false; @@ -2881,9 +2881,9 @@ var m = this.match(/^ *\n/); if (m) { if (m.length > 2) { - inlines.push({ t: 3 }); + inlines.push({ t: I_HARD_BREAK }); } else if (m.length > 0) { - inlines.push({ t: 2 }); + inlines.push({ t: I_SOFT_BREAK }); } return true; } @@ -2896,10 +2896,10 @@ if (this.match(/^!/)) { var link = this.parseLink(inlines); if (link) { - inlines[inlines.length - 1].t = 8; + inlines[inlines.length - 1].t = I_IMAGE; return true; } else { - inlines.push({ t: 1, c: '!' }); + inlines.push({ t: I_STR, c: '!' 
}); return true; } } else { @@ -2927,7 +2927,7 @@ } // colon: - if (this.peek() === 58) { + if (this.peek() === C_COLON) { this.pos++; } else { this.pos = startpos; @@ -2986,30 +2986,30 @@ } var res; switch(c) { - case 10: - case 32: + case C_NEWLINE: + case C_SPACE: res = this.parseNewline(inlines); break; - case 92: + case C_BACKSLASH: res = this.parseBackslash(inlines); break; - case 96: + case C_BACKTICK: res = this.parseBackticks(inlines); break; - case 42: - case 95: + case C_ASTERISK: + case C_UNDERSCORE: res = this.parseEmphasis(c, inlines); break; - case 91: + case C_OPEN_BRACKET: res = this.parseLink(inlines); break; - case 33: + case C_BANG: res = this.parseImage(inlines); break; - case 60: + case C_LESSTHAN: res = this.parseAutolink(inlines) || this.parseHtmlTag(inlines); break; - case 38: + case C_AMPERSAND: res = this.parseEntity(inlines); break; default: @@ -3018,7 +3018,7 @@ } if (!res) { this.pos += 1; - inlines.push({t: 1, c: String.fromCharCode(c)}); + inlines.push({t: I_STR, c: String.fromCharCode(c)}); } if (memoize) { @@ -3095,17 +3095,17 @@ // Returns true if parent block can contain child block. var canContain = function(parent_type, child_type) { - return ( parent_type === 10 || - parent_type === 12 || - parent_type === 13 || - (parent_type === 14 && child_type === 13) ); + return ( parent_type === B_DOCUMENT || + parent_type === B_BLOCK_QUOTE || + parent_type === B_LIST_ITEM || + (parent_type === B_LIST && child_type === B_LIST_ITEM) ); }; // Returns true if block type can accept lines of text. 
var acceptsLines = function(block_type) { - return ( block_type === 11 || - block_type === 17 || - block_type === 18 ); + return ( block_type === B_PARAGRAPH || + block_type === B_INDENTED_CODE || + block_type === B_FENCED_CODE ); }; // Returns true if block ends with a blank line, descending if needed @@ -3114,7 +3114,7 @@ if (block.last_line_blank) { return true; } - if ((block.t === 14 || block.t === 13) && block.children.length > 0) { + if ((block.t === B_LIST || block.t === B_LIST_ITEM) && block.children.length > 0) { return endsWithBlankLine(block.children[block.children.length - 1]); } else { return false; @@ -3129,7 +3129,7 @@ var b = block; var last_list = null; do { - if (b.t === 14) { + if (b.t === B_LIST) { last_list = b; } b = b.parent; @@ -3258,10 +3258,10 @@ indent = first_nonspace - offset; switch (container.t) { - case 12: - if (indent <= 3 && ln.charCodeAt(first_nonspace) === 62) { + case B_BLOCK_QUOTE: + if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { offset = first_nonspace + 1; - if (ln.charCodeAt(offset) === 32) { + if (ln.charCodeAt(offset) === C_SPACE) { offset++; } } else { @@ -3269,7 +3269,7 @@ } break; - case 13: + case B_LIST_ITEM: if (indent >= container.list_data.marker_offset + container.list_data.padding) { offset += container.list_data.marker_offset + @@ -3281,7 +3281,7 @@ } break; - case 17: + case B_INDENTED_CODE: if (indent >= CODE_INDENT) { offset += CODE_INDENT; } else if (blank) { @@ -3291,29 +3291,29 @@ } break; - case 15: - case 16: - case 21: + case B_ATX_HEADER: + case B_SETEXT_HEADER: + case B_HORIZONTAL_RULE: // a header can never container > 1 line, so fail to match: all_matched = false; break; - case 18: + case B_FENCED_CODE: // skip optional spaces of fence offset i = container.fence_offset; - while (i > 0 && ln.charCodeAt(offset) === 32) { + while (i > 0 && ln.charCodeAt(offset) === C_SPACE) { offset++; i--; } break; - case 19: + case B_HTML_BLOCK: if (blank) { all_matched = false; } break; - 
case 11: + case B_PARAGRAPH: if (blank) { container.last_line_blank = true; all_matched = false; @@ -3352,9 +3352,9 @@ // Unless last matched container is a code block, try new container starts, // adding children to the last matched container: - while (container.t != 18 && - container.t != 17 && - container.t != 19 && + while (container.t != B_FENCED_CODE && + container.t != B_INDENTED_CODE && + container.t != B_HTML_BLOCK && // this is a little performance optimization: matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { @@ -3370,29 +3370,29 @@ if (indent >= CODE_INDENT) { // indented code - if (this.tip.t != 11 && !blank) { + if (this.tip.t != B_PARAGRAPH && !blank) { offset += CODE_INDENT; closeUnmatchedBlocks(this); - container = this.addChild(17, line_number, offset); + container = this.addChild(B_INDENTED_CODE, line_number, offset); } else { // indent > 4 in a lazy paragraph continuation break; } - } else if (ln.charCodeAt(first_nonspace) === 62) { + } else if (ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { // blockquote offset = first_nonspace + 1; // optional following space - if (ln.charCodeAt(offset) === 32) { + if (ln.charCodeAt(offset) === C_SPACE) { offset++; } closeUnmatchedBlocks(this); - container = this.addChild(12, line_number, offset); + container = this.addChild(B_BLOCK_QUOTE, line_number, offset); } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { // ATX header offset = first_nonspace + match[0].length; closeUnmatchedBlocks(this); - container = this.addChild(15, line_number, first_nonspace); + container = this.addChild(B_ATX_HEADER, line_number, first_nonspace); container.level = match[0].trim().length; // number of #s // remove trailing ###s: container.strings = @@ -3403,7 +3403,7 @@ // fenced code block var fence_length = match[0].length; closeUnmatchedBlocks(this); - container = this.addChild(18, line_number, first_nonspace); + container = this.addChild(B_FENCED_CODE, line_number, first_nonspace); 
container.fence_length = fence_length; container.fence_char = match[0][0]; container.fence_offset = first_nonspace - offset; @@ -3413,23 +3413,23 @@ } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { // html block closeUnmatchedBlocks(this); - container = this.addChild(19, line_number, first_nonspace); + container = this.addChild(B_HTML_BLOCK, line_number, first_nonspace); // note, we don't adjust offset because the tag is part of the text break; - } else if (container.t == 11 && + } else if (container.t == B_PARAGRAPH && container.strings.length === 1 && ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { // setext header line closeUnmatchedBlocks(this); - container.t = 16; // convert Paragraph to SetextHeader + container.t = B_SETEXT_HEADER; // convert Paragraph to SetextHeader container.level = match[0][0] === '=' ? 1 : 2; offset = ln.length; } else if (matchAt(reHrule, ln, first_nonspace) !== null) { // hrule closeUnmatchedBlocks(this); - container = this.addChild(21, line_number, first_nonspace); + container = this.addChild(B_HORIZONTAL_RULE, line_number, first_nonspace); offset = ln.length - 1; break; @@ -3440,14 +3440,14 @@ offset = first_nonspace + data.padding; // add the list if needed - if (container.t !== 14 || + if (container.t !== B_LIST || !(listsMatch(container.list_data, data))) { - container = this.addChild(14, line_number, first_nonspace); + container = this.addChild(B_LIST, line_number, first_nonspace); container.list_data = data; } // add the list item - container = this.addChild(13, line_number, first_nonspace); + container = this.addChild(B_LIST_ITEM, line_number, first_nonspace); container.list_data = data; } else { @@ -3477,7 +3477,7 @@ // First check for a lazy paragraph continuation: if (this.tip !== last_matched_container && !blank && - this.tip.t == 11 && + this.tip.t == B_PARAGRAPH && this.tip.strings.length > 0) { // lazy paragraph continuation @@ -3494,9 +3494,9 @@ // lists or breaking out of lists. 
We also don't set last_line_blank // on an empty list item. container.last_line_blank = blank && - !(container.t == 12 || - container.t == 18 || - (container.t == 13 && + !(container.t == B_BLOCK_QUOTE || + container.t == B_FENCED_CODE || + (container.t == B_LIST_ITEM && container.children.length === 0 && container.start_line == line_number)); @@ -3507,12 +3507,12 @@ } switch (container.t) { - case 17: - case 19: + case B_INDENTED_CODE: + case B_HTML_BLOCK: this.addLine(ln, offset); break; - case 18: + case B_FENCED_CODE: // check for closing code fence: match = (indent <= 3 && ln.charAt(first_nonspace) == container.fence_char && @@ -3525,9 +3525,9 @@ } break; - case 15: - case 16: - case 21: + case B_ATX_HEADER: + case B_SETEXT_HEADER: + case B_HORIZONTAL_RULE: // nothing to do; we already added the contents. break; @@ -3536,10 +3536,10 @@ this.addLine(ln, first_nonspace); } else if (blank) { // do nothing - } else if (container.t != 21 && - container.t != 16) { + } else if (container.t != B_HORIZONTAL_RULE && + container.t != B_SETEXT_HEADER) { // create paragraph container for line - container = this.addChild(11, line_number, first_nonspace); + container = this.addChild(B_PARAGRAPH, line_number, first_nonspace); this.addLine(ln, first_nonspace); } else { console.log("Line " + line_number.toString() + @@ -3570,32 +3570,32 @@ } switch (block.t) { - case 11: + case B_PARAGRAPH: block.string_content = block.strings.join('\n').replace(/^ */m,''); // try parsing the beginning as link reference definitions: - while (block.string_content.charCodeAt(0) === 91 && + while (block.string_content.charCodeAt(0) === C_OPEN_BRACKET && (pos = this.inlineParser.parseReference(block.string_content, this.refmap))) { block.string_content = block.string_content.slice(pos); if (isBlank(block.string_content)) { - block.t = 20; + block.t = B_REFERENCE_DEF; break; } } break; - case 15: - case 16: - case 19: + case B_ATX_HEADER: + case B_SETEXT_HEADER: + case B_HTML_BLOCK: 
block.string_content = block.strings.join('\n'); break; - case 17: + case B_INDENTED_CODE: block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n'); break; - case 18: + case B_FENCED_CODE: // first line becomes info string block.info = unescapeEntBS(block.strings[0].trim()); if (block.strings.length == 1) { @@ -3605,7 +3605,7 @@ } break; - case 14: + case B_LIST: block.tight = true; // tight by default var numitems = block.children.length; @@ -3646,9 +3646,9 @@ // into inline content where appropriate. var processInlines = function(block) { switch(block.t) { - case 11: - case 16: - case 15: + case B_PARAGRAPH: + case B_SETEXT_HEADER: + case B_ATX_HEADER: block.inline_content = this.inlineParser.parse(block.string_content.trim(), this.refmap); block.string_content = ""; @@ -3667,7 +3667,7 @@ // The main parsing function. Returns a parsed document AST. var parse = function(input) { - this.doc = makeBlock(10, 1, 1); + this.doc = makeBlock(B_DOCUMENT, 1, 1); this.tip = this.doc; this.refmap = {}; var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/); @@ -3686,7 +3686,7 @@ // The DocParser object. 
function DocParser(){ return { - doc: makeBlock(10, 1, 1), + doc: makeBlock(B_DOCUMENT, 1, 1), tip: this.doc, refmap: {}, inlineParser: new InlineParser(), @@ -3727,32 +3727,32 @@ var renderInline = function(inline) { var attrs; switch (inline.t) { - case 1: + case I_STR: return this.escape(inline.c); - case 2: + case I_SOFT_BREAK: return this.softbreak; - case 3: + case I_HARD_BREAK: return inTags('br',[],"",true) + '\n'; - case 4: + case I_EMPH: return inTags('em', [], this.renderInlines(inline.c)); - case 5: + case I_STRONG: return inTags('strong', [], this.renderInlines(inline.c)); - case 6: + case I_HTML: return inline.c; - case 7: + case I_LINK: attrs = [['href', this.escape(inline.destination, true)]]; if (inline.title) { attrs.push(['title', this.escape(inline.title, true)]); } return inTags('a', attrs, this.renderInlines(inline.label)); - case 8: + case I_IMAGE: attrs = [['src', this.escape(inline.destination, true)], ['alt', this.escape(this.renderInlines(inline.label))]]; if (inline.title) { attrs.push(['title', this.escape(inline.title, true)]); } return inTags('img', attrs, "", true); - case 9: + case I_CODE: return inTags('code', [], this.escape(inline.c)); default: console.log("Unknown inline type " + inline.t); @@ -3775,48 +3775,48 @@ var attr; var info_words; switch (block.t) { - case 10: + case B_DOCUMENT: var whole_doc = this.renderBlocks(block.children); return (whole_doc === '' ? '' : whole_doc + '\n'); - case 11: + case B_PARAGRAPH: if (in_tight_list) { return this.renderInlines(block.inline_content); } else { return inTags('p', [], this.renderInlines(block.inline_content)); } break; - case 12: + case B_BLOCK_QUOTE: var filling = this.renderBlocks(block.children); return inTags('blockquote', [], filling === '' ? 
this.innersep : this.innersep + filling + this.innersep); - case 13: + case B_LIST_ITEM: return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim()); - case 14: + case B_LIST: tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol'; attr = (!block.list_data.start || block.list_data.start == 1) ? [] : [['start', block.list_data.start.toString()]]; return inTags(tag, attr, this.innersep + this.renderBlocks(block.children, block.tight) + this.innersep); - case 15: - case 16: + case B_ATX_HEADER: + case B_SETEXT_HEADER: tag = 'h' + block.level; return inTags(tag, [], this.renderInlines(block.inline_content)); - case 17: + case B_INDENTED_CODE: return inTags('pre', [], inTags('code', [], this.escape(block.string_content))); - case 18: + case B_FENCED_CODE: info_words = block.info.split(/ +/); attr = info_words.length === 0 || info_words[0].length === 0 ? [] : [['class','language-' + this.escape(info_words[0],true)]]; return inTags('pre', [], inTags('code', attr, this.escape(block.string_content))); - case 19: + case B_HTML_BLOCK: return block.string_content; - case 20: + case B_REFERENCE_DEF: return ""; - case 21: + case B_HORIZONTAL_RULE: return inTags('hr',[],"",true); default: console.log("Unknown block type " + block.t); @@ -3828,7 +3828,7 @@ var renderBlocks = function(blocks, in_tight_list) { var result = []; for (var i=0; i < blocks.length; i++) { - if (blocks[i].t !== 20) { + if (blocks[i].t !== B_REFERENCE_DEF) { result.push(this.renderBlock(blocks[i], in_tight_list)); } } -- cgit v1.2.3 From c9ad75b4c69edf064106bc63fdf6a2637a7c5a8b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 2 Oct 2014 17:54:20 -0700 Subject: Revert "Use integers instead of strings for tags." This reverts commit ac611d51c9de9aa719b42b9463e6f28d6e7d74a4. 
--- js/stmd.js | 238 +++++++++++++++++++++++++++---------------------------------- 1 file changed, 107 insertions(+), 131 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 23caf31..efccad8 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2137,30 +2137,6 @@ zwj: '‍', zwnj: '‌' }; - // Constants for inline and block types: - - var I_STR = 1; - var I_SOFT_BREAK = 2; - var I_HARD_BREAK = 3; - var I_EMPH = 4; - var I_STRONG = 5; - var I_HTML = 6; - var I_LINK = 7; - var I_IMAGE = 8; - var I_CODE = 9; - var B_DOCUMENT = 10; - var B_PARAGRAPH = 11; - var B_BLOCK_QUOTE = 12; - var B_LIST_ITEM = 13; - var B_LIST = 14; - var B_ATX_HEADER = 15; - var B_SETEXT_HEADER = 16; - var B_INDENTED_CODE = 17; - var B_FENCED_CODE = 18; - var B_HTML_BLOCK = 19; - var B_REFERENCE_DEF = 20; - var B_HORIZONTAL_RULE = 21; - // Constants for character codes: var C_NEWLINE = 10; @@ -2297,7 +2273,7 @@ // Convert tabs to spaces on each line using a 4-space tab stop. var detabLine = function(text) { - if (text.indexOf('\t') === -1) { + if (text.indexOf('\t') == -1) { return text; } else { var lastStop = 0; @@ -2359,8 +2335,8 @@ var foundCode = false; var match; while (!foundCode && (match = this.match(/`+/m))) { - if (match === ticks) { - inlines.push({ t: I_CODE, c: this.subject.slice(afterOpenTicks, + if (match == ticks) { + inlines.push({ t: 'Code', c: this.subject.slice(afterOpenTicks, this.pos - ticks.length) .replace(/[ \n]+/g,' ') .trim() }); @@ -2369,7 +2345,7 @@ } // If we got here, we didn't match a closing backtick sequence. 
this.pos = afterOpenTicks; - inlines.push({ t: I_STR, c: ticks }); + inlines.push({ t: 'Str', c: ticks }); return true; }; @@ -2382,13 +2358,13 @@ if (subj.charCodeAt(pos) === C_BACKSLASH) { if (subj.charAt(pos + 1) === '\n') { this.pos = this.pos + 2; - inlines.push({ t: I_HARD_BREAK }); + inlines.push({ t: 'Hardbreak' }); } else if (reEscapable.test(subj.charAt(pos + 1))) { this.pos = this.pos + 2; - inlines.push({ t: I_STR, c: subj.charAt(pos + 1) }); + inlines.push({ t: 'Str', c: subj.charAt(pos + 1) }); } else { this.pos++; - inlines.push({t: I_STR, c: '\\'}); + inlines.push({t: 'Str', c: '\\'}); } return true; } else { @@ -2403,15 +2379,15 @@ if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink dest = m.slice(1,-1); inlines.push( - {t: I_LINK, - label: [{ t: I_STR, c: dest }], + {t: 'Link', + label: [{ t: 'Str', c: dest }], destination: 'mailto:' + encodeURI(unescape(dest)) }); return true; } else if ((m = 
this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1,-1); inlines.push({ - t: I_LINK, - label: [{ t: I_STR, c: dest }], + t: 'Link', + label: [{ t: 'Str', c: dest }], destination: encodeURI(unescape(dest)) }); return true; } else { @@ -2423,7 +2399,7 @@ var parseHtmlTag = function(inlines) { var m = this.match(reHtmlTag); if (m) { - inlines.push({ t: I_HTML, c: m }); + inlines.push({ t: 'Html', c: m }); return true; } else { return false; @@ -2468,15 +2444,15 @@ }; var Emph = function(ils) { - return {t: I_EMPH, c: ils}; + return {t: 'Emph', c: ils}; } var Strong = function(ils) { - return {t: I_STRONG, c: ils}; + return {t: 'Strong', c: ils}; } var Str = function(s) { - return {t: I_STR, c: s}; + return {t: 'Str', c: s}; } // Attempt to parse emphasis or strong emphasis. @@ -2800,7 +2776,7 @@ // if we got this far, we've parsed a label. 
// Try to parse an explicit link: [label](url "title") - if (this.peek() === C_OPEN_PAREN) { + if (this.peek() == C_OPEN_PAREN) { this.pos++; if (this.spnl() && ((dest = this.parseLinkDestination()) !== null) && @@ -2810,7 +2786,7 @@ (title = this.parseLinkTitle() || '') || true) && this.spnl() && this.match(/^\)/)) { - inlines.push({ t: I_LINK, + inlines.push({ t: 'Link', destination: dest, title: title, label: parseRawLabel(rawlabel) }); @@ -2826,7 +2802,7 @@ this.spnl(); var beforelabel = this.pos; n = this.parseLinkLabel(); - if (n === 2) { + if (n == 2) { // empty second label reflabel = rawlabel; } else if (n > 0) { @@ -2838,7 +2814,7 @@ // lookup rawlabel in refmap var link = this.refmap[normalizeReference(reflabel)]; if (link) { - inlines.push({t: I_LINK, + inlines.push({t: 'Link', destination: link.destination, title: link.title, label: parseRawLabel(rawlabel) }); @@ -2856,7 +2832,7 @@ var parseEntity = function(inlines) { var m; if ((m = this.match(reEntityHere))) { - inlines.push({ t: I_STR, c: entityToChar(m) }); + inlines.push({ t: 'Str', c: entityToChar(m) }); return true; } else { return false; @@ -2868,7 +2844,7 @@ var parseString = function(inlines) { var m; if ((m = this.match(reMain))) { - inlines.push({ t: I_STR, c: m }); + inlines.push({ t: 'Str', c: m }); return true; } else { return false; @@ -2881,9 +2857,9 @@ var m = this.match(/^ *\n/); if (m) { if (m.length > 2) { - inlines.push({ t: I_HARD_BREAK }); + inlines.push({ t: 'Hardbreak' }); } else if (m.length > 0) { - inlines.push({ t: I_SOFT_BREAK }); + inlines.push({ t: 'Softbreak' }); } return true; } @@ -2896,10 +2872,10 @@ if (this.match(/^!/)) { var link = this.parseLink(inlines); if (link) { - inlines[inlines.length - 1].t = I_IMAGE; + inlines[inlines.length - 1].t = 'Image'; return true; } else { - inlines.push({ t: I_STR, c: '!' }); + inlines.push({ t: 'Str', c: '!' 
}); return true; } } else { @@ -3018,7 +2994,7 @@ } if (!res) { this.pos += 1; - inlines.push({t: I_STR, c: String.fromCharCode(c)}); + inlines.push({t: 'Str', c: String.fromCharCode(c)}); } if (memoize) { @@ -3095,17 +3071,17 @@ // Returns true if parent block can contain child block. var canContain = function(parent_type, child_type) { - return ( parent_type === B_DOCUMENT || - parent_type === B_BLOCK_QUOTE || - parent_type === B_LIST_ITEM || - (parent_type === B_LIST && child_type === B_LIST_ITEM) ); + return ( parent_type == 'Document' || + parent_type == 'BlockQuote' || + parent_type == 'ListItem' || + (parent_type == 'List' && child_type == 'ListItem') ); }; // Returns true if block type can accept lines of text. var acceptsLines = function(block_type) { - return ( block_type === B_PARAGRAPH || - block_type === B_INDENTED_CODE || - block_type === B_FENCED_CODE ); + return ( block_type == 'Paragraph' || + block_type == 'IndentedCode' || + block_type == 'FencedCode' ); }; // Returns true if block ends with a blank line, descending if needed @@ -3114,7 +3090,7 @@ if (block.last_line_blank) { return true; } - if ((block.t === B_LIST || block.t === B_LIST_ITEM) && block.children.length > 0) { + if ((block.t == 'List' || block.t == 'ListItem') && block.children.length > 0) { return endsWithBlankLine(block.children[block.children.length - 1]); } else { return false; @@ -3129,7 +3105,7 @@ var b = block; var last_list = null; do { - if (b.t === B_LIST) { + if (b.t === 'List') { last_list = b; } b = b.parent; @@ -3258,7 +3234,7 @@ indent = first_nonspace - offset; switch (container.t) { - case B_BLOCK_QUOTE: + case 'BlockQuote': if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { offset = first_nonspace + 1; if (ln.charCodeAt(offset) === C_SPACE) { @@ -3269,7 +3245,7 @@ } break; - case B_LIST_ITEM: + case 'ListItem': if (indent >= container.list_data.marker_offset + container.list_data.padding) { offset += container.list_data.marker_offset + @@ 
-3281,7 +3257,7 @@ } break; - case B_INDENTED_CODE: + case 'IndentedCode': if (indent >= CODE_INDENT) { offset += CODE_INDENT; } else if (blank) { @@ -3291,14 +3267,14 @@ } break; - case B_ATX_HEADER: - case B_SETEXT_HEADER: - case B_HORIZONTAL_RULE: + case 'ATXHeader': + case 'SetextHeader': + case 'HorizontalRule': // a header can never container > 1 line, so fail to match: all_matched = false; break; - case B_FENCED_CODE: + case 'FencedCode': // skip optional spaces of fence offset i = container.fence_offset; while (i > 0 && ln.charCodeAt(offset) === C_SPACE) { @@ -3307,13 +3283,13 @@ } break; - case B_HTML_BLOCK: + case 'HtmlBlock': if (blank) { all_matched = false; } break; - case B_PARAGRAPH: + case 'Paragraph': if (blank) { container.last_line_blank = true; all_matched = false; @@ -3352,9 +3328,9 @@ // Unless last matched container is a code block, try new container starts, // adding children to the last matched container: - while (container.t != B_FENCED_CODE && - container.t != B_INDENTED_CODE && - container.t != B_HTML_BLOCK && + while (container.t != 'FencedCode' && + container.t != 'IndentedCode' && + container.t != 'HtmlBlock' && // this is a little performance optimization: matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { @@ -3370,10 +3346,10 @@ if (indent >= CODE_INDENT) { // indented code - if (this.tip.t != B_PARAGRAPH && !blank) { + if (this.tip.t != 'Paragraph' && !blank) { offset += CODE_INDENT; closeUnmatchedBlocks(this); - container = this.addChild(B_INDENTED_CODE, line_number, offset); + container = this.addChild('IndentedCode', line_number, offset); } else { // indent > 4 in a lazy paragraph continuation break; } @@ -3386,13 +3362,13 @@ offset++; } closeUnmatchedBlocks(this); - container = this.addChild(B_BLOCK_QUOTE, line_number, offset); + container = this.addChild('BlockQuote', line_number, offset); } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { // ATX header offset = first_nonspace + match[0].length; 
closeUnmatchedBlocks(this); - container = this.addChild(B_ATX_HEADER, line_number, first_nonspace); + container = this.addChild('ATXHeader', line_number, first_nonspace); container.level = match[0].trim().length; // number of #s // remove trailing ###s: container.strings = @@ -3403,7 +3379,7 @@ // fenced code block var fence_length = match[0].length; closeUnmatchedBlocks(this); - container = this.addChild(B_FENCED_CODE, line_number, first_nonspace); + container = this.addChild('FencedCode', line_number, first_nonspace); container.fence_length = fence_length; container.fence_char = match[0][0]; container.fence_offset = first_nonspace - offset; @@ -3413,23 +3389,23 @@ } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { // html block closeUnmatchedBlocks(this); - container = this.addChild(B_HTML_BLOCK, line_number, first_nonspace); + container = this.addChild('HtmlBlock', line_number, first_nonspace); // note, we don't adjust offset because the tag is part of the text break; - } else if (container.t == B_PARAGRAPH && + } else if (container.t == 'Paragraph' && container.strings.length === 1 && ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { // setext header line closeUnmatchedBlocks(this); - container.t = B_SETEXT_HEADER; // convert Paragraph to SetextHeader + container.t = 'SetextHeader'; // convert Paragraph to SetextHeader container.level = match[0][0] === '=' ? 
1 : 2; offset = ln.length; } else if (matchAt(reHrule, ln, first_nonspace) !== null) { // hrule closeUnmatchedBlocks(this); - container = this.addChild(B_HORIZONTAL_RULE, line_number, first_nonspace); + container = this.addChild('HorizontalRule', line_number, first_nonspace); offset = ln.length - 1; break; @@ -3440,14 +3416,14 @@ offset = first_nonspace + data.padding; // add the list if needed - if (container.t !== B_LIST || + if (container.t !== 'List' || !(listsMatch(container.list_data, data))) { - container = this.addChild(B_LIST, line_number, first_nonspace); + container = this.addChild('List', line_number, first_nonspace); container.list_data = data; } // add the list item - container = this.addChild(B_LIST_ITEM, line_number, first_nonspace); + container = this.addChild('ListItem', line_number, first_nonspace); container.list_data = data; } else { @@ -3477,7 +3453,7 @@ // First check for a lazy paragraph continuation: if (this.tip !== last_matched_container && !blank && - this.tip.t == B_PARAGRAPH && + this.tip.t == 'Paragraph' && this.tip.strings.length > 0) { // lazy paragraph continuation @@ -3494,9 +3470,9 @@ // lists or breaking out of lists. We also don't set last_line_blank // on an empty list item. 
container.last_line_blank = blank && - !(container.t == B_BLOCK_QUOTE || - container.t == B_FENCED_CODE || - (container.t == B_LIST_ITEM && + !(container.t == 'BlockQuote' || + container.t == 'FencedCode' || + (container.t == 'ListItem' && container.children.length === 0 && container.start_line == line_number)); @@ -3507,12 +3483,12 @@ } switch (container.t) { - case B_INDENTED_CODE: - case B_HTML_BLOCK: + case 'IndentedCode': + case 'HtmlBlock': this.addLine(ln, offset); break; - case B_FENCED_CODE: + case 'FencedCode': // check for closing code fence: match = (indent <= 3 && ln.charAt(first_nonspace) == container.fence_char && @@ -3525,9 +3501,9 @@ } break; - case B_ATX_HEADER: - case B_SETEXT_HEADER: - case B_HORIZONTAL_RULE: + case 'ATXHeader': + case 'SetextHeader': + case 'HorizontalRule': // nothing to do; we already added the contents. break; @@ -3536,10 +3512,10 @@ this.addLine(ln, first_nonspace); } else if (blank) { // do nothing - } else if (container.t != B_HORIZONTAL_RULE && - container.t != B_SETEXT_HEADER) { + } else if (container.t != 'HorizontalRule' && + container.t != 'SetextHeader') { // create paragraph container for line - container = this.addChild(B_PARAGRAPH, line_number, first_nonspace); + container = this.addChild('Paragraph', line_number, first_nonspace); this.addLine(ln, first_nonspace); } else { console.log("Line " + line_number.toString() + @@ -3570,7 +3546,7 @@ } switch (block.t) { - case B_PARAGRAPH: + case 'Paragraph': block.string_content = block.strings.join('\n').replace(/^ */m,''); // try parsing the beginning as link reference definitions: @@ -3579,23 +3555,23 @@ this.refmap))) { block.string_content = block.string_content.slice(pos); if (isBlank(block.string_content)) { - block.t = B_REFERENCE_DEF; + block.t = 'ReferenceDef'; break; } } break; - case B_ATX_HEADER: - case B_SETEXT_HEADER: - case B_HTML_BLOCK: + case 'ATXHeader': + case 'SetextHeader': + case 'HtmlBlock': block.string_content = block.strings.join('\n'); break; 
- case B_INDENTED_CODE: + case 'IndentedCode': block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n'); break; - case B_FENCED_CODE: + case 'FencedCode': // first line becomes info string block.info = unescapeEntBS(block.strings[0].trim()); if (block.strings.length == 1) { @@ -3605,7 +3581,7 @@ } break; - case B_LIST: + case 'List': block.tight = true; // tight by default var numitems = block.children.length; @@ -3646,9 +3622,9 @@ // into inline content where appropriate. var processInlines = function(block) { switch(block.t) { - case B_PARAGRAPH: - case B_SETEXT_HEADER: - case B_ATX_HEADER: + case 'Paragraph': + case 'SetextHeader': + case 'ATXHeader': block.inline_content = this.inlineParser.parse(block.string_content.trim(), this.refmap); block.string_content = ""; @@ -3667,7 +3643,7 @@ // The main parsing function. Returns a parsed document AST. var parse = function(input) { - this.doc = makeBlock(B_DOCUMENT, 1, 1); + this.doc = makeBlock('Document', 1, 1); this.tip = this.doc; this.refmap = {}; var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/); @@ -3686,7 +3662,7 @@ // The DocParser object. 
function DocParser(){ return { - doc: makeBlock(B_DOCUMENT, 1, 1), + doc: makeBlock('Document', 1, 1), tip: this.doc, refmap: {}, inlineParser: new InlineParser(), @@ -3727,32 +3703,32 @@ var renderInline = function(inline) { var attrs; switch (inline.t) { - case I_STR: + case 'Str': return this.escape(inline.c); - case I_SOFT_BREAK: + case 'Softbreak': return this.softbreak; - case I_HARD_BREAK: + case 'Hardbreak': return inTags('br',[],"",true) + '\n'; - case I_EMPH: + case 'Emph': return inTags('em', [], this.renderInlines(inline.c)); - case I_STRONG: + case 'Strong': return inTags('strong', [], this.renderInlines(inline.c)); - case I_HTML: + case 'Html': return inline.c; - case I_LINK: + case 'Link': attrs = [['href', this.escape(inline.destination, true)]]; if (inline.title) { attrs.push(['title', this.escape(inline.title, true)]); } return inTags('a', attrs, this.renderInlines(inline.label)); - case I_IMAGE: + case 'Image': attrs = [['src', this.escape(inline.destination, true)], ['alt', this.escape(this.renderInlines(inline.label))]]; if (inline.title) { attrs.push(['title', this.escape(inline.title, true)]); } return inTags('img', attrs, "", true); - case I_CODE: + case 'Code': return inTags('code', [], this.escape(inline.c)); default: console.log("Unknown inline type " + inline.t); @@ -3775,48 +3751,48 @@ var attr; var info_words; switch (block.t) { - case B_DOCUMENT: + case 'Document': var whole_doc = this.renderBlocks(block.children); return (whole_doc === '' ? '' : whole_doc + '\n'); - case B_PARAGRAPH: + case 'Paragraph': if (in_tight_list) { return this.renderInlines(block.inline_content); } else { return inTags('p', [], this.renderInlines(block.inline_content)); } break; - case B_BLOCK_QUOTE: + case 'BlockQuote': var filling = this.renderBlocks(block.children); return inTags('blockquote', [], filling === '' ? 
this.innersep : this.innersep + filling + this.innersep); - case B_LIST_ITEM: + case 'ListItem': return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim()); - case B_LIST: + case 'List': tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol'; attr = (!block.list_data.start || block.list_data.start == 1) ? [] : [['start', block.list_data.start.toString()]]; return inTags(tag, attr, this.innersep + this.renderBlocks(block.children, block.tight) + this.innersep); - case B_ATX_HEADER: - case B_SETEXT_HEADER: + case 'ATXHeader': + case 'SetextHeader': tag = 'h' + block.level; return inTags(tag, [], this.renderInlines(block.inline_content)); - case B_INDENTED_CODE: + case 'IndentedCode': return inTags('pre', [], inTags('code', [], this.escape(block.string_content))); - case B_FENCED_CODE: + case 'FencedCode': info_words = block.info.split(/ +/); attr = info_words.length === 0 || info_words[0].length === 0 ? [] : [['class','language-' + this.escape(info_words[0],true)]]; return inTags('pre', [], inTags('code', attr, this.escape(block.string_content))); - case B_HTML_BLOCK: + case 'HtmlBlock': return block.string_content; - case B_REFERENCE_DEF: + case 'ReferenceDef': return ""; - case B_HORIZONTAL_RULE: + case 'HorizontalRule': return inTags('hr',[],"",true); default: console.log("Unknown block type " + block.t); @@ -3828,7 +3804,7 @@ var renderBlocks = function(blocks, in_tight_list) { var result = []; for (var i=0; i < blocks.length; i++) { - if (blocks[i].t !== B_REFERENCE_DEF) { + if (blocks[i].t !== 'ReferenceDef') { result.push(this.renderBlock(blocks[i], in_tight_list)); } } -- cgit v1.2.3 From 52c69afc6f4ad2f962f55c6daa7adaab87f835ae Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 4 Oct 2014 16:43:21 -0700 Subject: Use simpler algorithm. This handles things like `**hi***there*` and gives symmetrical treatment of `**hi*` and `*hi**`. Also handles the case from #147. 
--- js/stmd.js | 192 ++++++++++--------------------------------------------------- 1 file changed, 30 insertions(+), 162 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index efccad8..24651fb 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2463,6 +2463,7 @@ c = String.fromCharCode(cc); var numdelims; + var numclosedelims; var delimpos; // Get opening delimiters. @@ -2482,187 +2483,54 @@ this.pos += numdelims; - var fallbackpos = this.pos; + var delims_to_match = numdelims; - var next_inline; - var first = []; - var second = []; - var current = first; + var current = []; var state = 0; var can_close = false; var can_open = false; var last_emphasis_closer = null; - - if (numdelims === 3) { - state = 1; - } else if (numdelims === 2) { - state = 2; - } else if (numdelims === 1) { - state = 3; - } - - while (true) { - if (this.last_emphasis_closer[c] < this.pos) { - break; - } + while (this.last_emphasis_closer[c] >= this.pos) { res = this.scanDelims(cc); + numclosedelims = res.numdelims; - if (res) { - numdelims = res.numdelims; - can_close = res.can_close; - if (can_close) { + if (res.can_close) { + if (last_emphasis_closer === null || + last_emphasis_closer < this.pos) { last_emphasis_closer = this.pos; } - can_open = res.can_open; - switch (state) { - case 1: // ***a - if (numdelims === 3 && can_close) { - this.pos += 3; - inlines.push(Strong([Emph(first)])); - return true; - } else if (numdelims === 2 && can_close) { - this.pos += 2; - current = second; - state = can_open ? 4 : 6; - continue; - } else if (numdelims === 1 && can_close) { - this.pos += 1; - current = second; - state = can_open ? 
5 : 7; - continue; - } - break; - case 2: // **a - if (numdelims === 2 && can_close) { - this.pos += 2; - inlines.push(Strong(first)); - return true; - } else if (numdelims === 1 && can_open) { - this.pos += 1; - current = second; - state = 8; - continue; - } - break; - case 3: // *a - if (numdelims === 1 && can_close) { - this.pos += 1; - inlines.push(Emph(first)); - return true; - } else if (numdelims === 2 && can_open) { - this.pos += 2; - current = second; - state = 9; - continue; - } - break; - case 4: // ***a**b - if (numdelims === 3 && can_close) { - this.pos += 3; - inlines.push(Strong([Emph(first.concat([Str(c+c)], second))])); - return true; - } else if (numdelims === 2 && can_close) { - this.pos += 2; - inlines.push(Strong([Str(c+c+c)].concat( - first, - [Strong(second)]))); - return true; - } else if (numdelims === 1 && can_close) { - this.pos += 1; - inlines.push(Emph([Strong(first)].concat(second))); - return true; - } - break; - case 5: // ***a*b - if (numdelims === 3 && can_close) { - this.pos += 3; - inlines.push(Strong([Emph(first.concat([Str(c)], second))])); - return true; - } else if (numdelims === 2 && can_close) { - this.pos += 2; - inlines.push(Strong([Emph(first)].concat(second))); - return true; - } else if (numdelims === 1 && can_close) { - this.pos += 1; - inlines.push(Strong([Str(c+c+c)].concat( - first, - [Emph(second)]))); - return true; - } - break; - case 6: // ***a** b - if (numdelims === 3 && can_close) { - this.pos += 3; - inlines.push(Strong([Emph(first.concat([Str(c+c)], second))])); - return true; - } else if (numdelims === 1 && can_close) { - this.pos += 1; - inlines.push(Emph([Strong(first)].concat(second))); - return true; - } - break; - case 7: // ***a* b - if (numdelims === 3 && can_close) { - this.pos += 3; - inlines.push(Strong([Emph(first.concat([Str(c)], second))])); - return true; - } else if (numdelims === 2 && can_close) { - this.pos += 2; - inlines.push(Strong([Emph(first)].concat(second))); - return true; - } - 
break; - case 8: // **a *b - if (numdelims === 3 && can_close) { - this.pos += 3; - inlines.push(Strong(first.concat([Emph(second)]))); - return true; - } else if (numdelims === 2 && can_close) { - this.pos += 2; - inlines.push(Strong(first.concat([Str(c)], second))); - return true; - } else if (numdelims === 1 && can_close) { - this.pos += 1; - first.push(Emph(second)); - current = first; - state = 2; - continue; - } - break; - case 9: // *a **b - if (numdelims === 3 && can_close) { - this.pos += 3; - inlines.push(Emph(first.concat([Strong(second)]))); - return true; - } else if (numdelims === 2 && can_close) { - this.pos += 2; - first.push(Strong(second)); - current = first; - state = 3; - continue; - } else if (numdelims === 1 && can_close) { - this.pos += 1; - inlines.push(Emph(first.concat([Str(c+c)], second))); - return true; + if (numclosedelims === 3 && delims_to_match === 3) { + delims_to_match -= 3; + this.pos += 3; + current = [{t: 'Strong', c: [{t: 'Emph', c: current}]}]; + } else if (numclosedelims >= 2 && delims_to_match >= 2) { + delims_to_match -= 2; + this.pos += 2; + current = [{t: 'Strong', c: current}]; + } else if (numclosedelims >= 1 && delims_to_match >= 1) { + delims_to_match -= 1; + this.pos += 1; + current = [{t: 'Emph', c: current}]; + } else { + if (!(this.parseInline(current,true))) { + break; } - break; - default: - break; } - } - - if (!(this.parseInline(current,true))) { + if (delims_to_match === 0) { + Array.prototype.push.apply(inlines, current); + return true; + } + } else if (!(this.parseInline(current,true))) { break; } - } // we didn't match emphasis: fallback - this.pos = fallbackpos; + this.pos = startpos + 1; if (last_emphasis_closer) { this.last_emphasis_closer[c] = last_emphasis_closer; } - inlines.push(Str(this.subject.slice(startpos, fallbackpos))); + inlines.push(Str(c)); return true; }; -- cgit v1.2.3 From 9d590fa7cd1158da138e602af542d2ca59d8d76e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 4 Oct 2014 
16:49:05 -0700 Subject: Some jshint fixes. --- js/stmd.js | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 24651fb..9a3a8c7 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2184,7 +2184,7 @@ PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" + "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])"; - var ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});" + var ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"; var reHtmlTag = new RegExp('^' + HTMLTAG, 'i'); @@ -2239,12 +2239,12 @@ uchar = entities[m.slice(1,-1)]; } return (uchar || m); - } + }; // Replace entities and backslash escapes with literal characters. var unescapeEntBS = function(s) { return s.replace(reAllEscapedChar, '$1') - .replace(reEntity, entityToChar);; + .replace(reEntity, entityToChar); }; // Returns true if string contains only space characters. @@ -2445,15 +2445,15 @@ var Emph = function(ils) { return {t: 'Emph', c: ils}; - } + }; var Strong = function(ils) { return {t: 'Strong', c: ils}; - } + }; var Str = function(s) { return {t: 'Str', c: s}; - } + }; // Attempt to parse emphasis or strong emphasis. var parseEmphasis = function(cc,inlines) { @@ -2588,8 +2588,9 @@ this.parseBackticks([]); break; case C_LESSTHAN: - this.parseAutolink([]) || this.parseHtmlTag([]) || + if (!(this.parseAutolink([]) || this.parseHtmlTag([]))) { this.pos++; + } break; case C_OPEN_BRACKET: // nested [] nest_level++; -- cgit v1.2.3 From cd198620a44576afb0f325abd58d503eab65bf32 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 4 Oct 2014 17:15:52 -0700 Subject: Further emph fallback optimizations. 
--- js/stmd.js | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/js/stmd.js b/js/stmd.js index 9a3a8c7..e227578 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2486,6 +2486,8 @@ var delims_to_match = numdelims; var current = []; + var firstend; + var firstpos; var state = 0; var can_close = false; var can_open = false; @@ -2506,10 +2508,14 @@ } else if (numclosedelims >= 2 && delims_to_match >= 2) { delims_to_match -= 2; this.pos += 2; + firstend = current.length; + firstpos = this.pos; current = [{t: 'Strong', c: current}]; } else if (numclosedelims >= 1 && delims_to_match >= 1) { delims_to_match -= 1; this.pos += 1; + firstend = current.length; + firstpos = this.pos; current = [{t: 'Emph', c: current}]; } else { if (!(this.parseInline(current,true))) { @@ -2526,13 +2532,19 @@ } // we didn't match emphasis: fallback - this.pos = startpos + 1; + inlines.push(Str(this.subject.slice(startpos, + startpos + delims_to_match))); + if (delims_to_match < numdelims) { + Array.prototype.push.apply(inlines, current.slice(0,firstend)); + this.pos = firstpos; + } else { // delims_to_match === numdelims + this.pos = startpos + delims_to_match; + } + if (last_emphasis_closer) { this.last_emphasis_closer[c] = last_emphasis_closer; } - inlines.push(Str(c)); return true; - }; // Attempt to parse link title (sans quotes), returning the string -- cgit v1.2.3 From 8a2b85da34e1de10abaf55b212b0660a7917b5d8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 7 Oct 2014 09:05:27 -0700 Subject: Removed spurious 'and', reflowed. --- spec.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/spec.txt b/spec.txt index bc2e381..c520272 100644 --- a/spec.txt +++ b/spec.txt @@ -4817,9 +4817,10 @@ in Markdown:

link

. -URL-escaping and should be left alone inside the destination, as all URL-escaped characters -are also valid URL characters. HTML entities in the destination will be parsed into their UTF8 -codepoints, as usual, and optionally URL-escaped when written as HTML. +URL-escaping should be left alone inside the destination, as all +URL-escaped characters are also valid URL characters. HTML entities in +the destination will be parsed into their UTF8 codepoints, as usual, and +optionally URL-escaped when written as HTML. . [link](foo%20bä) -- cgit v1.2.3 From 4dc7bbb0c3fb1057c921dedc2f83786caaa6f0ad Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 7 Oct 2014 09:05:27 -0700 Subject: Removed spurious 'and', reflowed. --- spec.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/spec.txt b/spec.txt index 0a62b80..990ae8c 100644 --- a/spec.txt +++ b/spec.txt @@ -4816,9 +4816,10 @@ in Markdown:

link

. -URL-escaping and should be left alone inside the destination, as all URL-escaped characters -are also valid URL characters. HTML entities in the destination will be parsed into their UTF8 -codepoints, as usual, and optionally URL-escaped when written as HTML. +URL-escaping should be left alone inside the destination, as all +URL-escaped characters are also valid URL characters. HTML entities in +the destination will be parsed into their UTF8 codepoints, as usual, and +optionally URL-escaped when written as HTML. . [link](foo%20bä) -- cgit v1.2.3 From aabd412250999ecd9c1033966ddfe8a66e26972f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 7 Oct 2014 21:31:35 -0700 Subject: Reset label_nest_level before parsing reference. This fixes a bug with text like: [[some unrelated text [link] [link]: destination See #146. --- js/stmd.js | 1 + 1 file changed, 1 insertion(+) diff --git a/js/stmd.js b/js/stmd.js index e227578..bc6b2d1 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2768,6 +2768,7 @@ var parseReference = function(s, refmap) { this.subject = s; this.pos = 0; + this.label_nest_level = 0; var rawlabel; var dest; var title; -- cgit v1.2.3 From c0c33f83326927d515a973aa7afdd26bb194e0c8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 7 Oct 2014 22:02:21 -0700 Subject: stmd.js: Fixed entityToChar, adding fromCodePoint polyfill. Closes #151. --- LICENSE | 25 +++++++++++++++++++++ js/stmd.js | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 94 insertions(+), 4 deletions(-) diff --git a/LICENSE b/LICENSE index bb8c36f..988c4b4 100644 --- a/LICENSE +++ b/LICENSE @@ -28,3 +28,28 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +----- + +The polyfill for String.fromCodePoint included in stmd.js is +Copyright Mathias Bynens + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + diff --git a/js/stmd.js b/js/stmd.js index bc6b2d1..dd7876a 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2223,6 +2223,71 @@ var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; // UTILITY FUNCTIONS + // polyfill for fromCodePoint: + // https://github.com/mathiasbynens/String.fromCodePoint + /*! 
http://mths.be/fromcodepoint v0.2.1 by @mathias */ + if (!String.fromCodePoint) { + (function() { + var defineProperty = (function() { + // IE 8 only supports `Object.defineProperty` on DOM elements + try { + var object = {}; + var $defineProperty = Object.defineProperty; + var result = $defineProperty(object, object, object) && $defineProperty; + } catch(error) {} + return result; + }()); + var stringFromCharCode = String.fromCharCode; + var floor = Math.floor; + var fromCodePoint = function(_) { + var MAX_SIZE = 0x4000; + var codeUnits = []; + var highSurrogate; + var lowSurrogate; + var index = -1; + var length = arguments.length; + if (!length) { + return ''; + } + var result = ''; + while (++index < length) { + var codePoint = Number(arguments[index]); + if ( + !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity` + codePoint < 0 || // not a valid Unicode code point + codePoint > 0x10FFFF || // not a valid Unicode code point + floor(codePoint) != codePoint // not an integer + ) { + return String.fromCharCode(0xFFFD); + } + if (codePoint <= 0xFFFF) { // BMP code point + codeUnits.push(codePoint); + } else { // Astral code point; split in surrogate halves + // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae + codePoint -= 0x10000; + highSurrogate = (codePoint >> 10) + 0xD800; + lowSurrogate = (codePoint % 0x400) + 0xDC00; + codeUnits.push(highSurrogate, lowSurrogate); + } + if (index + 1 == length || codeUnits.length > MAX_SIZE) { + result += stringFromCharCode.apply(null, codeUnits); + codeUnits.length = 0; + } + } + return result; + }; + if (defineProperty) { + defineProperty(String, 'fromCodePoint', { + 'value': fromCodePoint, + 'configurable': true, + 'writable': true + }); + } else { + String.fromCodePoint = fromCodePoint; + } + }()); + } + var entityToChar = function(m) { var isNumeric = /^&#/.test(m); var isHex = /^&#[Xx]/.test(m); @@ -2234,7 +2299,7 @@ } else { num = parseInt(m.slice(2,-1), 10); } - uchar = 
String.fromCharCode(num); + uchar = String.fromCodePoint(num); } else { uchar = entities[m.slice(1,-1)]; } @@ -2428,7 +2493,7 @@ if (cc_after === -1) { char_after = '\n'; } else { - char_after = String.fromCharCode(cc_after); + char_after = String.fromCodePoint(cc_after); } var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after)); @@ -2460,7 +2525,7 @@ var startpos = this.pos; var c ; var first_close = 0; - c = String.fromCharCode(cc); + c = String.fromCodePoint(cc); var numdelims; var numclosedelims; @@ -2876,7 +2941,7 @@ } if (!res) { this.pos += 1; - inlines.push({t: 'Str', c: String.fromCharCode(c)}); + inlines.push({t: 'Str', c: String.fromCodePoint(c)}); } if (memoize) { -- cgit v1.2.3 From 3d99baba064091f74b9da78eaed38fcf4875af46 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 7 Oct 2014 22:21:03 -0700 Subject: Adjusted tests for new js parser. --- spec.txt | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/spec.txt b/spec.txt index 990ae8c..db62f53 100644 --- a/spec.txt +++ b/spec.txt @@ -4525,6 +4525,24 @@ __foo _bar_ baz__

foo bar baz

. +But note: + +. +*foo**bar**baz* +. +

foobarbaz

+. + +. +**foo*bar*baz** +. +

foobarbaz**

+. + +The difference is that in the two preceding cases, +the internal delimiters [can close emphasis](#can-close-emphasis), +while in the cases with spaces, they cannot. + Note that you cannot nest emphasis directly inside emphasis using the same delimeter, or strong emphasis directly inside strong emphasis: @@ -4606,7 +4624,7 @@ However, a string of four or more `****` can never close emphasis:

*foo****

. -Note that there are some asymmetries here: +We retain symmetry in these cases: . *foo** @@ -4614,7 +4632,7 @@ Note that there are some asymmetries here: **foo* .

foo*

-

**foo*

+

*foo

. . @@ -4637,7 +4655,7 @@ More cases with mismatched delimiters: . ***foo* . -

***foo*

+

**foo

. . @@ -4649,7 +4667,7 @@ More cases with mismatched delimiters: . ***foo** . -

***foo**

+

*foo

. . -- cgit v1.2.3 From d3c3e749f4f7b95a9604f751cf993fd488a15b19 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 7 Oct 2014 22:24:53 -0700 Subject: Cleaned up entity section of spec. We convert entities to unicode characters, not UTF-8 sequences. (Though they might ultimately be output that way.) --- spec.txt | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/spec.txt b/spec.txt index db62f53..489b9c0 100644 --- a/spec.txt +++ b/spec.txt @@ -3727,21 +3727,25 @@ foo ## Entities -With the goal of making this standard as HTML-agnostic as possible, all HTML valid HTML Entities in any -context are recognized as such and converted into their actual values (i.e. the UTF8 characters representing -the entity itself) before they are stored in the AST. +With the goal of making this standard as HTML-agnostic as possible, all +valid HTML entities in any context are recognized as such and +converted into unicode characters before they are stored in the AST. -This allows implementations that target HTML output to trivially escape the entities when generating HTML, -and simplifies the job of implementations targetting other languages, as these will only need to handle the -UTF8 chars and need not be HTML-entity aware. +This allows implementations that target HTML output to trivially escape +the entities when generating HTML, and simplifies the job of +implementations targetting other languages, as these will only need to +handle the unicode chars and need not be HTML-entity aware. [Named entities](#name-entities) consist of `&` -+ any of the valid HTML5 entity names + `;`. The [following document](http://www.whatwg.org/specs/web-apps/current-work/multipage/entities.json) -is used as an authoritative source of the valid entity names and their corresponding codepoints. ++ any of the valid HTML5 entity names + `;`. 
The +[following document](http://www.whatwg.org/specs/web-apps/current-work/multipage/entities.json) +is used as an authoritative source of the valid entity names and their +corresponding codepoints. -Conforming implementations that target Markdown don't need to generate entities for all the valid -named entities that exist, with the exception of `"` (`"`), `&` (`&`), `<` (`<`) and `>` (`>`), -which always need to be written as entities for security reasons. +Conforming implementations that target HTML don't need to generate +entities for all the valid named entities that exist, with the exception +of `"` (`"`), `&` (`&`), `<` (`<`) and `>` (`>`), which +always need to be written as entities for security reasons. .   & © Æ Ď ¾ ℋ ⅆ ∲ @@ -3750,9 +3754,10 @@ which always need to be written as entities for security reasons. . [Decimal entities](#decimal-entities) -consist of `&#` + a string of 1--8 arabic digits + `;`. Again, these entities need to be recognised -and tranformed into their corresponding UTF8 codepoints. Invalid Unicode codepoints will be written -as the "unknown codepoint" character (`0xFFFD`) +consist of `&#` + a string of 1--8 arabic digits + `;`. Again, these +entities need to be recognised and tranformed into their corresponding +UTF8 codepoints. Invalid Unicode codepoints will be written as the +"unknown codepoint" character (`0xFFFD`) . # Ӓ Ϡ � @@ -3779,7 +3784,8 @@ Here are some nonentities: . Although HTML5 does accept some entities without a trailing semicolon -(such as `©`), these are not recognized as entities here, because it makes the grammar too ambiguous: +(such as `©`), these are not recognized as entities here, because it +makes the grammar too ambiguous: . © @@ -3787,7 +3793,8 @@ Although HTML5 does accept some entities without a trailing semicolon

&copy

. -Strings that are not on the list of HTML5 named entities are not recognized as entities either: +Strings that are not on the list of HTML5 named entities are not +recognized as entities either: . &MadeUpEntity; @@ -4836,7 +4843,7 @@ in Markdown: URL-escaping should be left alone inside the destination, as all URL-escaped characters are also valid URL characters. HTML entities in -the destination will be parsed into their UTF8 codepoints, as usual, and +the destination will be parsed into their UTF-8 codepoints, as usual, and optionally URL-escaped when written as HTML. . -- cgit v1.2.3 From bc5b7c288d29215c585db254a203889e0dea54e2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 7 Oct 2014 22:35:19 -0700 Subject: Removed oldtests. --- Makefile | 8 +- oldtests/Blockquotes/Indents.html | 12 - oldtests/Blockquotes/Indents.markdown | 5 - oldtests/Blockquotes/Nesting.html | 32 - oldtests/Blockquotes/Nesting.markdown | 22 - oldtests/Blockquotes/Separation.html | 39 - oldtests/Blockquotes/Separation.markdown | 29 - oldtests/Code/BlankLines.html | 33 - oldtests/Code/BlankLines.markdown | 28 - oldtests/Code/BlankLinesAtEnd.html | 14 - oldtests/Code/BlankLinesAtEnd.markdown | 14 - oldtests/Code/FenceMatching.html | 8 - oldtests/Code/FenceMatching.markdown | 10 - oldtests/Code/FencedCodeBlocks.html | 24 - oldtests/Code/FencedCodeBlocks.markdown | 35 - oldtests/Code/IndentedCodeBlocks.html | 22 - oldtests/Code/IndentedCodeBlocks.markdown | 22 - oldtests/Code/IndentedFences.html | 20 - oldtests/Code/IndentedFences.markdown | 26 - oldtests/Code/IndentedInLists.html | 22 - oldtests/Code/IndentedInLists.markdown | 17 - oldtests/Code/Inline.html | 13 - oldtests/Code/Inline.markdown | 13 - oldtests/Code/ListBreakAfter.html | 30 - oldtests/Code/ListBreakAfter.markdown | 26 - oldtests/Code/WhiteLines.html | 7 - oldtests/Code/WhiteLines.markdown | 9 - oldtests/Emphasis/Escapes.html | 1 - oldtests/Emphasis/Escapes.markdown | 1 - oldtests/Emphasis/NestedEmphAndStrong.html | 
66 -- oldtests/Emphasis/NestedEmphAndStrong.markdown | 69 -- oldtests/Emphasis/Pathological.html | 24 - oldtests/Emphasis/Pathological.markdown | 26 - oldtests/Emphasis/Punctuation.html | 10 - oldtests/Emphasis/Punctuation.markdown | 19 - oldtests/HTML/Blocks.html | 18 - oldtests/HTML/Blocks.markdown | 26 - oldtests/HTML/Inline.html | 8 - oldtests/HTML/Inline.markdown | 8 - oldtests/HTML/UppercaseTags.html | 4 - oldtests/HTML/UppercaseTags.markdown | 5 - oldtests/Headers/ATX.html | 14 - oldtests/Headers/ATX.markdown | 20 - oldtests/Headers/Setext.html | 9 - oldtests/Headers/Setext.markdown | 17 - oldtests/Links/AngleBrackets.html | 3 - oldtests/Links/AngleBrackets.markdown | 7 - oldtests/Links/AutoLinks.html | 7 - oldtests/Links/AutoLinks.markdown | 7 - oldtests/Links/BackticksInLinks.html | 1 - oldtests/Links/BackticksInLinks.markdown | 1 - oldtests/Links/CaseInsensitiveReferences.html | 1 - oldtests/Links/CaseInsensitiveReferences.markdown | 3 - oldtests/Links/Entities.html | 2 - oldtests/Links/Entities.markdown | 3 - oldtests/Links/InlineLinks.html | 10 - oldtests/Links/InlineLinks.markdown | 9 - oldtests/Links/ParensInURLs.html | 6 - oldtests/Links/ParensInURLs.markdown | 14 - oldtests/Links/ReferenceLinks.html | 7 - oldtests/Links/ReferenceLinks.markdown | 10 - oldtests/Lists/CodeBlocksInLists.html | 14 - oldtests/Lists/CodeBlocksInLists.markdown | 18 - oldtests/Lists/ConsecutiveLists.html | 20 - oldtests/Lists/ConsecutiveLists.markdown | 10 - oldtests/Lists/EmptyListItem.html | 10 - oldtests/Lists/EmptyListItem.markdown | 7 - oldtests/Lists/InBlockquote.html | 22 - oldtests/Lists/InBlockquote.markdown | 12 - oldtests/Lists/Indents.html | 22 - oldtests/Lists/Indents.markdown | 17 - oldtests/Lists/ListsAndHRs.html | 7 - oldtests/Lists/ListsAndHRs.markdown | 3 - oldtests/Lists/ListsAndSetextHeaders.html | 6 - oldtests/Lists/ListsAndSetextHeaders.markdown | 4 - oldtests/Lists/MultipleBlankLines.html | 56 -- oldtests/Lists/MultipleBlankLines.markdown | 37 - 
oldtests/Lists/Start.html | 11 - oldtests/Lists/Start.markdown | 7 - oldtests/Lists/Sublists.html | 49 -- oldtests/Lists/Sublists.markdown | 24 - oldtests/Lists/TightAndLoose.html | 49 -- oldtests/Lists/TightAndLoose.markdown | 45 -- oldtests/Lists/TightLooseBlockquote.html | 32 - oldtests/Lists/TightLooseBlockquote.markdown | 25 - oldtests/Lists/TightLooseMore.html | 7 - oldtests/Lists/TightLooseMore.markdown | 4 - oldtests/Lists/TwoBlankLinesEndList.html | 21 - oldtests/Lists/TwoBlankLinesEndList.markdown | 20 - oldtests/Makefile | 55 -- oldtests/Misc/BackslashEscapes.html | 14 - oldtests/Misc/BackslashEscapes.markdown | 19 - oldtests/Misc/Laziness.html | 22 - oldtests/Misc/Laziness.markdown | 14 - oldtests/Misc/LineBreaks.html | 11 - oldtests/Misc/LineBreaks.markdown | 18 - oldtests/Misc/Transitions.html | 26 - oldtests/Misc/Transitions.markdown | 20 - oldtests/Original/Amps_and_angle_encoding.html | 9 - oldtests/Original/Amps_and_angle_encoding.markdown | 21 - oldtests/Original/Auto_links.html | 13 - oldtests/Original/Auto_links.markdown | 13 - oldtests/Original/Backslash_escapes.html | 75 -- oldtests/Original/Backslash_escapes.markdown | 120 --- .../Original/Blockquotes_with_code_blocks.html | 12 - .../Original/Blockquotes_with_code_blocks.markdown | 11 - oldtests/Original/Code_Blocks.html | 12 - oldtests/Original/Code_Blocks.markdown | 14 - oldtests/Original/Code_Spans.html | 3 - oldtests/Original/Code_Spans.markdown | 5 - oldtests/Original/Horizontal_rules.html | 39 - oldtests/Original/Horizontal_rules.markdown | 67 -- oldtests/Original/Images.html | 11 - oldtests/Original/Images.markdown | 26 - oldtests/Original/Inline_HTML_Advanced.html | 23 - oldtests/Original/Inline_HTML_Advanced.markdown | 30 - oldtests/Original/Inline_HTML_Simple.html | 45 -- oldtests/Original/Inline_HTML_Simple.markdown | 69 -- oldtests/Original/Inline_HTML_comments.html | 8 - oldtests/Original/Inline_HTML_comments.markdown | 13 - oldtests/Original/Links_inline_style.html | 12 - 
oldtests/Original/Links_inline_style.markdown | 24 - oldtests/Original/Links_reference_style.html | 28 - oldtests/Original/Links_reference_style.markdown | 71 -- oldtests/Original/Links_shortcut_references.html | 6 - .../Original/Links_shortcut_references.markdown | 20 - oldtests/Original/Literal_quotes_in_titles.html | 2 - .../Original/Literal_quotes_in_titles.markdown | 7 - .../Original/Markdown_Documentation_Basics.html | 242 ------ .../Markdown_Documentation_Basics.markdown | 306 ------- .../Original/Markdown_Documentation_Syntax.html | 708 ---------------- .../Markdown_Documentation_Syntax.markdown | 888 --------------------- oldtests/Original/Nested_blockquotes.html | 7 - oldtests/Original/Nested_blockquotes.markdown | 5 - oldtests/Original/Ordered_and_unordered_lists.html | 112 --- .../Original/Ordered_and_unordered_lists.markdown | 131 --- oldtests/Original/README | 15 - oldtests/Original/Strong_and_em_together.html | 4 - oldtests/Original/Strong_and_em_together.markdown | 7 - oldtests/Original/Tabs.html | 19 - oldtests/Original/Tabs.markdown | 21 - oldtests/Original/Tidyness.html | 8 - oldtests/Original/Tidyness.markdown | 5 - oldtests/Tabs/TabConversionUnicode.html | 1 - oldtests/Tabs/TabConversionUnicode.markdown | 1 - 145 files changed, 3 insertions(+), 5020 deletions(-) delete mode 100644 oldtests/Blockquotes/Indents.html delete mode 100644 oldtests/Blockquotes/Indents.markdown delete mode 100644 oldtests/Blockquotes/Nesting.html delete mode 100644 oldtests/Blockquotes/Nesting.markdown delete mode 100644 oldtests/Blockquotes/Separation.html delete mode 100644 oldtests/Blockquotes/Separation.markdown delete mode 100644 oldtests/Code/BlankLines.html delete mode 100644 oldtests/Code/BlankLines.markdown delete mode 100644 oldtests/Code/BlankLinesAtEnd.html delete mode 100644 oldtests/Code/BlankLinesAtEnd.markdown delete mode 100644 oldtests/Code/FenceMatching.html delete mode 100644 oldtests/Code/FenceMatching.markdown delete mode 100644 
oldtests/Code/FencedCodeBlocks.html delete mode 100644 oldtests/Code/FencedCodeBlocks.markdown delete mode 100644 oldtests/Code/IndentedCodeBlocks.html delete mode 100644 oldtests/Code/IndentedCodeBlocks.markdown delete mode 100644 oldtests/Code/IndentedFences.html delete mode 100644 oldtests/Code/IndentedFences.markdown delete mode 100644 oldtests/Code/IndentedInLists.html delete mode 100644 oldtests/Code/IndentedInLists.markdown delete mode 100644 oldtests/Code/Inline.html delete mode 100644 oldtests/Code/Inline.markdown delete mode 100644 oldtests/Code/ListBreakAfter.html delete mode 100644 oldtests/Code/ListBreakAfter.markdown delete mode 100644 oldtests/Code/WhiteLines.html delete mode 100644 oldtests/Code/WhiteLines.markdown delete mode 100644 oldtests/Emphasis/Escapes.html delete mode 100644 oldtests/Emphasis/Escapes.markdown delete mode 100644 oldtests/Emphasis/NestedEmphAndStrong.html delete mode 100644 oldtests/Emphasis/NestedEmphAndStrong.markdown delete mode 100644 oldtests/Emphasis/Pathological.html delete mode 100644 oldtests/Emphasis/Pathological.markdown delete mode 100644 oldtests/Emphasis/Punctuation.html delete mode 100644 oldtests/Emphasis/Punctuation.markdown delete mode 100644 oldtests/HTML/Blocks.html delete mode 100644 oldtests/HTML/Blocks.markdown delete mode 100644 oldtests/HTML/Inline.html delete mode 100644 oldtests/HTML/Inline.markdown delete mode 100644 oldtests/HTML/UppercaseTags.html delete mode 100644 oldtests/HTML/UppercaseTags.markdown delete mode 100644 oldtests/Headers/ATX.html delete mode 100644 oldtests/Headers/ATX.markdown delete mode 100644 oldtests/Headers/Setext.html delete mode 100644 oldtests/Headers/Setext.markdown delete mode 100644 oldtests/Links/AngleBrackets.html delete mode 100644 oldtests/Links/AngleBrackets.markdown delete mode 100644 oldtests/Links/AutoLinks.html delete mode 100644 oldtests/Links/AutoLinks.markdown delete mode 100644 oldtests/Links/BackticksInLinks.html delete mode 100644 
oldtests/Links/BackticksInLinks.markdown delete mode 100644 oldtests/Links/CaseInsensitiveReferences.html delete mode 100644 oldtests/Links/CaseInsensitiveReferences.markdown delete mode 100644 oldtests/Links/Entities.html delete mode 100644 oldtests/Links/Entities.markdown delete mode 100644 oldtests/Links/InlineLinks.html delete mode 100644 oldtests/Links/InlineLinks.markdown delete mode 100644 oldtests/Links/ParensInURLs.html delete mode 100644 oldtests/Links/ParensInURLs.markdown delete mode 100644 oldtests/Links/ReferenceLinks.html delete mode 100644 oldtests/Links/ReferenceLinks.markdown delete mode 100644 oldtests/Lists/CodeBlocksInLists.html delete mode 100644 oldtests/Lists/CodeBlocksInLists.markdown delete mode 100644 oldtests/Lists/ConsecutiveLists.html delete mode 100644 oldtests/Lists/ConsecutiveLists.markdown delete mode 100644 oldtests/Lists/EmptyListItem.html delete mode 100644 oldtests/Lists/EmptyListItem.markdown delete mode 100644 oldtests/Lists/InBlockquote.html delete mode 100644 oldtests/Lists/InBlockquote.markdown delete mode 100644 oldtests/Lists/Indents.html delete mode 100644 oldtests/Lists/Indents.markdown delete mode 100644 oldtests/Lists/ListsAndHRs.html delete mode 100644 oldtests/Lists/ListsAndHRs.markdown delete mode 100644 oldtests/Lists/ListsAndSetextHeaders.html delete mode 100644 oldtests/Lists/ListsAndSetextHeaders.markdown delete mode 100644 oldtests/Lists/MultipleBlankLines.html delete mode 100644 oldtests/Lists/MultipleBlankLines.markdown delete mode 100644 oldtests/Lists/Start.html delete mode 100644 oldtests/Lists/Start.markdown delete mode 100644 oldtests/Lists/Sublists.html delete mode 100644 oldtests/Lists/Sublists.markdown delete mode 100644 oldtests/Lists/TightAndLoose.html delete mode 100644 oldtests/Lists/TightAndLoose.markdown delete mode 100644 oldtests/Lists/TightLooseBlockquote.html delete mode 100644 oldtests/Lists/TightLooseBlockquote.markdown delete mode 100644 oldtests/Lists/TightLooseMore.html delete mode 
100644 oldtests/Lists/TightLooseMore.markdown delete mode 100644 oldtests/Lists/TwoBlankLinesEndList.html delete mode 100644 oldtests/Lists/TwoBlankLinesEndList.markdown delete mode 100644 oldtests/Makefile delete mode 100644 oldtests/Misc/BackslashEscapes.html delete mode 100644 oldtests/Misc/BackslashEscapes.markdown delete mode 100644 oldtests/Misc/Laziness.html delete mode 100644 oldtests/Misc/Laziness.markdown delete mode 100644 oldtests/Misc/LineBreaks.html delete mode 100644 oldtests/Misc/LineBreaks.markdown delete mode 100644 oldtests/Misc/Transitions.html delete mode 100644 oldtests/Misc/Transitions.markdown delete mode 100644 oldtests/Original/Amps_and_angle_encoding.html delete mode 100644 oldtests/Original/Amps_and_angle_encoding.markdown delete mode 100644 oldtests/Original/Auto_links.html delete mode 100644 oldtests/Original/Auto_links.markdown delete mode 100644 oldtests/Original/Backslash_escapes.html delete mode 100644 oldtests/Original/Backslash_escapes.markdown delete mode 100644 oldtests/Original/Blockquotes_with_code_blocks.html delete mode 100644 oldtests/Original/Blockquotes_with_code_blocks.markdown delete mode 100644 oldtests/Original/Code_Blocks.html delete mode 100644 oldtests/Original/Code_Blocks.markdown delete mode 100644 oldtests/Original/Code_Spans.html delete mode 100644 oldtests/Original/Code_Spans.markdown delete mode 100644 oldtests/Original/Horizontal_rules.html delete mode 100644 oldtests/Original/Horizontal_rules.markdown delete mode 100644 oldtests/Original/Images.html delete mode 100644 oldtests/Original/Images.markdown delete mode 100644 oldtests/Original/Inline_HTML_Advanced.html delete mode 100644 oldtests/Original/Inline_HTML_Advanced.markdown delete mode 100644 oldtests/Original/Inline_HTML_Simple.html delete mode 100644 oldtests/Original/Inline_HTML_Simple.markdown delete mode 100644 oldtests/Original/Inline_HTML_comments.html delete mode 100644 oldtests/Original/Inline_HTML_comments.markdown delete mode 100644 
oldtests/Original/Links_inline_style.html delete mode 100644 oldtests/Original/Links_inline_style.markdown delete mode 100644 oldtests/Original/Links_reference_style.html delete mode 100644 oldtests/Original/Links_reference_style.markdown delete mode 100644 oldtests/Original/Links_shortcut_references.html delete mode 100644 oldtests/Original/Links_shortcut_references.markdown delete mode 100644 oldtests/Original/Literal_quotes_in_titles.html delete mode 100644 oldtests/Original/Literal_quotes_in_titles.markdown delete mode 100644 oldtests/Original/Markdown_Documentation_Basics.html delete mode 100644 oldtests/Original/Markdown_Documentation_Basics.markdown delete mode 100644 oldtests/Original/Markdown_Documentation_Syntax.html delete mode 100644 oldtests/Original/Markdown_Documentation_Syntax.markdown delete mode 100644 oldtests/Original/Nested_blockquotes.html delete mode 100644 oldtests/Original/Nested_blockquotes.markdown delete mode 100644 oldtests/Original/Ordered_and_unordered_lists.html delete mode 100644 oldtests/Original/Ordered_and_unordered_lists.markdown delete mode 100644 oldtests/Original/README delete mode 100644 oldtests/Original/Strong_and_em_together.html delete mode 100644 oldtests/Original/Strong_and_em_together.markdown delete mode 100644 oldtests/Original/Tabs.html delete mode 100644 oldtests/Original/Tabs.markdown delete mode 100644 oldtests/Original/Tidyness.html delete mode 100644 oldtests/Original/Tidyness.markdown delete mode 100644 oldtests/Tabs/TabConversionUnicode.html delete mode 100644 oldtests/Tabs/TabConversionUnicode.markdown diff --git a/Makefile b/Makefile index 671d30d..8d35b9d 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ DATADIR?=data PROG?=./stmd -.PHONY: all oldtests test spec benchjs testjs +.PHONY: all test spec benchjs testjs all: $(SRCDIR)/case_fold_switch.inc $(PROG) README.html: README.md template.html @@ -28,9 +28,6 @@ spec.pdf: spec.md template.tex specfilter.hs --number-sections -V documentclass=report -V 
tocdepth=2 \ -V classoption=twosides -oldtests: - make -C oldtests --quiet clean all - test: spec.txt perl runtests.pl $< $(PROG) @@ -63,7 +60,8 @@ dingus: cd js && echo "Starting dingus server at http://localhost:9000" && python -m SimpleHTTPServer 9000 leakcheck: $(PROG) - cat oldtests/*/*.markdown | valgrind --leak-check=full --dsymutil=yes $(PROG) + # TODO produce leaktest.md that tests everything + cat leaktest.md | valgrind --leak-check=full --dsymutil=yes $(PROG) operf: $(PROG) operf $(PROG) /dev/null diff --git a/oldtests/Blockquotes/Indents.html b/oldtests/Blockquotes/Indents.html deleted file mode 100644 index fd98ee8..0000000 --- a/oldtests/Blockquotes/Indents.html +++ /dev/null @@ -1,12 +0,0 @@ -
-

one -blockquote

-
-
-
-
-

triply nested -triply nested

-
-
-
diff --git a/oldtests/Blockquotes/Indents.markdown b/oldtests/Blockquotes/Indents.markdown deleted file mode 100644 index f9342ff..0000000 --- a/oldtests/Blockquotes/Indents.markdown +++ /dev/null @@ -1,5 +0,0 @@ -> one - > blockquote - ->>> triply nested - > > > triply nested diff --git a/oldtests/Blockquotes/Nesting.html b/oldtests/Blockquotes/Nesting.html deleted file mode 100644 index f40e999..0000000 --- a/oldtests/Blockquotes/Nesting.html +++ /dev/null @@ -1,32 +0,0 @@ -

These are all equivalent:

-
-
-

nested -blockquote

-
-
-
-
-

nested -blockquote

-
-
-
-
-

nested -blockquote

-
-
-
-
-

nested -blockquote

-
-
-

This is not:

-
-

nested

-
-

blockquote

-
-
diff --git a/oldtests/Blockquotes/Nesting.markdown b/oldtests/Blockquotes/Nesting.markdown deleted file mode 100644 index 3d67843..0000000 --- a/oldtests/Blockquotes/Nesting.markdown +++ /dev/null @@ -1,22 +0,0 @@ -These are all equivalent: - -> > nested -> > blockquote - - ->> nested ->> blockquote - - -> > nested -blockquote - - -> > nested -> blockquote - - -This is not: - -> nested -> > blockquote diff --git a/oldtests/Blockquotes/Separation.html b/oldtests/Blockquotes/Separation.html deleted file mode 100644 index 910d545..0000000 --- a/oldtests/Blockquotes/Separation.html +++ /dev/null @@ -1,39 +0,0 @@ -

One blockquote, two paragraphs:

-
-

one

-

two

-
-

Two blockquotes:

-
-

one

-
-
-

two

-
-

Nested blockquote, two paragraphs:

-
-
-

one

-

two

-
-
-

Nested blockquote, two blockquotes:

-
-
-

one

-
-
-

two

-
-
-

Two nested blockquotes:

-
-
-

one

-
-
-
-
-

two

-
-
diff --git a/oldtests/Blockquotes/Separation.markdown b/oldtests/Blockquotes/Separation.markdown deleted file mode 100644 index 823d865..0000000 --- a/oldtests/Blockquotes/Separation.markdown +++ /dev/null @@ -1,29 +0,0 @@ -One blockquote, two paragraphs: - -> one -> -> two - -Two blockquotes: - -> one - -> two - -Nested blockquote, two paragraphs: - -> > one -> > -> > two - -Nested blockquote, two blockquotes: - -> > one -> -> > two - -Two nested blockquotes: - -> > one - -> > two diff --git a/oldtests/Code/BlankLines.html b/oldtests/Code/BlankLines.html deleted file mode 100644 index ae0abf7..0000000 --- a/oldtests/Code/BlankLines.html +++ /dev/null @@ -1,33 +0,0 @@ -
foo
-
-
-
-bar
-
-
-
foo
-
-
-
-bar
-
-
-
foo
-
-    
-
-bar
-
-
    -
  1. One

    -
    CodeA
    -
    -CodeB
    -
  2. -
  3. Two

    -
    CodeA
    -
  4. -
-
    -
  1. One
  2. -
diff --git a/oldtests/Code/BlankLines.markdown b/oldtests/Code/BlankLines.markdown deleted file mode 100644 index b0d5a0c..0000000 --- a/oldtests/Code/BlankLines.markdown +++ /dev/null @@ -1,28 +0,0 @@ - foo - - - - bar -> foo -> -> -> -> bar - foo - - - - bar - -1. One - - CodeA - - CodeB - -2. Two - - CodeA - - -1. One diff --git a/oldtests/Code/BlankLinesAtEnd.html b/oldtests/Code/BlankLinesAtEnd.html deleted file mode 100644 index ac803d9..0000000 --- a/oldtests/Code/BlankLinesAtEnd.html +++ /dev/null @@ -1,14 +0,0 @@ -
    -
  • List

    -
    code
    -
  • -
-
    -
  • one
  • -
  • two
  • -
-
    -
  • one -not code

  • -
  • two

  • -
diff --git a/oldtests/Code/BlankLinesAtEnd.markdown b/oldtests/Code/BlankLinesAtEnd.markdown deleted file mode 100644 index 55879ae..0000000 --- a/oldtests/Code/BlankLinesAtEnd.markdown +++ /dev/null @@ -1,14 +0,0 @@ -* List - - code - - - * one - * two - - - -* one - not code - -* two diff --git a/oldtests/Code/FenceMatching.html b/oldtests/Code/FenceMatching.html deleted file mode 100644 index 4c7468e..0000000 --- a/oldtests/Code/FenceMatching.html +++ /dev/null @@ -1,8 +0,0 @@ -
```
-
-

-`````
-
-````
-
-
diff --git a/oldtests/Code/FenceMatching.markdown b/oldtests/Code/FenceMatching.markdown deleted file mode 100644 index d86169a..0000000 --- a/oldtests/Code/FenceMatching.markdown +++ /dev/null @@ -1,10 +0,0 @@ -````abc -``` -```` -``````blah - -````` - -```` - -``````````` diff --git a/oldtests/Code/FencedCodeBlocks.html b/oldtests/Code/FencedCodeBlocks.html deleted file mode 100644 index 4813d72..0000000 --- a/oldtests/Code/FencedCodeBlocks.html +++ /dev/null @@ -1,24 +0,0 @@ -

This is a fenced code block:

-
pairs :: [(Int,Char)]
-pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
-
-

Here is one with tildes:

-
pairs :: [(Int,Char)]
-pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
-
-

More metadata:

-
pairs :: [(Int,Char)]
-pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
-
-

More backticks:

-
pairs :: [(Int,Char)]
-pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
-
-backticks :: String
-backticks = "`````"
-
-

Without an end:

-
code with
-no end
-
-
diff --git a/oldtests/Code/FencedCodeBlocks.markdown b/oldtests/Code/FencedCodeBlocks.markdown deleted file mode 100644 index 6ccc6be..0000000 --- a/oldtests/Code/FencedCodeBlocks.markdown +++ /dev/null @@ -1,35 +0,0 @@ -This is a fenced code block: -```haskell -pairs :: [(Int,Char)] -pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']] -``` -Here is one with tildes: - -~~~ haskell -pairs :: [(Int,Char)] -pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']] -~~~ - -More metadata: - -```haskell numberLines start=50 -pairs :: [(Int,Char)] -pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']] -``` - -More backticks: - -```````` haskell -pairs :: [(Int,Char)] -pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']] - -backticks :: String -backticks = "`````" -````````````` - -Without an end: - -``` -code with -no end - diff --git a/oldtests/Code/IndentedCodeBlocks.html b/oldtests/Code/IndentedCodeBlocks.html deleted file mode 100644 index 0b9b7e7..0000000 --- a/oldtests/Code/IndentedCodeBlocks.html +++ /dev/null @@ -1,22 +0,0 @@ -

Indented code with two space indent in first and last line:

-
  two spaces *hello*
-{ more }
-
-  and
-
-

Indented code requires a leading/trailing blank line: -quick-command --option "$*"

-

Indented code does not require a trailing blank line:

-
code
-
-

and not code.

-

Code in blockquote:

-
-
code
-
-
-

Code in list:

-
    -
  1. code
    -
  2. -
diff --git a/oldtests/Code/IndentedCodeBlocks.markdown b/oldtests/Code/IndentedCodeBlocks.markdown deleted file mode 100644 index 2a99db0..0000000 --- a/oldtests/Code/IndentedCodeBlocks.markdown +++ /dev/null @@ -1,22 +0,0 @@ -Indented code with two space indent in first and last line: - - two spaces *hello* - { more } - - and - -Indented code requires a leading/trailing blank line: - quick-command --option "$*" - -Indented code does not require a trailing blank line: - - code -and not code. - -Code in blockquote: - -> code - -Code in list: - -1. code diff --git a/oldtests/Code/IndentedFences.html b/oldtests/Code/IndentedFences.html deleted file mode 100644 index 66e76da..0000000 --- a/oldtests/Code/IndentedFences.html +++ /dev/null @@ -1,20 +0,0 @@ -
a
-
-
z
-
-
a
-a
-a
- a
-
-
    -
  • foo

    -
         Hello
    -
    -World
    -
  • -
-
-
a
-
-
diff --git a/oldtests/Code/IndentedFences.markdown b/oldtests/Code/IndentedFences.markdown deleted file mode 100644 index 098545f..0000000 --- a/oldtests/Code/IndentedFences.markdown +++ /dev/null @@ -1,26 +0,0 @@ - ``` - a - ``` - - ``` -z -``` - - ``` -a - a - a - a - ``` - -* foo - - ``` - Hello - - World - ``` - -> ``` ->a ->``` diff --git a/oldtests/Code/IndentedInLists.html b/oldtests/Code/IndentedInLists.html deleted file mode 100644 index 76ed424..0000000 --- a/oldtests/Code/IndentedInLists.html +++ /dev/null @@ -1,22 +0,0 @@ -
    -
  • code starts here
    -
  • -
-
    -
  1. foo

    -
    code starts here
    -
  2. -
  3. foo

    -
    code starts here
    -
  4. -
-
    -
  • foo

    -
    code starts here
    -
    -
      -
    • foo

      -
      code starts here
      -
    • -
  • -
diff --git a/oldtests/Code/IndentedInLists.markdown b/oldtests/Code/IndentedInLists.markdown deleted file mode 100644 index 54e1af1..0000000 --- a/oldtests/Code/IndentedInLists.markdown +++ /dev/null @@ -1,17 +0,0 @@ -- code starts here - -1. foo - - code starts here - -2. foo - - code starts here - -- foo - - code starts here - - - foo - - code starts here diff --git a/oldtests/Code/Inline.html b/oldtests/Code/Inline.html deleted file mode 100644 index 9c52790..0000000 --- a/oldtests/Code/Inline.html +++ /dev/null @@ -1,13 +0,0 @@ -

All of these are equivalent:

-
    -
  • *hi*
  • -
  • *hi*
  • -
  • *hi*
  • -
  • *hi*
  • -
  • *hi*
  • -
-

Backticks in code spans:

-
    -
  • ``code``
  • -
  • ``code``
  • -
diff --git a/oldtests/Code/Inline.markdown b/oldtests/Code/Inline.markdown deleted file mode 100644 index 38e5b0c..0000000 --- a/oldtests/Code/Inline.markdown +++ /dev/null @@ -1,13 +0,0 @@ -All of these are equivalent: - -- `*hi*` -- ` *hi* ` -- ``*hi* `` -- ````*hi*```` -- `*hi* - ` - -Backticks in code spans: - -- ``` ``code`` ``` -- ` ``code`` ` diff --git a/oldtests/Code/ListBreakAfter.html b/oldtests/Code/ListBreakAfter.html deleted file mode 100644 index 29d6d5e..0000000 --- a/oldtests/Code/ListBreakAfter.html +++ /dev/null @@ -1,30 +0,0 @@ -
    -
  • foo

    -
      -
    • bar

      -
      code1
      -code2
      -
      -

      code?

    • -
  • -
  • foo

    -
      -
    • bar

      -
      code1
      -code2
      -
    • -
  • -
-
code?
-
-
    -
  • foo -
      -
    • bar

      -
      code1
      -code2
      -
    • -
  • -
-
code?
-
diff --git a/oldtests/Code/ListBreakAfter.markdown b/oldtests/Code/ListBreakAfter.markdown deleted file mode 100644 index 4fa79f1..0000000 --- a/oldtests/Code/ListBreakAfter.markdown +++ /dev/null @@ -1,26 +0,0 @@ -* foo - * bar - - code1 - code2 - - code? - -* foo - * bar - - code1 - code2 - - - code? - -* foo - * bar - - code1 - code2 - - - - code? diff --git a/oldtests/Code/WhiteLines.html b/oldtests/Code/WhiteLines.html deleted file mode 100644 index 7fa137f..0000000 --- a/oldtests/Code/WhiteLines.html +++ /dev/null @@ -1,7 +0,0 @@ -
ABC
-  
- 
-
-DEF
-
-

GHI

diff --git a/oldtests/Code/WhiteLines.markdown b/oldtests/Code/WhiteLines.markdown deleted file mode 100644 index ea17af7..0000000 --- a/oldtests/Code/WhiteLines.markdown +++ /dev/null @@ -1,9 +0,0 @@ - ABC - - - - DEF - - - -GHI diff --git a/oldtests/Emphasis/Escapes.html b/oldtests/Emphasis/Escapes.html deleted file mode 100644 index 17c9e2d..0000000 --- a/oldtests/Emphasis/Escapes.html +++ /dev/null @@ -1 +0,0 @@ -

hi* there

diff --git a/oldtests/Emphasis/Escapes.markdown b/oldtests/Emphasis/Escapes.markdown deleted file mode 100644 index 4f14698..0000000 --- a/oldtests/Emphasis/Escapes.markdown +++ /dev/null @@ -1 +0,0 @@ -*hi\* there* \ No newline at end of file diff --git a/oldtests/Emphasis/NestedEmphAndStrong.html b/oldtests/Emphasis/NestedEmphAndStrong.html deleted file mode 100644 index b41b527..0000000 --- a/oldtests/Emphasis/NestedEmphAndStrong.html +++ /dev/null @@ -1,66 +0,0 @@ -
    -
  1. test test
  2. -
  3. test test
  4. -
  5. test test
  6. -
  7. test test
  8. -
  9. test test
  10. -
  11. test test
  12. -
  13. test test
  14. -
  15. test test
  16. -
  17. test test
  18. -
  19. test test
  20. -
  21. test test
  22. -
  23. test test
  24. -
  25. test test
  26. -
  27. test test
  28. -
  29. test test
  30. -
  31. test test
  32. -
-

Incorrect nesting:

-
    -
  1. *test test* test
  2. -
  3. _test test_ test
  4. -
  5. **test test* test*
  6. -
  7. __test␣test_␣test_
  8. -
  9. test test test
  10. -
  11. test test test
  12. -
  13. test test test
  14. -
  15. test test test
  16. -
-

No emphasis:

-
    -
  1. test* test *test
  2. -
  3. test** test **test
  4. -
  5. test_ test _test
  6. -
  7. test__ test __test
  8. -
-

Middle-word emphasis (asterisks):

-
    -
  1. ab
  2. -
  3. ab
  4. -
  5. abc
  6. -
  7. ab
  8. -
  9. ab
  10. -
  11. abc
  12. -
-

Middle-word emphasis (underscore):

-
    -
  1. _a_b
  2. -
  3. a_b_
  4. -
  5. a_b_c
  6. -
  7. __a__b
  8. -
  9. a__b__
  10. -
  11. a__b__c
  12. -
  13. my_precious_file.txt
  14. -
-

Tricky Cases:

-
    -
  1. E**. Test TestTestTest
  2. -
  3. E**. Test Test Test Test
  4. -
-

Overlong emphasis:

-

Name: ____________
-Organization: ____
-Region/Country: __

-

_____Cut here_____

-

____Cut here____

diff --git a/oldtests/Emphasis/NestedEmphAndStrong.markdown b/oldtests/Emphasis/NestedEmphAndStrong.markdown deleted file mode 100644 index ec7da25..0000000 --- a/oldtests/Emphasis/NestedEmphAndStrong.markdown +++ /dev/null @@ -1,69 +0,0 @@ -1. ***test test*** -2. ___test test___ -3. *test **test*** -4. **test *test*** -5. ***test* test** -6. ***test** test* -7. ***test* test** -8. **test *test*** -9. *test **test*** -10. _test __test___ -11. __test _test___ -12. ___test_ test__ -13. ___test__ test_ -14. ___test_ test__ -15. __test _test___ -16. _test __test___ - -Incorrect nesting: - -1. *test **test* test** -2. _test __test_ test__ -3. **test *test** test* -4. __test _test__ test_ -5. *test *test* test* -6. _test _test_ test_ -7. **test **test** test** -8. __test __test__ test__ - -No emphasis: - -1. test* test *test -2. test** test **test -3. test_ test _test -4. test__ test __test - -Middle-word emphasis (asterisks): - -1. *a*b -2. a*b* -3. a*b*c -4. **a**b -5. a**b** -6. a**b**c - -Middle-word emphasis (underscore): - -1. _a_b -2. a_b_ -3. a_b_c -4. __a__b -5. a__b__ -6. a__b__c -7. my_precious_file.txt - -Tricky Cases: - -1. E**. **Test** TestTestTest -2. E**. **Test** Test Test Test - -Overlong emphasis: - -Name: ____________ -Organization: ____ -Region/Country: __ - -_____Cut here_____ - -____Cut here____ - diff --git a/oldtests/Emphasis/Pathological.html b/oldtests/Emphasis/Pathological.html deleted file mode 100644 index 37eb9fa..0000000 --- a/oldtests/Emphasis/Pathological.html +++ /dev/null @@ -1,24 +0,0 @@ -

This input can take a long time to parse in some implementations.

-

*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -aaaaa

-

aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa**

diff --git a/oldtests/Emphasis/Pathological.markdown b/oldtests/Emphasis/Pathological.markdown deleted file mode 100644 index 5deb95e..0000000 --- a/oldtests/Emphasis/Pathological.markdown +++ /dev/null @@ -1,26 +0,0 @@ -This input can take a long time to parse in some implementations. - -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -aaaaa - -*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a** diff --git a/oldtests/Emphasis/Punctuation.html b/oldtests/Emphasis/Punctuation.html deleted file mode 100644 index 6061b81..0000000 --- a/oldtests/Emphasis/Punctuation.html +++ /dev/null @@ -1,10 +0,0 @@ -

Here is a _ that is cool.

-

Foo.

-

Foo.

-

Foo.

-

Foo.

-

Foo.

-

Foo.

-

Foo. Foo? Foo! Foo: Foo; (Foo)

-

Foo. Foo? Foo! Foo: Foo; (Foo)

-

Foo. Foo? Foo! Foo: Foo; (Foo)

diff --git a/oldtests/Emphasis/Punctuation.markdown b/oldtests/Emphasis/Punctuation.markdown deleted file mode 100644 index e3f23b8..0000000 --- a/oldtests/Emphasis/Punctuation.markdown +++ /dev/null @@ -1,19 +0,0 @@ -Here is a _ that is _cool_. - -_Foo._ - -__Foo.__ - -___Foo.___ - -_Foo_. - -__Foo__. - -___Foo___. - -_Foo_. _Foo_? _Foo_! _Foo_: _Foo_; (_Foo_) - -__Foo__. __Foo__? __Foo__! __Foo__: __Foo__; (__Foo__) - -___Foo___. ___Foo___? ___Foo___! ___Foo___: ___Foo___; (___Foo___) diff --git a/oldtests/HTML/Blocks.html b/oldtests/HTML/Blocks.html deleted file mode 100644 index dc80335..0000000 --- a/oldtests/HTML/Blocks.html +++ /dev/null @@ -1,18 +0,0 @@ -
-
- *raw html* -
-
-
-
-

this is markdown

-
-
- - -
-* raw html with trailing space -
diff --git a/oldtests/HTML/Blocks.markdown b/oldtests/HTML/Blocks.markdown deleted file mode 100644 index a83fa66..0000000 --- a/oldtests/HTML/Blocks.markdown +++ /dev/null @@ -1,26 +0,0 @@ -
-
- *raw html* -
-
- -
-
- -*this is markdown* - -
-
- - - - - -
-* raw html with trailing space -
diff --git a/oldtests/HTML/Inline.html b/oldtests/HTML/Inline.html deleted file mode 100644 index 94d40ac..0000000 --- a/oldtests/HTML/Inline.html +++ /dev/null @@ -1,8 +0,0 @@ -

hi -hi - -Hello there. -A line
break. -

diff --git a/oldtests/HTML/Inline.markdown b/oldtests/HTML/Inline.markdown deleted file mode 100644 index 2259421..0000000 --- a/oldtests/HTML/Inline.markdown +++ /dev/null @@ -1,8 +0,0 @@ -hi -`hi` - -Hello there. -A line
break. - diff --git a/oldtests/HTML/UppercaseTags.html b/oldtests/HTML/UppercaseTags.html deleted file mode 100644 index 8d2d828..0000000 --- a/oldtests/HTML/UppercaseTags.html +++ /dev/null @@ -1,4 +0,0 @@ -
-this is a block -
-

Here is some inline html.

diff --git a/oldtests/HTML/UppercaseTags.markdown b/oldtests/HTML/UppercaseTags.markdown deleted file mode 100644 index b476ffb..0000000 --- a/oldtests/HTML/UppercaseTags.markdown +++ /dev/null @@ -1,5 +0,0 @@ -
-this is a block -
- -Here is some inline html. \ No newline at end of file diff --git a/oldtests/Headers/ATX.html b/oldtests/Headers/ATX.html deleted file mode 100644 index f375b98..0000000 --- a/oldtests/Headers/ATX.html +++ /dev/null @@ -1,14 +0,0 @@ -

One

-

Two

-

Three

-

Four

-
Five
-
Six
-

####### Seven

-

Three with

-

Spacing doesn't matter

-

Escaped final #

-

## Not a header

-

#5 not a header

-

-

(empty header)

diff --git a/oldtests/Headers/ATX.markdown b/oldtests/Headers/ATX.markdown deleted file mode 100644 index f687aa5..0000000 --- a/oldtests/Headers/ATX.markdown +++ /dev/null @@ -1,20 +0,0 @@ -# One -## Two -### Three -#### Four -##### Five - -###### Six - -####### Seven - -### Three with ### -## Spacing doesn't matter ## -## Escaped final \## - -\## Not a header - -#5 not a header - -## -(empty header) diff --git a/oldtests/Headers/Setext.html b/oldtests/Headers/Setext.html deleted file mode 100644 index 787fb02..0000000 --- a/oldtests/Headers/Setext.html +++ /dev/null @@ -1,9 +0,0 @@ -

Level one

-

Two

-

In a paragraph

-

Level two

-

more text

-

====== -no empty headers

-

not a header

-
diff --git a/oldtests/Headers/Setext.markdown b/oldtests/Headers/Setext.markdown deleted file mode 100644 index da0c7e2..0000000 --- a/oldtests/Headers/Setext.markdown +++ /dev/null @@ -1,17 +0,0 @@ -Level one -========= - -Two ---- - -In a paragraph - -Level two ---------- -more text - -====== -no empty headers - -not a header ------------- ----- diff --git a/oldtests/Links/AngleBrackets.html b/oldtests/Links/AngleBrackets.html deleted file mode 100644 index 21ac00d..0000000 --- a/oldtests/Links/AngleBrackets.html +++ /dev/null @@ -1,3 +0,0 @@ -

silly URL with angle brackets.

-

link.

-

link.

diff --git a/oldtests/Links/AngleBrackets.markdown b/oldtests/Links/AngleBrackets.markdown deleted file mode 100644 index c2e06ff..0000000 --- a/oldtests/Links/AngleBrackets.markdown +++ /dev/null @@ -1,7 +0,0 @@ -[silly URL with angle brackets](). - -[link]( "title"). - -[link][]. - -[link]: "title" diff --git a/oldtests/Links/AutoLinks.html b/oldtests/Links/AutoLinks.html deleted file mode 100644 index 092353f..0000000 --- a/oldtests/Links/AutoLinks.html +++ /dev/null @@ -1,7 +0,0 @@ -

http://google.com?query=blah&time=15 -someone.else@somedomain.com -ftp://old.ftp.server.edu -git://some.git.repo/project.git - -<http://not.an autolink> -<relative/not/autolink>

diff --git a/oldtests/Links/AutoLinks.markdown b/oldtests/Links/AutoLinks.markdown deleted file mode 100644 index bf95b8d..0000000 --- a/oldtests/Links/AutoLinks.markdown +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - diff --git a/oldtests/Links/BackticksInLinks.html b/oldtests/Links/BackticksInLinks.html deleted file mode 100644 index ff70383..0000000 --- a/oldtests/Links/BackticksInLinks.html +++ /dev/null @@ -1 +0,0 @@ -

the right bracket character (])

diff --git a/oldtests/Links/BackticksInLinks.markdown b/oldtests/Links/BackticksInLinks.markdown deleted file mode 100644 index 539fd52..0000000 --- a/oldtests/Links/BackticksInLinks.markdown +++ /dev/null @@ -1 +0,0 @@ -[the right bracket character (`]`)](/url) diff --git a/oldtests/Links/CaseInsensitiveReferences.html b/oldtests/Links/CaseInsensitiveReferences.html deleted file mode 100644 index afe4557..0000000 --- a/oldtests/Links/CaseInsensitiveReferences.html +++ /dev/null @@ -1 +0,0 @@ -

Толпой is a Russian word.

diff --git a/oldtests/Links/CaseInsensitiveReferences.markdown b/oldtests/Links/CaseInsensitiveReferences.markdown deleted file mode 100644 index f9653b9..0000000 --- a/oldtests/Links/CaseInsensitiveReferences.markdown +++ /dev/null @@ -1,3 +0,0 @@ -[Толпой] is a Russian word. - -[ТОЛПОЙ]: /url diff --git a/oldtests/Links/Entities.html b/oldtests/Links/Entities.html deleted file mode 100644 index 252dadb..0000000 --- a/oldtests/Links/Entities.html +++ /dev/null @@ -1,2 +0,0 @@ -

http://göögle.com

-

hi

diff --git a/oldtests/Links/Entities.markdown b/oldtests/Links/Entities.markdown deleted file mode 100644 index d81ee36..0000000 --- a/oldtests/Links/Entities.markdown +++ /dev/null @@ -1,3 +0,0 @@ - - -[hi](/url "göögle & yahoo") diff --git a/oldtests/Links/InlineLinks.html b/oldtests/Links/InlineLinks.html deleted file mode 100644 index ae33f33..0000000 --- a/oldtests/Links/InlineLinks.html +++ /dev/null @@ -1,10 +0,0 @@ -
    -
  1. link
  2. -
  3. link
  4. -
  5. link
  6. -
  7. link with -linebreak.
  8. -
  9. link
  10. -
  11. [not a link] (/url)
  12. -
diff --git a/oldtests/Links/InlineLinks.markdown b/oldtests/Links/InlineLinks.markdown deleted file mode 100644 index a822c4d..0000000 --- a/oldtests/Links/InlineLinks.markdown +++ /dev/null @@ -1,9 +0,0 @@ -1. [link](/url) -2. [link](/url "title") -3. [link](/url - "title") -4. [link *with -linebreak*](
"title -with linebreak"). -5. [link](/url(withparens) 'and single quoted title') -6. [not a link] (/url) diff --git a/oldtests/Links/ParensInURLs.html b/oldtests/Links/ParensInURLs.html deleted file mode 100644 index 9cd6de7..0000000 --- a/oldtests/Links/ParensInURLs.html +++ /dev/null @@ -1,6 +0,0 @@ -

Inline link 1 with parens.

-

Inline link 2 with parens.

-

Inline link 3 with non-escaped parens.

-

Inline link 4 with non-escaped parens.

-

Reference link 1 with parens.

-

Reference link 2 with parens.

diff --git a/oldtests/Links/ParensInURLs.markdown b/oldtests/Links/ParensInURLs.markdown deleted file mode 100644 index bb7be4f..0000000 --- a/oldtests/Links/ParensInURLs.markdown +++ /dev/null @@ -1,14 +0,0 @@ -[Inline link 1 with parens](/url\(test\) "title"). - -[Inline link 2 with parens]( "title"). - -[Inline link 3 with non-escaped parens](/url(test) "title"). - -[Inline link 4 with non-escaped parens]( "title"). - -[Reference link 1 with parens][1]. - -[Reference link 2 with parens][2]. - - [1]: /url(test) "title" - [2]: "title" diff --git a/oldtests/Links/ReferenceLinks.html b/oldtests/Links/ReferenceLinks.html deleted file mode 100644 index 397cdb2..0000000 --- a/oldtests/Links/ReferenceLinks.html +++ /dev/null @@ -1,7 +0,0 @@ -
    -
  1. Link references can be defined anywhere.

  2. -
-
-

another one

-
diff --git a/oldtests/Links/ReferenceLinks.markdown b/oldtests/Links/ReferenceLinks.markdown deleted file mode 100644 index ebcf5a9..0000000 --- a/oldtests/Links/ReferenceLinks.markdown +++ /dev/null @@ -1,10 +0,0 @@ -1. [Link references] can be defined anywhere. - - [Link references]: /url - (even in a list item) - -> [another] one -> -> [another]: -> /foo "can break -> lines" diff --git a/oldtests/Lists/CodeBlocksInLists.html b/oldtests/Lists/CodeBlocksInLists.html deleted file mode 100644 index fcd3e2a..0000000 --- a/oldtests/Lists/CodeBlocksInLists.html +++ /dev/null @@ -1,14 +0,0 @@ -
    -
  1. list item -code

  2. -
  3. list item

    -
    code
    -
  4. -
  5. code
    -
  6. -
  7. code
    -
  8. -
  9. code
    -code
    -
  10. -
diff --git a/oldtests/Lists/CodeBlocksInLists.markdown b/oldtests/Lists/CodeBlocksInLists.markdown deleted file mode 100644 index 7730808..0000000 --- a/oldtests/Lists/CodeBlocksInLists.markdown +++ /dev/null @@ -1,18 +0,0 @@ -1. list item - code - -2. list item - ~~~ - code - ~~~ - -3. ~~~ - code - ~~~ - -4. ~~~ - code - ~~~ - -5. code - code diff --git a/oldtests/Lists/ConsecutiveLists.html b/oldtests/Lists/ConsecutiveLists.html deleted file mode 100644 index f8f9098..0000000 --- a/oldtests/Lists/ConsecutiveLists.html +++ /dev/null @@ -1,20 +0,0 @@ -
    -
  • one
  • -
  • one
  • -
-
    -
  • two
  • -
  • two
  • -
-
    -
  • three
  • -
  • three
  • -
-
    -
  1. four
  2. -
  3. four
  4. -
-
    -
  1. five
  2. -
  3. five
  4. -
diff --git a/oldtests/Lists/ConsecutiveLists.markdown b/oldtests/Lists/ConsecutiveLists.markdown deleted file mode 100644 index c4faa54..0000000 --- a/oldtests/Lists/ConsecutiveLists.markdown +++ /dev/null @@ -1,10 +0,0 @@ -* one -* one -+ two -+ two -- three -- three -1. four -1. four -1) five -1) five diff --git a/oldtests/Lists/EmptyListItem.html b/oldtests/Lists/EmptyListItem.html deleted file mode 100644 index 2c23fe1..0000000 --- a/oldtests/Lists/EmptyListItem.html +++ /dev/null @@ -1,10 +0,0 @@ -
    -
  • one
  • -
  • -
  • three
  • -
-
    -
  1. one
  2. -
  3. -
  4. three
  5. -
diff --git a/oldtests/Lists/EmptyListItem.markdown b/oldtests/Lists/EmptyListItem.markdown deleted file mode 100644 index d30cbc3..0000000 --- a/oldtests/Lists/EmptyListItem.markdown +++ /dev/null @@ -1,7 +0,0 @@ -- one -- -- three - -1. one -2. -3. three diff --git a/oldtests/Lists/InBlockquote.html b/oldtests/Lists/InBlockquote.html deleted file mode 100644 index da233e8..0000000 --- a/oldtests/Lists/InBlockquote.html +++ /dev/null @@ -1,22 +0,0 @@ -
-
    -
  • tight
  • -
  • tight
  • -
-
-
-
    -
  • loose

  • -
  • loose

  • -
-
-
-
    -
  • one-item list
  • -
-
-
-
    -
  • one-item list
  • -
-
diff --git a/oldtests/Lists/InBlockquote.markdown b/oldtests/Lists/InBlockquote.markdown deleted file mode 100644 index 511563b..0000000 --- a/oldtests/Lists/InBlockquote.markdown +++ /dev/null @@ -1,12 +0,0 @@ -> - tight -> - tight - - -> - loose -> -> - loose - - -> - one-item list - -> - one-item list diff --git a/oldtests/Lists/Indents.html b/oldtests/Lists/Indents.html deleted file mode 100644 index a11a5a6..0000000 --- a/oldtests/Lists/Indents.html +++ /dev/null @@ -1,22 +0,0 @@ -
-
    -
  • foo

    -

    bar

  • -
-
-
    -
  • one
  • -
  • two
  • -
-
    -
  • one
  • -
  • two
  • -
  • three
  • -
-
    -
  • one -
      -
    • two
    • -
  • -
  • three
  • -
diff --git a/oldtests/Lists/Indents.markdown b/oldtests/Lists/Indents.markdown deleted file mode 100644 index 293d112..0000000 --- a/oldtests/Lists/Indents.markdown +++ /dev/null @@ -1,17 +0,0 @@ - > * foo -> -> bar - - - - one - - two - - -- one - - two -- three - - -- one - - two -- three diff --git a/oldtests/Lists/ListsAndHRs.html b/oldtests/Lists/ListsAndHRs.html deleted file mode 100644 index 40826f7..0000000 --- a/oldtests/Lists/ListsAndHRs.html +++ /dev/null @@ -1,7 +0,0 @@ -
    -
  • item 1 -
      -
    • item 2
    • -
  • -
-
diff --git a/oldtests/Lists/ListsAndHRs.markdown b/oldtests/Lists/ListsAndHRs.markdown deleted file mode 100644 index 19c07e7..0000000 --- a/oldtests/Lists/ListsAndHRs.markdown +++ /dev/null @@ -1,3 +0,0 @@ -* item 1 - * item 2 -* * * * * diff --git a/oldtests/Lists/ListsAndSetextHeaders.html b/oldtests/Lists/ListsAndSetextHeaders.html deleted file mode 100644 index c6af9eb..0000000 --- a/oldtests/Lists/ListsAndSetextHeaders.html +++ /dev/null @@ -1,6 +0,0 @@ -
    -
  1. item
  2. -
  3. item -Not header
  4. -
-
diff --git a/oldtests/Lists/ListsAndSetextHeaders.markdown b/oldtests/Lists/ListsAndSetextHeaders.markdown deleted file mode 100644 index acfa655..0000000 --- a/oldtests/Lists/ListsAndSetextHeaders.markdown +++ /dev/null @@ -1,4 +0,0 @@ -1. item -2. item -Not header ----------- diff --git a/oldtests/Lists/MultipleBlankLines.html b/oldtests/Lists/MultipleBlankLines.html deleted file mode 100644 index d894db1..0000000 --- a/oldtests/Lists/MultipleBlankLines.html +++ /dev/null @@ -1,56 +0,0 @@ -
    -
  1. First Item

    -
      -
    • one

      -
        -
      • two
      • -
    • -
    • one

      -
        -
      • two
      • -
    • -
  2. -
  3. Second Item

    -
      -
    • one -
        -
      • two
      • -
    • -
  4. -
-
    -
  • one -
      -
    • two
    • -
  • -
-
    -
  1. Third Item

    -
      -
    • one -
        -
      • two
      • -
    • -
  2. -
-
    -
  • one -
      -
    • two
    • -
  • -
-
    -
  1. Fourth Item

    -
      -
    • one -
        -
      • two
      • -
    • -
  2. -
-
    -
  • one -
      -
    • two
    • -
  • -
diff --git a/oldtests/Lists/MultipleBlankLines.markdown b/oldtests/Lists/MultipleBlankLines.markdown deleted file mode 100644 index e24a4f2..0000000 --- a/oldtests/Lists/MultipleBlankLines.markdown +++ /dev/null @@ -1,37 +0,0 @@ -1. First Item - - * one - * two - - * one - * two - -1. Second Item - - * one - * two - - - * one - * two - -1. Third Item - - * one - * two - - - - * one - * two - -1. Fourth Item - - * one - * two - - - - - * one - * two diff --git a/oldtests/Lists/Start.html b/oldtests/Lists/Start.html deleted file mode 100644 index 46e0550..0000000 --- a/oldtests/Lists/Start.html +++ /dev/null @@ -1,11 +0,0 @@ -
    -
  1. this list starts with 4.
  2. -
  3. and continues
  4. -
  5. the continuation number is irrelevant.
  6. -
-
    -
  1. a space odyssey
  2. -
-
    -
  1. standard lists get no start attribute
  2. -
diff --git a/oldtests/Lists/Start.markdown b/oldtests/Lists/Start.markdown deleted file mode 100644 index 175226f..0000000 --- a/oldtests/Lists/Start.markdown +++ /dev/null @@ -1,7 +0,0 @@ -4. this list starts with 4. -5. and continues -1. the continuation number is irrelevant. - -2001) a space odyssey - -1. standard lists get no start attribute diff --git a/oldtests/Lists/Sublists.html b/oldtests/Lists/Sublists.html deleted file mode 100644 index af62915..0000000 --- a/oldtests/Lists/Sublists.html +++ /dev/null @@ -1,49 +0,0 @@ -

Four levels:

-
    -
  • one -
      -
    • two -
        -
      • three -
          -
        • four
        • -
      • -
    • -
  • -
-
    -
  1. one -
      -
    1. two -
        -
      1. three -
          -
        1. four
        2. -
      2. -
    2. -
  2. -
-
    -
  1. one -
      -
    • two -
        -
      1. three -
          -
        • four
        • -
      2. -
    • -
  2. -
-
    -
  • one -
      -
    • two -
        -
      1. three -
          -
        1. four
        2. -
      2. -
    • -
  • -
diff --git a/oldtests/Lists/Sublists.markdown b/oldtests/Lists/Sublists.markdown deleted file mode 100644 index 9eced9e..0000000 --- a/oldtests/Lists/Sublists.markdown +++ /dev/null @@ -1,24 +0,0 @@ -Four levels: - -- one - - two - - three - - four - - -1. one - 1. two - 1. three - 1. four - - -1) one - - two - 1) three - - four - - -- one - - two - 1. three - 1) four diff --git a/oldtests/Lists/TightAndLoose.html b/oldtests/Lists/TightAndLoose.html deleted file mode 100644 index 7792ebb..0000000 --- a/oldtests/Lists/TightAndLoose.html +++ /dev/null @@ -1,49 +0,0 @@ -
    -
  1. tight
  2. -
-
    -
  • tight
  • -
  • list
  • -
-
    -
  • loose

  • -
  • list

  • -
-
    -
  1. tight
  2. -
  3. list
  4. -
-
    -
  1. loose

  2. -
  3. list

  4. -
-
    -
  1. loose

    -
      -
    • sublist
    • -
  2. -
-
    -
  1. tight -
      -
    • sublist
    • -
  2. -
-
    -
  • tight -
    -

    blockquote -and

    -
    -
    code
    -
  • -
  • tight
  • -
-
    -
  • tight -
      -
    • with loose

    • -
    • sublist

    • -
  • -
  • tight
  • -
diff --git a/oldtests/Lists/TightAndLoose.markdown b/oldtests/Lists/TightAndLoose.markdown deleted file mode 100644 index 263a34c..0000000 --- a/oldtests/Lists/TightAndLoose.markdown +++ /dev/null @@ -1,45 +0,0 @@ -1. tight - - -- tight -- list - - -- loose - -- list - - -1. tight -2. list - - -1. loose - -2. list - - -1. loose - - - sublist - - - -1. tight - - sublist - - -- tight - > blockquote - and - ``` - code - ``` -- tight - - -- tight - - with loose - - - sublist -- tight diff --git a/oldtests/Lists/TightLooseBlockquote.html b/oldtests/Lists/TightLooseBlockquote.html deleted file mode 100644 index 7e78214..0000000 --- a/oldtests/Lists/TightLooseBlockquote.html +++ /dev/null @@ -1,32 +0,0 @@ -
    -
  • tight I -
    -

    bq

    -
  • -
  • tight I
  • -
-
    -
  • tight II -
    -

    bq

    -
    -foo
  • -
  • tight II
  • -
-
    -
  1. Blank lines in bq don't break list -
    -

    bq

    -
  2. -
  3. Should say (2) in output
  4. -
-
    -
  • Blank lines in bq don't break LI -
      -
    • item A -
      -

      bq

      -
    • -
    • item B
    • -
  • -
diff --git a/oldtests/Lists/TightLooseBlockquote.markdown b/oldtests/Lists/TightLooseBlockquote.markdown deleted file mode 100644 index 08200cc..0000000 --- a/oldtests/Lists/TightLooseBlockquote.markdown +++ /dev/null @@ -1,25 +0,0 @@ -* tight I - > bq - > -* tight I - - -* tight II - > bq - > - foo -* tight II - -1. Blank lines in bq don't break list - > bq - > - > -1. Should say (2) in output - -* Blank lines in bq don't break LI - * item A - > bq - > - > - * item B - \ No newline at end of file diff --git a/oldtests/Lists/TightLooseMore.html b/oldtests/Lists/TightLooseMore.html deleted file mode 100644 index f26f457..0000000 --- a/oldtests/Lists/TightLooseMore.html +++ /dev/null @@ -1,7 +0,0 @@ -
    -
  • foo

    -
      -
    • bar
    • -
    -

    blah

  • -
diff --git a/oldtests/Lists/TightLooseMore.markdown b/oldtests/Lists/TightLooseMore.markdown deleted file mode 100644 index 7ace63f..0000000 --- a/oldtests/Lists/TightLooseMore.markdown +++ /dev/null @@ -1,4 +0,0 @@ -* foo - * bar - - blah \ No newline at end of file diff --git a/oldtests/Lists/TwoBlankLinesEndList.html b/oldtests/Lists/TwoBlankLinesEndList.html deleted file mode 100644 index 629add1..0000000 --- a/oldtests/Lists/TwoBlankLinesEndList.html +++ /dev/null @@ -1,21 +0,0 @@ -
    -
  1. one

  2. -
  3. two

  4. -
-
    -
  1. new list
  2. -
-
-
    -
  • one

  • -
  • two

  • -
-
    -
  • new list
  • -
-
-
    -
  1. one
  2. -
-
code
-
diff --git a/oldtests/Lists/TwoBlankLinesEndList.markdown b/oldtests/Lists/TwoBlankLinesEndList.markdown deleted file mode 100644 index 2984a19..0000000 --- a/oldtests/Lists/TwoBlankLinesEndList.markdown +++ /dev/null @@ -1,20 +0,0 @@ -1. one - -2. two - - -1. new list - - -> - one -> -> - two -> -> -> - new list - - -1. one - - - code diff --git a/oldtests/Makefile b/oldtests/Makefile deleted file mode 100644 index c8a30bd..0000000 --- a/oldtests/Makefile +++ /dev/null @@ -1,55 +0,0 @@ -SHELL=/bin/bash -TESTDIR ?= * -PATT ?= . -TESTS=$(shell ls $(TESTDIR)/*.markdown | grep $(PATT)) -DIFFS=$(patsubst %.markdown,%.diff,$(TESTS)) -PROG ?= ../stmd -FILTER ?= perl -pe 's/ /␣/g' -TIDYCMD ?= tidy -asxhtml -utf8 --show-body-only yes --show-warnings no -quiet -DETAILS ?= 1 - -# Check to see if echo supports -e option to allow backslash escapes -ifeq ($(shell echo -e),-e) -ECHO=echo -else -ECHO=echo -e -endif - -all: $(DIFFS) - PASS=0;TESTS=0; \ - for f in $(DIFFS); do \ - let TESTS=TESTS+1; \ - [ -s $$f ] || let PASS=PASS+1; \ - done; \ - $(ECHO) "\033[1m$$PASS of $$TESTS tests passed.\033[0m"; \ - if [ $$TESTS -eq $$PASS ]; then exit 0; else exit 1; fi - -%.actual.html: %.markdown -ifeq ($(TIDY),1) - -cat $< | $(PROG) | $(TIDYCMD) > $@ -else - -cat $< | $(PROG) > $@ -endif - -%.expected.html: %.html -ifeq ($(TIDY),1) - -$(TIDYCMD) $< > $@ -else - cp $< $@ -endif - -%.diff: %.expected.html %.actual.html - diff --unified=1 <(cat $(word 1,$^) | $(FILTER)) <(cat $(word 2,$^) | $(FILTER)) > $@ ; \ - if [ -s $@ ]; then \ - $(ECHO) "\033[1;31m✘ $(patsubst %.diff,%,$@)\033[0m"; \ - if [ $(DETAILS) == "1" ]; then \ - $(ECHO) "\033[0;36m" ; cat $@; $(ECHO) "\033[0m"; \ - fi \ - else \ - $(ECHO) "\033[1;32m✓ $(patsubst %.diff,%,$@)\033[0m"; \ - fi - -.PHONY: all clean - -clean: - -@rm */*.{diff,actual.html,expected.html} diff --git a/oldtests/Misc/BackslashEscapes.html b/oldtests/Misc/BackslashEscapes.html deleted file mode 100644 index 3eb2aed..0000000 --- 
a/oldtests/Misc/BackslashEscapes.html +++ /dev/null @@ -1,14 +0,0 @@ -

*not emphasized* -\emphasis -**not bold** -<br/> not a tag -[link](/foo) not a link -link -`not code`

-

1. not a list item

-

* not a list.

-

# Not a header

-

[foo]: /url "not a reference"

-

$ ^ ; can be escaped. -\a \b \T cannot. -unicode letters and symbols cannot: \π \‥.

diff --git a/oldtests/Misc/BackslashEscapes.markdown b/oldtests/Misc/BackslashEscapes.markdown deleted file mode 100644 index 23496dc..0000000 --- a/oldtests/Misc/BackslashEscapes.markdown +++ /dev/null @@ -1,19 +0,0 @@ -\*not emphasized* -\\*emphasis* -\*\*not bold** -\
not a tag -\[link](/foo) not a link -[link](/foo\) "title\"") -\`not code` - -1\. not a list item - -\* not a list. - -\# Not a header - -\[foo]: /url "not a reference" - -\$ \^ \; can be escaped. -\a \b \T cannot. -unicode letters and symbols cannot: \π \‥. diff --git a/oldtests/Misc/Laziness.html b/oldtests/Misc/Laziness.html deleted file mode 100644 index e130eb5..0000000 --- a/oldtests/Misc/Laziness.html +++ /dev/null @@ -1,22 +0,0 @@ -
-
    -
  1. one -two
  2. -
-
-

Laziness only affects paragraph continuations:

-
-
code
-
-
-
not same code block
-
-
    -
  1. hello
  2. -
-
-
-
-
-

code

-
diff --git a/oldtests/Misc/Laziness.markdown b/oldtests/Misc/Laziness.markdown deleted file mode 100644 index 2c32870..0000000 --- a/oldtests/Misc/Laziness.markdown +++ /dev/null @@ -1,14 +0,0 @@ -> 1. one -> two - -Laziness only affects paragraph continuations: - -> code - not same code block - -1. hello ------ - -> ``` -code -``` diff --git a/oldtests/Misc/LineBreaks.html b/oldtests/Misc/LineBreaks.html deleted file mode 100644 index 2d85e85..0000000 --- a/oldtests/Misc/LineBreaks.html +++ /dev/null @@ -1,11 +0,0 @@ -

Two spaces
-break a line. Or more than two
-and spaces in the following line are absorbed.

-

You can also break lines with
-a backslash.

-

Two spaces at the end of a paragraph are -not a line break.

-

A backslash at the end of a paragraph is -not a line break.\

-

Similarly with setext headers

-

And with backslashes\

diff --git a/oldtests/Misc/LineBreaks.markdown b/oldtests/Misc/LineBreaks.markdown deleted file mode 100644 index 3632dcb..0000000 --- a/oldtests/Misc/LineBreaks.markdown +++ /dev/null @@ -1,18 +0,0 @@ -Two spaces -break a line. Or more than two - and spaces in the following line are absorbed. - -You can also break lines with\ -a backslash. - -Two spaces at the end of a paragraph are -not a line break. - -A backslash at the end of a paragraph is -not a line break.\ - -Similarly with setext headers -------------------------------- - -And with backslashes\ ---------------------- diff --git a/oldtests/Misc/Transitions.html b/oldtests/Misc/Transitions.html deleted file mode 100644 index fceff9f..0000000 --- a/oldtests/Misc/Transitions.html +++ /dev/null @@ -1,26 +0,0 @@ -
-

blockquote

-
-

blockquote

-
-
-
    -
  1. list
  2. -
  3. list -
      -
    • sublist
    • -
  4. -
-
-

paragraph

-

header

-

header

-
code
-
-
code
-
-
-
-# not a header -
-
diff --git a/oldtests/Misc/Transitions.markdown b/oldtests/Misc/Transitions.markdown deleted file mode 100644 index 5f3a9d3..0000000 --- a/oldtests/Misc/Transitions.markdown +++ /dev/null @@ -1,20 +0,0 @@ -> blockquote -> > blockquote -1. list -2. list - - sublist -* * * * * -paragraph - -header ------- -### header - code -``` -code -``` -
-
-# not a header -
-
diff --git a/oldtests/Original/Amps_and_angle_encoding.html b/oldtests/Original/Amps_and_angle_encoding.html deleted file mode 100644 index fc1b2c3..0000000 --- a/oldtests/Original/Amps_and_angle_encoding.html +++ /dev/null @@ -1,9 +0,0 @@ -

AT&T has an ampersand in their name.

-

AT&T is another way to write it.

-

This & that.

-

4 < 5.

-

6 > 5.

-

Here's a link with an ampersand in the URL.

-

Here's a link with an amersand in the link text: AT&T.

-

Here's an inline link.

-

Here's an inline link.

diff --git a/oldtests/Original/Amps_and_angle_encoding.markdown b/oldtests/Original/Amps_and_angle_encoding.markdown deleted file mode 100644 index 0e9527f..0000000 --- a/oldtests/Original/Amps_and_angle_encoding.markdown +++ /dev/null @@ -1,21 +0,0 @@ -AT&T has an ampersand in their name. - -AT&T is another way to write it. - -This & that. - -4 < 5. - -6 > 5. - -Here's a [link] [1] with an ampersand in the URL. - -Here's a link with an amersand in the link text: [AT&T] [2]. - -Here's an inline [link](/script?foo=1&bar=2). - -Here's an inline [link](). - - -[1]: http://example.com/?foo=1&bar=2 -[2]: http://att.com/ "AT&T" \ No newline at end of file diff --git a/oldtests/Original/Auto_links.html b/oldtests/Original/Auto_links.html deleted file mode 100644 index f517fe6..0000000 --- a/oldtests/Original/Auto_links.html +++ /dev/null @@ -1,13 +0,0 @@ -

Link: http://example.com/.

-

With an ampersand: http://example.com/?foo=1&bar=2

- -
-

Blockquoted: http://example.com/

-
-

Auto-links should not occur here: <http://example.com/>

-
or here: <http://example.com/>
-
diff --git a/oldtests/Original/Auto_links.markdown b/oldtests/Original/Auto_links.markdown deleted file mode 100644 index abbc488..0000000 --- a/oldtests/Original/Auto_links.markdown +++ /dev/null @@ -1,13 +0,0 @@ -Link: . - -With an ampersand: - -* In a list? -* -* It should. - -> Blockquoted: - -Auto-links should not occur here: `` - - or here: \ No newline at end of file diff --git a/oldtests/Original/Backslash_escapes.html b/oldtests/Original/Backslash_escapes.html deleted file mode 100644 index 9a83379..0000000 --- a/oldtests/Original/Backslash_escapes.html +++ /dev/null @@ -1,75 +0,0 @@ -

These should all get escaped:

-

Backslash: \

-

Backtick: `

-

Asterisk: *

-

Underscore: _

-

Left brace: {

-

Right brace: }

-

Left bracket: [

-

Right bracket: ]

-

Left paren: (

-

Right paren: )

-

Greater-than: >

-

Hash: #

-

Period: .

-

Bang: !

-

Plus: +

-

Minus: -

-

These should not, because they occur within a code block:

-
Backslash: \\
-
-Backtick: \`
-
-Asterisk: \*
-
-Underscore: \_
-
-Left brace: \{
-
-Right brace: \}
-
-Left bracket: \[
-
-Right bracket: \]
-
-Left paren: \(
-
-Right paren: \)
-
-Greater-than: \>
-
-Hash: \#
-
-Period: \.
-
-Bang: \!
-
-Plus: \+
-
-Minus: \-
-
-

Nor should these, which occur in code spans:

-

Backslash: \\

-

Backtick: \`

-

Asterisk: \*

-

Underscore: \_

-

Left brace: \{

-

Right brace: \}

-

Left bracket: \[

-

Right bracket: \]

-

Left paren: \(

-

Right paren: \)

-

Greater-than: \>

-

Hash: \#

-

Period: \.

-

Bang: \!

-

Plus: \+

-

Minus: \-

-

These should get escaped, even though they're matching pairs for -other Markdown constructs:

-

*asterisks*

-

_underscores_

-

`backticks`

-

This is a code span with a literal backslash-backtick sequence: \`

-

This is a tag with unescaped backticks bar.

-

This is a tag with backslashes bar.

diff --git a/oldtests/Original/Backslash_escapes.markdown b/oldtests/Original/Backslash_escapes.markdown deleted file mode 100644 index 5b014cb..0000000 --- a/oldtests/Original/Backslash_escapes.markdown +++ /dev/null @@ -1,120 +0,0 @@ -These should all get escaped: - -Backslash: \\ - -Backtick: \` - -Asterisk: \* - -Underscore: \_ - -Left brace: \{ - -Right brace: \} - -Left bracket: \[ - -Right bracket: \] - -Left paren: \( - -Right paren: \) - -Greater-than: \> - -Hash: \# - -Period: \. - -Bang: \! - -Plus: \+ - -Minus: \- - - - -These should not, because they occur within a code block: - - Backslash: \\ - - Backtick: \` - - Asterisk: \* - - Underscore: \_ - - Left brace: \{ - - Right brace: \} - - Left bracket: \[ - - Right bracket: \] - - Left paren: \( - - Right paren: \) - - Greater-than: \> - - Hash: \# - - Period: \. - - Bang: \! - - Plus: \+ - - Minus: \- - - -Nor should these, which occur in code spans: - -Backslash: `\\` - -Backtick: `` \` `` - -Asterisk: `\*` - -Underscore: `\_` - -Left brace: `\{` - -Right brace: `\}` - -Left bracket: `\[` - -Right bracket: `\]` - -Left paren: `\(` - -Right paren: `\)` - -Greater-than: `\>` - -Hash: `\#` - -Period: `\.` - -Bang: `\!` - -Plus: `\+` - -Minus: `\-` - - -These should get escaped, even though they're matching pairs for -other Markdown constructs: - -\*asterisks\* - -\_underscores\_ - -\`backticks\` - -This is a code span with a literal backslash-backtick sequence: `` \` `` - -This is a tag with unescaped backticks bar. - -This is a tag with backslashes bar. diff --git a/oldtests/Original/Blockquotes_with_code_blocks.html b/oldtests/Original/Blockquotes_with_code_blocks.html deleted file mode 100644 index fd1cb1b..0000000 --- a/oldtests/Original/Blockquotes_with_code_blocks.html +++ /dev/null @@ -1,12 +0,0 @@ -
-

Example:

-
sub status {
-    print "working";
-}
-
-

Or:

-
sub status {
-    return "working";
-}
-
-
diff --git a/oldtests/Original/Blockquotes_with_code_blocks.markdown b/oldtests/Original/Blockquotes_with_code_blocks.markdown deleted file mode 100644 index c31d171..0000000 --- a/oldtests/Original/Blockquotes_with_code_blocks.markdown +++ /dev/null @@ -1,11 +0,0 @@ -> Example: -> -> sub status { -> print "working"; -> } -> -> Or: -> -> sub status { -> return "working"; -> } diff --git a/oldtests/Original/Code_Blocks.html b/oldtests/Original/Code_Blocks.html deleted file mode 100644 index 7d89615..0000000 --- a/oldtests/Original/Code_Blocks.html +++ /dev/null @@ -1,12 +0,0 @@ -
code block on the first line
-
-

Regular text.

-
code block indented by spaces
-
-

Regular text.

-
the lines in this block  
-all contain trailing spaces  
-
-

Regular Text.

-
code block on the last line
-
diff --git a/oldtests/Original/Code_Blocks.markdown b/oldtests/Original/Code_Blocks.markdown deleted file mode 100644 index b54b092..0000000 --- a/oldtests/Original/Code_Blocks.markdown +++ /dev/null @@ -1,14 +0,0 @@ - code block on the first line - -Regular text. - - code block indented by spaces - -Regular text. - - the lines in this block - all contain trailing spaces - -Regular Text. - - code block on the last line \ No newline at end of file diff --git a/oldtests/Original/Code_Spans.html b/oldtests/Original/Code_Spans.html deleted file mode 100644 index 27acea1..0000000 --- a/oldtests/Original/Code_Spans.html +++ /dev/null @@ -1,3 +0,0 @@ -

<test a=" content of attribute ">

-

Fix for backticks within HTML tag: like this

-

Here's how you put `backticks` in a code span.

diff --git a/oldtests/Original/Code_Spans.markdown b/oldtests/Original/Code_Spans.markdown deleted file mode 100644 index 5c229c7..0000000 --- a/oldtests/Original/Code_Spans.markdown +++ /dev/null @@ -1,5 +0,0 @@ -`` - -Fix for backticks within HTML tag: like this - -Here's how you put `` `backticks` `` in a code span. \ No newline at end of file diff --git a/oldtests/Original/Horizontal_rules.html b/oldtests/Original/Horizontal_rules.html deleted file mode 100644 index a89efdb..0000000 --- a/oldtests/Original/Horizontal_rules.html +++ /dev/null @@ -1,39 +0,0 @@ -

Dashes:

-
-
-
-
-
---
-
-
-
-
-
-
- - -
-
-

Asterisks:

-
-
-
-
-
***
-
-
-
-
-
-
* * *
-
-

Underscores:

-
-
-
-
-
___
-
-
-
-
-
-
_ _ _
-
diff --git a/oldtests/Original/Horizontal_rules.markdown b/oldtests/Original/Horizontal_rules.markdown deleted file mode 100644 index 1594bda..0000000 --- a/oldtests/Original/Horizontal_rules.markdown +++ /dev/null @@ -1,67 +0,0 @@ -Dashes: - ---- - - --- - - --- - - --- - - --- - -- - - - - - - - - - - - - - - - - - - - - - - - - -Asterisks: - -*** - - *** - - *** - - *** - - *** - -* * * - - * * * - - * * * - - * * * - - * * * - - -Underscores: - -___ - - ___ - - ___ - - ___ - - ___ - -_ _ _ - - _ _ _ - - _ _ _ - - _ _ _ - - _ _ _ diff --git a/oldtests/Original/Images.html b/oldtests/Original/Images.html deleted file mode 100644 index bd5a7e0..0000000 --- a/oldtests/Original/Images.html +++ /dev/null @@ -1,11 +0,0 @@ -

Alt text

-

Alt text

-

Inline within a paragraph: alt text.

-

alt text

-

alt text

-

alt text

-

alt text.

-

Empty

-

this is a stupid URL

-

alt text

-

alt text

diff --git a/oldtests/Original/Images.markdown b/oldtests/Original/Images.markdown deleted file mode 100644 index 5707590..0000000 --- a/oldtests/Original/Images.markdown +++ /dev/null @@ -1,26 +0,0 @@ -![Alt text](/path/to/img.jpg) - -![Alt text](/path/to/img.jpg "Optional title") - -Inline within a paragraph: [alt text](/url/). - -![alt text](/url/ "title preceded by two spaces") - -![alt text](/url/ "title has spaces afterward" ) - -![alt text]() - -![alt text]( "with a title"). - -![Empty]() - -![this is a stupid URL](http://example.com/(parens).jpg) - - -![alt text][foo] - - [foo]: /url/ - -![alt text][bar] - - [bar]: /url/ "Title here" \ No newline at end of file diff --git a/oldtests/Original/Inline_HTML_Advanced.html b/oldtests/Original/Inline_HTML_Advanced.html deleted file mode 100644 index 631c135..0000000 --- a/oldtests/Original/Inline_HTML_Advanced.html +++ /dev/null @@ -1,23 +0,0 @@ -

Simple block on one line:

-
foo
-

And nested without indentation:

-
-
-
-foo -
-
-
-
bar
-
-

And with attributes:

-
-
-
-
-

This was broken in 1.0.2b7:

-
-
-foo -
-
diff --git a/oldtests/Original/Inline_HTML_Advanced.markdown b/oldtests/Original/Inline_HTML_Advanced.markdown deleted file mode 100644 index 3633f81..0000000 --- a/oldtests/Original/Inline_HTML_Advanced.markdown +++ /dev/null @@ -1,30 +0,0 @@ -Simple block on one line: - -
foo
- -And nested without indentation: - -
-
-
-foo -
-
-
-
bar
-
- -And with attributes: - -
-
-
-
- -This was broken in 1.0.2b7: - -
-
-foo -
-
diff --git a/oldtests/Original/Inline_HTML_Simple.html b/oldtests/Original/Inline_HTML_Simple.html deleted file mode 100644 index 923a18c..0000000 --- a/oldtests/Original/Inline_HTML_Simple.html +++ /dev/null @@ -1,45 +0,0 @@ -

Here's a simple block:

-
- foo -
-

This should be a code block, though:

-
<div>
-    foo
-</div>
-
-

As should this:

-
<div>foo</div>
-
-

Now, nested:

-
-
-
- foo -
-
-
-

This should just be an HTML comment:

- -

Multiline:

- -

Code block:

-
<!-- Comment -->
-
-

Just plain comment, with trailing spaces on the line:

- -

Code:

-
<hr />
-
-

Hr's:

-
-
-
-
-
-
-
-
-
diff --git a/oldtests/Original/Inline_HTML_Simple.markdown b/oldtests/Original/Inline_HTML_Simple.markdown deleted file mode 100644 index 14aa2dc..0000000 --- a/oldtests/Original/Inline_HTML_Simple.markdown +++ /dev/null @@ -1,69 +0,0 @@ -Here's a simple block: - -
- foo -
- -This should be a code block, though: - -
- foo -
- -As should this: - -
foo
- -Now, nested: - -
-
-
- foo -
-
-
- -This should just be an HTML comment: - - - -Multiline: - - - -Code block: - - - -Just plain comment, with trailing spaces on the line: - - - -Code: - -
- -Hr's: - -
- -
- -
- -
- -
- -
- -
- -
- -
- diff --git a/oldtests/Original/Inline_HTML_comments.html b/oldtests/Original/Inline_HTML_comments.html deleted file mode 100644 index ebc4818..0000000 --- a/oldtests/Original/Inline_HTML_comments.html +++ /dev/null @@ -1,8 +0,0 @@ -

Paragraph one.

- - -

Paragraph two.

- -

The end.

diff --git a/oldtests/Original/Inline_HTML_comments.markdown b/oldtests/Original/Inline_HTML_comments.markdown deleted file mode 100644 index 41d830d..0000000 --- a/oldtests/Original/Inline_HTML_comments.markdown +++ /dev/null @@ -1,13 +0,0 @@ -Paragraph one. - - - - - -Paragraph two. - - - -The end. diff --git a/oldtests/Original/Links_inline_style.html b/oldtests/Original/Links_inline_style.html deleted file mode 100644 index feb4637..0000000 --- a/oldtests/Original/Links_inline_style.html +++ /dev/null @@ -1,12 +0,0 @@ -

Just a URL.

-

URL and title.

-

URL and title.

-

URL and title.

-

URL and title.

-

URL wrapped in angle brackets.

-

URL w/ angle brackets + title.

-

Empty.

-

With parens in the URL

-

(With outer parens and parens in url)

-

With parens in the URL

-

(With outer parens and parens in url)

diff --git a/oldtests/Original/Links_inline_style.markdown b/oldtests/Original/Links_inline_style.markdown deleted file mode 100644 index aba9658..0000000 --- a/oldtests/Original/Links_inline_style.markdown +++ /dev/null @@ -1,24 +0,0 @@ -Just a [URL](/url/). - -[URL and title](/url/ "title"). - -[URL and title](/url/ "title preceded by two spaces"). - -[URL and title](/url/ "title preceded by a tab"). - -[URL and title](/url/ "title has spaces afterward" ). - -[URL wrapped in angle brackets](). - -[URL w/ angle brackets + title]( "Here's the title"). - -[Empty](). - -[With parens in the URL](http://en.wikipedia.org/wiki/WIMP_(computing)) - -(With outer parens and [parens in url](/foo(bar))) - - -[With parens in the URL](/foo(bar) "and a title") - -(With outer parens and [parens in url](/foo(bar) "and a title")) diff --git a/oldtests/Original/Links_reference_style.html b/oldtests/Original/Links_reference_style.html deleted file mode 100644 index 6d78b96..0000000 --- a/oldtests/Original/Links_reference_style.html +++ /dev/null @@ -1,28 +0,0 @@ -

Foo bar.

-

Foo bar.

-

Foo bar.

-

With embedded [brackets].

-

Indented once.

-

Indented twice.

-

Indented thrice.

-

Indented [four][] times.

-
[four]: /url
-
-
-

this should work

-

So should this.

-

And this.

-

And this.

-

And this.

-

But not [that] [].

-

Nor [that][].

-

Nor [that].

-

[Something in brackets like this should work]

-

[Same with this.]

-

In this case, this points to something else.

-

Backslashing should suppress [this] and [this].

-
-

Here's one where the link -breaks across lines.

-

Here's another where the link -breaks across lines, but with a line-ending space.

diff --git a/oldtests/Original/Links_reference_style.markdown b/oldtests/Original/Links_reference_style.markdown deleted file mode 100644 index 341ec88..0000000 --- a/oldtests/Original/Links_reference_style.markdown +++ /dev/null @@ -1,71 +0,0 @@ -Foo [bar] [1]. - -Foo [bar][1]. - -Foo [bar] -[1]. - -[1]: /url/ "Title" - - -With [embedded [brackets]] [b]. - - -Indented [once][]. - -Indented [twice][]. - -Indented [thrice][]. - -Indented [four][] times. - - [once]: /url - - [twice]: /url - - [thrice]: /url - - [four]: /url - - -[b]: /url/ - -* * * - -[this] [this] should work - -So should [this][this]. - -And [this] []. - -And [this][]. - -And [this]. - -But not [that] []. - -Nor [that][]. - -Nor [that]. - -[Something in brackets like [this][] should work] - -[Same with [this].] - -In this case, [this](/somethingelse/) points to something else. - -Backslashing should suppress \[this] and [this\]. - -[this]: foo - - -* * * - -Here's one where the [link -breaks] across lines. - -Here's another where the [link -breaks] across lines, but with a line-ending space. - - -[link breaks]: /url/ diff --git a/oldtests/Original/Links_shortcut_references.html b/oldtests/Original/Links_shortcut_references.html deleted file mode 100644 index 8163ade..0000000 --- a/oldtests/Original/Links_shortcut_references.html +++ /dev/null @@ -1,6 +0,0 @@ -

This is the simple case.

-

This one has a line -break.

-

This one has a line -break with a line-ending space.

-

this and the other

diff --git a/oldtests/Original/Links_shortcut_references.markdown b/oldtests/Original/Links_shortcut_references.markdown deleted file mode 100644 index 8c44c98..0000000 --- a/oldtests/Original/Links_shortcut_references.markdown +++ /dev/null @@ -1,20 +0,0 @@ -This is the [simple case]. - -[simple case]: /simple - - - -This one has a [line -break]. - -This one has a [line -break] with a line-ending space. - -[line break]: /foo - - -[this] [that] and the [other] - -[this]: /this -[that]: /that -[other]: /other diff --git a/oldtests/Original/Literal_quotes_in_titles.html b/oldtests/Original/Literal_quotes_in_titles.html deleted file mode 100644 index 62e8641..0000000 --- a/oldtests/Original/Literal_quotes_in_titles.html +++ /dev/null @@ -1,2 +0,0 @@ -

Foo bar.

-

Foo bar.

diff --git a/oldtests/Original/Literal_quotes_in_titles.markdown b/oldtests/Original/Literal_quotes_in_titles.markdown deleted file mode 100644 index 29d0e42..0000000 --- a/oldtests/Original/Literal_quotes_in_titles.markdown +++ /dev/null @@ -1,7 +0,0 @@ -Foo [bar][]. - -Foo [bar](/url/ "Title with "quotes" inside"). - - - [bar]: /url/ "Title with "quotes" inside" - diff --git a/oldtests/Original/Markdown_Documentation_Basics.html b/oldtests/Original/Markdown_Documentation_Basics.html deleted file mode 100644 index 0dee67f..0000000 --- a/oldtests/Original/Markdown_Documentation_Basics.html +++ /dev/null @@ -1,242 +0,0 @@ -

Markdown: Basics

- -

Getting the Gist of Markdown's Formatting Syntax

-

This page offers a brief overview of what it's like to use Markdown. -The syntax page provides complete, detailed documentation for -every feature, but Markdown should be very easy to pick up simply by -looking at a few examples of it in action. The examples on this page -are written in a before/after style, showing example syntax and the -HTML output produced by Markdown.

-

It's also helpful to simply try Markdown out; the Dingus is a -web application that allows you to type your own Markdown-formatted text -and translate it to XHTML.

-

Note: This document is itself written using Markdown; you -can see the source for it by adding '.text' to the URL.

-

Paragraphs, Headers, Blockquotes

-

A paragraph is simply one or more consecutive lines of text, separated -by one or more blank lines. (A blank line is any line that looks like a -blank line -- a line containing nothing but spaces or tabs is considered -blank.) Normal paragraphs should not be indented with spaces or tabs.

-

Markdown offers two styles of headers: Setext and atx. -Setext-style headers for <h1> and <h2> are created by -"underlining" with equal signs (=) and hyphens (-), respectively. -To create an atx-style header, you put 1-6 hash marks (#) at the -beginning of the line -- the number of hashes equals the resulting -HTML header level.

-

Blockquotes are indicated using email-style '>' angle brackets.

-

Markdown:

-
A First Level Header
-====================
-
-A Second Level Header
----------------------
-
-Now is the time for all good men to come to
-the aid of their country. This is just a
-regular paragraph.
-
-The quick brown fox jumped over the lazy
-dog's back.
-
-### Header 3
-
-> This is a blockquote.
->
-> This is the second paragraph in the blockquote.
->
-> ## This is an H2 in a blockquote
-
-

Output:

-
<h1>A First Level Header</h1>
-
-<h2>A Second Level Header</h2>
-
-<p>Now is the time for all good men to come to
-the aid of their country. This is just a
-regular paragraph.</p>
-
-<p>The quick brown fox jumped over the lazy
-dog's back.</p>
-
-<h3>Header 3</h3>
-
-<blockquote>
-    <p>This is a blockquote.</p>
-
-    <p>This is the second paragraph in the blockquote.</p>
-
-    <h2>This is an H2 in a blockquote</h2>
-</blockquote>
-
-

Phrase Emphasis

-

Markdown uses asterisks and underscores to indicate spans of emphasis.

-

Markdown:

-
Some of these words *are emphasized*.
-Some of these words _are emphasized also_.
-
-Use two asterisks for **strong emphasis**.
-Or, if you prefer, __use two underscores instead__.
-
-

Output:

-
<p>Some of these words <em>are emphasized</em>.
-Some of these words <em>are emphasized also</em>.</p>
-
-<p>Use two asterisks for <strong>strong emphasis</strong>.
-Or, if you prefer, <strong>use two underscores instead</strong>.</p>
-
-

Lists

-

Unordered (bulleted) lists use asterisks, pluses, and hyphens (*, -+, and -) as list markers. These three markers are -interchangeable; this:

-
*   Candy.
-*   Gum.
-*   Booze.
-
-

this:

-
+   Candy.
-+   Gum.
-+   Booze.
-
-

and this:

-
-   Candy.
--   Gum.
--   Booze.
-
-

all produce the same output:

-
<ul>
-<li>Candy.</li>
-<li>Gum.</li>
-<li>Booze.</li>
-</ul>
-
-

Ordered (numbered) lists use regular numbers, followed by periods, as -list markers:

-
1.  Red
-2.  Green
-3.  Blue
-
-

Output:

-
<ol>
-<li>Red</li>
-<li>Green</li>
-<li>Blue</li>
-</ol>
-
-

If you put blank lines between items, you'll get <p> tags for the -list item text. You can create multi-paragraph list items by indenting -the paragraphs by 4 spaces or 1 tab:

-
*   A list item.
-
-    With multiple paragraphs.
-
-*   Another item in the list.
-
-

Output:

-
<ul>
-<li><p>A list item.</p>
-<p>With multiple paragraphs.</p></li>
-<li><p>Another item in the list.</p></li>
-</ul>
-
-

Links

-

Markdown supports two styles for creating links: inline and -reference. With both styles, you use square brackets to delimit the -text you want to turn into a link.

-

Inline-style links use parentheses immediately after the link text. -For example:

-
This is an [example link](http://example.com/).
-
-

Output:

-
<p>This is an <a href="http://example.com/">
-example link</a>.</p>
-
-

Optionally, you may include a title attribute in the parentheses:

-
This is an [example link](http://example.com/ "With a Title").
-
-

Output:

-
<p>This is an <a href="http://example.com/" title="With a Title">
-example link</a>.</p>
-
-

Reference-style links allow you to refer to your links by names, which -you define elsewhere in your document:

-
I get 10 times more traffic from [Google][1] than from
-[Yahoo][2] or [MSN][3].
-
-[1]: http://google.com/        "Google"
-[2]: http://search.yahoo.com/  "Yahoo Search"
-[3]: http://search.msn.com/    "MSN Search"
-
-

Output:

-
<p>I get 10 times more traffic from <a href="http://google.com/"
-title="Google">Google</a> than from <a href="http://search.yahoo.com/"
-title="Yahoo Search">Yahoo</a> or <a href="http://search.msn.com/"
-title="MSN Search">MSN</a>.</p>
-
-

The title attribute is optional. Link names may contain letters, -numbers and spaces, but are not case sensitive:

-
I start my morning with a cup of coffee and
-[The New York Times][NY Times].
-
-[ny times]: http://www.nytimes.com/
-
-

Output:

-
<p>I start my morning with a cup of coffee and
-<a href="http://www.nytimes.com/">The New York Times</a>.</p>
-
-

Images

-

Image syntax is very much like link syntax.

-

Inline (titles are optional):

-
![alt text](/path/to/img.jpg "Title")
-
-

Reference-style:

-
![alt text][id]
-
-[id]: /path/to/img.jpg "Title"
-
-

Both of the above examples produce the same output:

-
<img src="/path/to/img.jpg" alt="alt text" title="Title" />
-
-

Code

-

In a regular paragraph, you can create a code span by wrapping text in -backtick quotes. Any ampersands (&) and angle brackets (< or ->) will automatically be translated into HTML entities. This makes -it easy to use Markdown to write about HTML example code:

-
I strongly recommend against using any `<blink>` tags.
-
-I wish SmartyPants used named entities like `&mdash;`
-instead of decimal-encoded entites like `&#8212;`.
-
-

Output:

-
<p>I strongly recommend against using any
-<code>&lt;blink&gt;</code> tags.</p>
-
-<p>I wish SmartyPants used named entities like
-<code>&amp;mdash;</code> instead of decimal-encoded
-entites like <code>&amp;#8212;</code>.</p>
-
-

To specify an entire block of pre-formatted code, indent every line of -the block by 4 spaces or 1 tab. Just like with code spans, &, <, -and > characters will be escaped automatically.

-

Markdown:

-
If you want your page to validate under XHTML 1.0 Strict,
-you've got to put paragraph tags in your blockquotes:
-
-    <blockquote>
-        <p>For example.</p>
-    </blockquote>
-
-

Output:

-
<p>If you want your page to validate under XHTML 1.0 Strict,
-you've got to put paragraph tags in your blockquotes:</p>
-
-<pre><code>&lt;blockquote&gt;
-    &lt;p&gt;For example.&lt;/p&gt;
-&lt;/blockquote&gt;
-</code></pre>
-
diff --git a/oldtests/Original/Markdown_Documentation_Basics.markdown b/oldtests/Original/Markdown_Documentation_Basics.markdown deleted file mode 100644 index 24eba65..0000000 --- a/oldtests/Original/Markdown_Documentation_Basics.markdown +++ /dev/null @@ -1,306 +0,0 @@ -Markdown: Basics -================ - - - - -Getting the Gist of Markdown's Formatting Syntax ------------------------------------------------- - -This page offers a brief overview of what it's like to use Markdown. -The [syntax page] [s] provides complete, detailed documentation for -every feature, but Markdown should be very easy to pick up simply by -looking at a few examples of it in action. The examples on this page -are written in a before/after style, showing example syntax and the -HTML output produced by Markdown. - -It's also helpful to simply try Markdown out; the [Dingus] [d] is a -web application that allows you type your own Markdown-formatted text -and translate it to XHTML. - -**Note:** This document is itself written using Markdown; you -can [see the source for it by adding '.text' to the URL] [src]. - - [s]: /projects/markdown/syntax "Markdown Syntax" - [d]: /projects/markdown/dingus "Markdown Dingus" - [src]: /projects/markdown/basics.text - - -## Paragraphs, Headers, Blockquotes ## - -A paragraph is simply one or more consecutive lines of text, separated -by one or more blank lines. (A blank line is any line that looks like a -blank line -- a line containing nothing spaces or tabs is considered -blank.) Normal paragraphs should not be intended with spaces or tabs. - -Markdown offers two styles of headers: *Setext* and *atx*. -Setext-style headers for `

` and `

` are created by -"underlining" with equal signs (`=`) and hyphens (`-`), respectively. -To create an atx-style header, you put 1-6 hash marks (`#`) at the -beginning of the line -- the number of hashes equals the resulting -HTML header level. - -Blockquotes are indicated using email-style '`>`' angle brackets. - -Markdown: - - A First Level Header - ==================== - - A Second Level Header - --------------------- - - Now is the time for all good men to come to - the aid of their country. This is just a - regular paragraph. - - The quick brown fox jumped over the lazy - dog's back. - - ### Header 3 - - > This is a blockquote. - > - > This is the second paragraph in the blockquote. - > - > ## This is an H2 in a blockquote - - -Output: - -

A First Level Header

- -

A Second Level Header

- -

Now is the time for all good men to come to - the aid of their country. This is just a - regular paragraph.

- -

The quick brown fox jumped over the lazy - dog's back.

- -

Header 3

- -
-

This is a blockquote.

- -

This is the second paragraph in the blockquote.

- -

This is an H2 in a blockquote

-
- - - -### Phrase Emphasis ### - -Markdown uses asterisks and underscores to indicate spans of emphasis. - -Markdown: - - Some of these words *are emphasized*. - Some of these words _are emphasized also_. - - Use two asterisks for **strong emphasis**. - Or, if you prefer, __use two underscores instead__. - -Output: - -

Some of these words are emphasized. - Some of these words are emphasized also.

- -

Use two asterisks for strong emphasis. - Or, if you prefer, use two underscores instead.

- - - -## Lists ## - -Unordered (bulleted) lists use asterisks, pluses, and hyphens (`*`, -`+`, and `-`) as list markers. These three markers are -interchangable; this: - - * Candy. - * Gum. - * Booze. - -this: - - + Candy. - + Gum. - + Booze. - -and this: - - - Candy. - - Gum. - - Booze. - -all produce the same output: - -
    -
  • Candy.
  • -
  • Gum.
  • -
  • Booze.
  • -
- -Ordered (numbered) lists use regular numbers, followed by periods, as -list markers: - - 1. Red - 2. Green - 3. Blue - -Output: - -
    -
  1. Red
  2. -
  3. Green
  4. -
  5. Blue
  6. -
- -If you put blank lines between items, you'll get `

` tags for the -list item text. You can create multi-paragraph list items by indenting -the paragraphs by 4 spaces or 1 tab: - - * A list item. - - With multiple paragraphs. - - * Another item in the list. - -Output: - -

    -
  • A list item.

    -

    With multiple paragraphs.

  • -
  • Another item in the list.

  • -
- - - -### Links ### - -Markdown supports two styles for creating links: *inline* and -*reference*. With both styles, you use square brackets to delimit the -text you want to turn into a link. - -Inline-style links use parentheses immediately after the link text. -For example: - - This is an [example link](http://example.com/). - -Output: - -

This is an - example link.

- -Optionally, you may include a title attribute in the parentheses: - - This is an [example link](http://example.com/ "With a Title"). - -Output: - -

This is an - example link.

- -Reference-style links allow you to refer to your links by names, which -you define elsewhere in your document: - - I get 10 times more traffic from [Google][1] than from - [Yahoo][2] or [MSN][3]. - - [1]: http://google.com/ "Google" - [2]: http://search.yahoo.com/ "Yahoo Search" - [3]: http://search.msn.com/ "MSN Search" - -Output: - -

I get 10 times more traffic from Google than from Yahoo or MSN.

- -The title attribute is optional. Link names may contain letters, -numbers and spaces, but are *not* case sensitive: - - I start my morning with a cup of coffee and - [The New York Times][NY Times]. - - [ny times]: http://www.nytimes.com/ - -Output: - -

I start my morning with a cup of coffee and - The New York Times.

- - -### Images ### - -Image syntax is very much like link syntax. - -Inline (titles are optional): - - ![alt text](/path/to/img.jpg "Title") - -Reference-style: - - ![alt text][id] - - [id]: /path/to/img.jpg "Title" - -Both of the above examples produce the same output: - - alt text - - - -### Code ### - -In a regular paragraph, you can create code span by wrapping text in -backtick quotes. Any ampersands (`&`) and angle brackets (`<` or -`>`) will automatically be translated into HTML entities. This makes -it easy to use Markdown to write about HTML example code: - - I strongly recommend against using any `` tags. - - I wish SmartyPants used named entities like `—` - instead of decimal-encoded entites like `—`. - -Output: - -

I strongly recommend against using any - <blink> tags.

- -

I wish SmartyPants used named entities like - &mdash; instead of decimal-encoded - entites like &#8212;.

- - -To specify an entire block of pre-formatted code, indent every line of -the block by 4 spaces or 1 tab. Just like with code spans, `&`, `<`, -and `>` characters will be escaped automatically. - -Markdown: - - If you want your page to validate under XHTML 1.0 Strict, - you've got to put paragraph tags in your blockquotes: - -
-

For example.

-
- -Output: - -

If you want your page to validate under XHTML 1.0 Strict, - you've got to put paragraph tags in your blockquotes:

- -
<blockquote>
-        <p>For example.</p>
-    </blockquote>
-    
diff --git a/oldtests/Original/Markdown_Documentation_Syntax.html b/oldtests/Original/Markdown_Documentation_Syntax.html deleted file mode 100644 index f379dcf..0000000 --- a/oldtests/Original/Markdown_Documentation_Syntax.html +++ /dev/null @@ -1,708 +0,0 @@ -

Markdown: Syntax

- - -

Note: This document is itself written using Markdown; you -can see the source for it by adding '.text' to the URL.

-
-

Overview

-

Philosophy

-

Markdown is intended to be as easy-to-read and easy-to-write as is feasible.

-

Readability, however, is emphasized above all else. A Markdown-formatted -document should be publishable as-is, as plain text, without looking -like it's been marked up with tags or formatting instructions. While -Markdown's syntax has been influenced by several existing text-to-HTML -filters -- including Setext, atx, Textile, reStructuredText, -Grutatext, and EtText -- the single biggest source of -inspiration for Markdown's syntax is the format of plain text email.

-

To this end, Markdown's syntax is comprised entirely of punctuation -characters, which punctuation characters have been carefully chosen so -as to look like what they mean. E.g., asterisks around a word actually -look like *emphasis*. Markdown lists look like, well, lists. Even -blockquotes look like quoted passages of text, assuming you've ever -used email.

-

Inline HTML

-

Markdown's syntax is intended for one purpose: to be used as a -format for writing for the web.

-

Markdown is not a replacement for HTML, or even close to it. Its -syntax is very small, corresponding only to a very small subset of -HTML tags. The idea is not to create a syntax that makes it easier -to insert HTML tags. In my opinion, HTML tags are already easy to -insert. The idea for Markdown is to make it easy to read, write, and -edit prose. HTML is a publishing format; Markdown is a writing -format. Thus, Markdown's formatting syntax only addresses issues that -can be conveyed in plain text.

-

For any markup that is not covered by Markdown's syntax, you simply -use HTML itself. There's no need to preface it or delimit it to -indicate that you're switching from Markdown to HTML; you just use -the tags.

-

The only restrictions are that block-level HTML elements -- e.g. <div>, -<table>, <pre>, <p>, etc. -- must be separated from surrounding -content by blank lines, and the start and end tags of the block should -not be indented with tabs or spaces. Markdown is smart enough not -to add extra (unwanted) <p> tags around HTML block-level tags.

-

For example, to add an HTML table to a Markdown article:

-
This is a regular paragraph.
-
-<table>
-    <tr>
-        <td>Foo</td>
-    </tr>
-</table>
-
-This is another regular paragraph.
-
-

Note that Markdown formatting syntax is not processed within block-level -HTML tags. E.g., you can't use Markdown-style *emphasis* inside an -HTML block.

-

Span-level HTML tags -- e.g. <span>, <cite>, or <del> -- can be -used anywhere in a Markdown paragraph, list item, or header. If you -want, you can even use HTML tags instead of Markdown formatting; e.g. if -you'd prefer to use HTML <a> or <img> tags instead of Markdown's -link or image syntax, go right ahead.

-

Unlike block-level HTML tags, Markdown syntax is processed within -span-level tags.

-

Automatic Escaping for Special Characters

-

In HTML, there are two characters that demand special treatment: < -and &. Left angle brackets are used to start tags; ampersands are -used to denote HTML entities. If you want to use them as literal -characters, you must escape them as entities, e.g. &lt;, and -&amp;.

-

Ampersands in particular are bedeviling for web writers. If you want to -write about 'AT&T', you need to write 'AT&amp;T'. You even need to -escape ampersands within URLs. Thus, if you want to link to:

-
http://images.google.com/images?num=30&q=larry+bird
-
-

you need to encode the URL as:

-
http://images.google.com/images?num=30&amp;q=larry+bird
-
-

in your anchor tag href attribute. Needless to say, this is easy to -forget, and is probably the single most common source of HTML validation -errors in otherwise well-marked-up web sites.

-

Markdown allows you to use these characters naturally, taking care of -all the necessary escaping for you. If you use an ampersand as part of -an HTML entity, it remains unchanged; otherwise it will be translated -into &amp;.

-

So, if you want to include a copyright symbol in your article, you can write:

-
&copy;
-
-

and Markdown will leave it alone. But if you write:

-
AT&T
-
-

Markdown will translate it to:

-
AT&amp;T
-
-

Similarly, because Markdown supports inline HTML, if you use -angle brackets as delimiters for HTML tags, Markdown will treat them as -such. But if you write:

-
4 < 5
-
-

Markdown will translate it to:

-
4 &lt; 5
-
-

However, inside Markdown code spans and blocks, angle brackets and -ampersands are always encoded automatically. This makes it easy to use -Markdown to write about HTML code. (As opposed to raw HTML, which is a -terrible format for writing about HTML syntax, because every single < -and & in your example code needs to be escaped.)

-
-

Block Elements

-

Paragraphs and Line Breaks

-

A paragraph is simply one or more consecutive lines of text, separated -by one or more blank lines. (A blank line is any line that looks like a -blank line -- a line containing nothing but spaces or tabs is considered -blank.) Normal paragraphs should not be indented with spaces or tabs.

-

The implication of the "one or more consecutive lines of text" rule is -that Markdown supports "hard-wrapped" text paragraphs. This differs -significantly from most other text-to-HTML formatters (including Movable -Type's "Convert Line Breaks" option) which translate every line break -character in a paragraph into a <br /> tag.

-

When you do want to insert a <br /> break tag using Markdown, you -end a line with two or more spaces, then type return.

-

Yes, this takes a tad more effort to create a <br />, but a simplistic -"every line break is a <br />" rule wouldn't work for Markdown. -Markdown's email-style blockquoting and multi-paragraph list items -work best -- and look better -- when you format them with hard breaks.

- -

Markdown supports two styles of headers, Setext and atx.

-

Setext-style headers are "underlined" using equal signs (for first-level -headers) and dashes (for second-level headers). For example:

-
This is an H1
-=============
-
-This is an H2
--------------
-
-

Any number of underlining ='s or -'s will work.

-

Atx-style headers use 1-6 hash characters at the start of the line, -corresponding to header levels 1-6. For example:

-
# This is an H1
-
-## This is an H2
-
-###### This is an H6
-
-

Optionally, you may "close" atx-style headers. This is purely -cosmetic -- you can use this if you think it looks better. The -closing hashes don't even need to match the number of hashes -used to open the header. (The number of opening hashes -determines the header level.) :

-
# This is an H1 #
-
-## This is an H2 ##
-
-### This is an H3 ######
-
-

Blockquotes

-

Markdown uses email-style > characters for blockquoting. If you're -familiar with quoting passages of text in an email message, then you -know how to create a blockquote in Markdown. It looks best if you hard -wrap the text and put a > before every line:

-
> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
-> consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
-> Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
-> 
-> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
-> id sem consectetuer libero luctus adipiscing.
-
-

Markdown allows you to be lazy and only put the > before the first -line of a hard-wrapped paragraph:

-
> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
-consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
-Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
-
-> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
-id sem consectetuer libero luctus adipiscing.
-
-

Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by -adding additional levels of >:

-
> This is the first level of quoting.
->
-> > This is nested blockquote.
->
-> Back to the first level.
-
-

Blockquotes can contain other Markdown elements, including headers, lists, -and code blocks:

-
> ## This is a header.
-> 
-> 1.   This is the first list item.
-> 2.   This is the second list item.
-> 
-> Here's some example code:
-> 
->     return shell_exec("echo $input | $markdown_script");
-
-

Any decent text editor should make email-style quoting easy. For -example, with BBEdit, you can make a selection and choose Increase -Quote Level from the Text menu.

-

Lists

-

Markdown supports ordered (numbered) and unordered (bulleted) lists.

-

Unordered lists use asterisks, pluses, and hyphens -- interchangeably --- as list markers:

-
*   Red
-*   Green
-*   Blue
-
-

is equivalent to:

-
+   Red
-+   Green
-+   Blue
-
-

and:

-
-   Red
--   Green
--   Blue
-
-

Ordered lists use numbers followed by periods:

-
1.  Bird
-2.  McHale
-3.  Parish
-
-

It's important to note that the actual numbers you use to mark the -list have no effect on the HTML output Markdown produces. The HTML -Markdown produces from the above list is:

-
<ol>
-<li>Bird</li>
-<li>McHale</li>
-<li>Parish</li>
-</ol>
-
-

If you instead wrote the list in Markdown like this:

-
1.  Bird
-1.  McHale
-1.  Parish
-
-

or even:

-
3. Bird
-1. McHale
-8. Parish
-
-

you'd get the exact same HTML output. The point is, if you want to, -you can use ordinal numbers in your ordered Markdown lists, so that -the numbers in your source match the numbers in your published HTML. -But if you want to be lazy, you don't have to.

-

If you do use lazy list numbering, however, you should still start the -list with the number 1. At some point in the future, Markdown may support -starting ordered lists at an arbitrary number.

-

List markers typically start at the left margin, but may be indented by -up to three spaces. List markers must be followed by one or more spaces -or a tab.

-

To make lists look nice, you can wrap items with hanging indents:

-
*   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
-    Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
-    viverra nec, fringilla in, laoreet vitae, risus.
-*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
-    Suspendisse id sem consectetuer libero luctus adipiscing.
-
-

But if you want to be lazy, you don't have to:

-
*   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
-Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
-viverra nec, fringilla in, laoreet vitae, risus.
-*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
-Suspendisse id sem consectetuer libero luctus adipiscing.
-
-

If list items are separated by blank lines, Markdown will wrap the -items in <p> tags in the HTML output. For example, this input:

-
*   Bird
-*   Magic
-
-

will turn into:

-
<ul>
-<li>Bird</li>
-<li>Magic</li>
-</ul>
-
-

But this:

-
*   Bird
-
-*   Magic
-
-

will turn into:

-
<ul>
-<li><p>Bird</p></li>
-<li><p>Magic</p></li>
-</ul>
-
-

List items may consist of multiple paragraphs. Each subsequent -paragraph in a list item must be indented by either 4 spaces -or one tab:

-
1.  This is a list item with two paragraphs. Lorem ipsum dolor
-    sit amet, consectetuer adipiscing elit. Aliquam hendrerit
-    mi posuere lectus.
-
-    Vestibulum enim wisi, viverra nec, fringilla in, laoreet
-    vitae, risus. Donec sit amet nisl. Aliquam semper ipsum
-    sit amet velit.
-
-2.  Suspendisse id sem consectetuer libero luctus adipiscing.
-
-

It looks nice if you indent every line of the subsequent -paragraphs, but here again, Markdown will allow you to be -lazy:

-
*   This is a list item with two paragraphs.
-
-    This is the second paragraph in the list item. You're
-only required to indent the first line. Lorem ipsum dolor
-sit amet, consectetuer adipiscing elit.
-
-*   Another item in the same list.
-
-

To put a blockquote within a list item, the blockquote's > -delimiters need to be indented:

-
*   A list item with a blockquote:
-
-    > This is a blockquote
-    > inside a list item.
-
-

To put a code block within a list item, the code block needs -to be indented twice -- 8 spaces or two tabs:

-
*   A list item with a code block:
-
-        <code goes here>
-
-

It's worth noting that it's possible to trigger an ordered list by -accident, by writing something like this:

-
1986. What a great season.
-
-

In other words, a number-period-space sequence at the beginning of a -line. To avoid this, you can backslash-escape the period:

-
1986\. What a great season.
-
-

Code Blocks

-

Pre-formatted code blocks are used for writing about programming or -markup source code. Rather than forming normal paragraphs, the lines -of a code block are interpreted literally. Markdown wraps a code block -in both <pre> and <code> tags.

-

To produce a code block in Markdown, simply indent every line of the -block by at least 4 spaces or 1 tab. For example, given this input:

-
This is a normal paragraph:
-
-    This is a code block.
-
-

Markdown will generate:

-
<p>This is a normal paragraph:</p>
-
-<pre><code>This is a code block.
-</code></pre>
-
-

One level of indentation -- 4 spaces or 1 tab -- is removed from each -line of the code block. For example, this:

-
Here is an example of AppleScript:
-
-    tell application "Foo"
-        beep
-    end tell
-
-

will turn into:

-
<p>Here is an example of AppleScript:</p>
-
-<pre><code>tell application "Foo"
-    beep
-end tell
-</code></pre>
-
-

A code block continues until it reaches a line that is not indented -(or the end of the article).

-

Within a code block, ampersands (&) and angle brackets (< and >) -are automatically converted into HTML entities. This makes it very -easy to include example HTML source code using Markdown -- just paste -it and indent it, and Markdown will handle the hassle of encoding the -ampersands and angle brackets. For example, this:

-
    <div class="footer">
-        &copy; 2004 Foo Corporation
-    </div>
-
-

will turn into:

-
<pre><code>&lt;div class="footer"&gt;
-    &amp;copy; 2004 Foo Corporation
-&lt;/div&gt;
-</code></pre>
-
-

Regular Markdown syntax is not processed within code blocks. E.g., -asterisks are just literal asterisks within a code block. This means -it's also easy to use Markdown to write about Markdown's own syntax.

-

Horizontal Rules

-

You can produce a horizontal rule tag (<hr />) by placing three or -more hyphens, asterisks, or underscores on a line by themselves. If you -wish, you may use spaces between the hyphens or asterisks. Each of the -following lines will produce a horizontal rule:

-
* * *
-
-***
-
-*****
-
-- - -
-
----------------------------------------
-
-_ _ _
-
-
-

Span Elements

- -

Markdown supports two styles of links: inline and reference.

-

In both styles, the link text is delimited by [square brackets].

-

To create an inline link, use a set of regular parentheses immediately -after the link text's closing square bracket. Inside the parentheses, -put the URL where you want the link to point, along with an optional -title for the link, surrounded in quotes. For example:

-
This is [an example](http://example.com/ "Title") inline link.
-
-[This link](http://example.net/) has no title attribute.
-
-

Will produce:

-
<p>This is <a href="http://example.com/" title="Title">
-an example</a> inline link.</p>
-
-<p><a href="http://example.net/">This link</a> has no
-title attribute.</p>
-
-

If you're referring to a local resource on the same server, you can -use relative paths:

-
See my [About](/about/) page for details.
-
-

Reference-style links use a second set of square brackets, inside -which you place a label of your choosing to identify the link:

-
This is [an example][id] reference-style link.
-
-

You can optionally use a space to separate the sets of brackets:

-
This is [an example] [id] reference-style link.
-
-

Then, anywhere in the document, you define your link label like this, -on a line by itself:

-
[id]: http://example.com/  "Optional Title Here"
-
-

That is:

-
    -
  • Square brackets containing the link identifier (optionally -indented from the left margin using up to three spaces);
  • -
  • followed by a colon;
  • -
  • followed by one or more spaces (or tabs);
  • -
  • followed by the URL for the link;
  • -
  • optionally followed by a title attribute for the link, enclosed -in double or single quotes.
  • -
-

The link URL may, optionally, be surrounded by angle brackets:

-
[id]: <http://example.com/>  "Optional Title Here"
-
-

You can put the title attribute on the next line and use extra spaces -or tabs for padding, which tends to look better with longer URLs:

-
[id]: http://example.com/longish/path/to/resource/here
-    "Optional Title Here"
-
-

Link definitions are only used for creating links during Markdown -processing, and are stripped from your document in the HTML output.

-

Link definition names may consist of letters, numbers, spaces, and punctuation -- but they are not case sensitive. E.g. these two links:

-
[link text][a]
-[link text][A]
-
-

are equivalent.

-

The implicit link name shortcut allows you to omit the name of the -link, in which case the link text itself is used as the name. -Just use an empty set of square brackets -- e.g., to link the word -"Google" to the google.com web site, you could simply write:

-
[Google][]
-
-

And then define the link:

-
[Google]: http://google.com/
-
-

Because link names may contain spaces, this shortcut even works for -multiple words in the link text:

-
Visit [Daring Fireball][] for more information.
-
-

And then define the link:

-
[Daring Fireball]: http://daringfireball.net/
-
-

Link definitions can be placed anywhere in your Markdown document. I -tend to put them immediately after each paragraph in which they're -used, but if you want, you can put them all at the end of your -document, sort of like footnotes.

-

Here's an example of reference links in action:

-
I get 10 times more traffic from [Google] [1] than from
-[Yahoo] [2] or [MSN] [3].
-
-  [1]: http://google.com/        "Google"
-  [2]: http://search.yahoo.com/  "Yahoo Search"
-  [3]: http://search.msn.com/    "MSN Search"
-
-

Using the implicit link name shortcut, you could instead write:

-
I get 10 times more traffic from [Google][] than from
-[Yahoo][] or [MSN][].
-
-  [google]: http://google.com/        "Google"
-  [yahoo]:  http://search.yahoo.com/  "Yahoo Search"
-  [msn]:    http://search.msn.com/    "MSN Search"
-
-

Both of the above examples will produce the following HTML output:

-
<p>I get 10 times more traffic from <a href="http://google.com/"
-title="Google">Google</a> than from
-<a href="http://search.yahoo.com/" title="Yahoo Search">Yahoo</a>
-or <a href="http://search.msn.com/" title="MSN Search">MSN</a>.</p>
-
-

For comparison, here is the same paragraph written using -Markdown's inline link style:

-
I get 10 times more traffic from [Google](http://google.com/ "Google")
-than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or
-[MSN](http://search.msn.com/ "MSN Search").
-
-

The point of reference-style links is not that they're easier to -write. The point is that with reference-style links, your document -source is vastly more readable. Compare the above examples: using -reference-style links, the paragraph itself is only 81 characters -long; with inline-style links, it's 176 characters; and as raw HTML, -it's 234 characters. In the raw HTML, there's more markup than there -is text.

-

With Markdown's reference-style links, a source document much more -closely resembles the final output, as rendered in a browser. By -allowing you to move the markup-related metadata out of the paragraph, -you can add links without interrupting the narrative flow of your -prose.

-

Emphasis

-

Markdown treats asterisks (*) and underscores (_) as indicators of -emphasis. Text wrapped with one * or _ will be wrapped with an -HTML <em> tag; double *'s or _'s will be wrapped with an HTML -<strong> tag. E.g., this input:

-
*single asterisks*
-
-_single underscores_
-
-**double asterisks**
-
-__double underscores__
-
-

will produce:

-
<em>single asterisks</em>
-
-<em>single underscores</em>
-
-<strong>double asterisks</strong>
-
-<strong>double underscores</strong>
-
-

You can use whichever style you prefer; the lone restriction is that -the same character must be used to open and close an emphasis span.

-

Emphasis can be used in the middle of a word:

-
un*fucking*believable
-
-

But if you surround an * or _ with spaces, it'll be treated as a -literal asterisk or underscore.

-

To produce a literal asterisk or underscore at a position where it -would otherwise be used as an emphasis delimiter, you can backslash -escape it:

-
\*this text is surrounded by literal asterisks\*
-
-

Code

-

To indicate a span of code, wrap it with backtick quotes (`). -Unlike a pre-formatted code block, a code span indicates code within a -normal paragraph. For example:

-
Use the `printf()` function.
-
-

will produce:

-
<p>Use the <code>printf()</code> function.</p>
-
-

To include a literal backtick character within a code span, you can use -multiple backticks as the opening and closing delimiters:

-
``There is a literal backtick (`) here.``
-
-

which will produce this:

-
<p><code>There is a literal backtick (`) here.</code></p>
-
-

The backtick delimiters surrounding a code span may include spaces -- -one after the opening, one before the closing. This allows you to place -literal backtick characters at the beginning or end of a code span:

-
A single backtick in a code span: `` ` ``
-
-A backtick-delimited string in a code span: `` `foo` ``
-
-

will produce:

-
<p>A single backtick in a code span: <code>`</code></p>
-
-<p>A backtick-delimited string in a code span: <code>`foo`</code></p>
-
-

With a code span, ampersands and angle brackets are encoded as HTML -entities automatically, which makes it easy to include example HTML -tags. Markdown will turn this:

-
Please don't use any `<blink>` tags.
-
-

into:

-
<p>Please don't use any <code>&lt;blink&gt;</code> tags.</p>
-
-

You can write this:

-
`&#8212;` is the decimal-encoded equivalent of `&mdash;`.
-
-

to produce:

-
<p><code>&amp;#8212;</code> is the decimal-encoded
-equivalent of <code>&amp;mdash;</code>.</p>
-
-

Images

-

Admittedly, it's fairly difficult to devise a "natural" syntax for -placing images into a plain text document format.

-

Markdown uses an image syntax that is intended to resemble the syntax -for links, allowing for two styles: inline and reference.

-

Inline image syntax looks like this:

-
![Alt text](/path/to/img.jpg)
-
-![Alt text](/path/to/img.jpg "Optional title")
-
-

That is:

-
    -
  • An exclamation mark: !;
  • -
  • followed by a set of square brackets, containing the alt -attribute text for the image;
  • -
  • followed by a set of parentheses, containing the URL or path to -the image, and an optional title attribute enclosed in double -or single quotes.
  • -
-

Reference-style image syntax looks like this:

-
![Alt text][id]
-
-

Where "id" is the name of a defined image reference. Image references -are defined using syntax identical to link references:

-
[id]: url/to/image  "Optional title attribute"
-
-

As of this writing, Markdown has no syntax for specifying the -dimensions of an image; if this is important to you, you can simply -use regular HTML <img> tags.

-
-

Miscellaneous

- -

Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:

-
<http://example.com/>
-
-

Markdown will turn this into:

-
<a href="http://example.com/">http://example.com/</a>
-
-

Automatic links for email addresses work similarly, except that -Markdown will also perform a bit of randomized decimal and hex -entity-encoding to help obscure your address from address-harvesting -spambots. For example, Markdown will turn this:

-
<address@example.com>
-
-

into something like this:

-
<a href="&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:&#x61;&#x64;&#x64;&#x72;&#x65;
-&#115;&#115;&#64;&#101;&#120;&#x61;&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;
-&#109;">&#x61;&#x64;&#x64;&#x72;&#x65;&#115;&#115;&#64;&#101;&#120;&#x61;
-&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;&#109;</a>
-
-

which will render in a browser as a clickable link to "address@example.com".

-

(This sort of entity-encoding trick will indeed fool many, if not -most, address-harvesting bots, but it definitely won't fool all of -them. It's better than nothing, but an address published in this way -will probably eventually start receiving spam.)

-

Backslash Escapes

-

Markdown allows you to use backslash escapes to generate literal -characters which would otherwise have special meaning in Markdown's -formatting syntax. For example, if you wanted to surround a word with -literal asterisks (instead of an HTML <em> tag), you can add backslashes -before the asterisks, like this:

-
\*literal asterisks\*
-
-

Markdown provides backslash escapes for the following characters:

-
\   backslash
-`   backtick
-*   asterisk
-_   underscore
-{}  curly braces
-[]  square brackets
-()  parentheses
-#   hash mark
-+   plus sign
--   minus sign (hyphen)
-.   dot
-!   exclamation mark
-
diff --git a/oldtests/Original/Markdown_Documentation_Syntax.markdown b/oldtests/Original/Markdown_Documentation_Syntax.markdown deleted file mode 100644 index 57360a1..0000000 --- a/oldtests/Original/Markdown_Documentation_Syntax.markdown +++ /dev/null @@ -1,888 +0,0 @@ -Markdown: Syntax -================ - - - - -* [Overview](#overview) - * [Philosophy](#philosophy) - * [Inline HTML](#html) - * [Automatic Escaping for Special Characters](#autoescape) -* [Block Elements](#block) - * [Paragraphs and Line Breaks](#p) - * [Headers](#header) - * [Blockquotes](#blockquote) - * [Lists](#list) - * [Code Blocks](#precode) - * [Horizontal Rules](#hr) -* [Span Elements](#span) - * [Links](#link) - * [Emphasis](#em) - * [Code](#code) - * [Images](#img) -* [Miscellaneous](#misc) - * [Backslash Escapes](#backslash) - * [Automatic Links](#autolink) - - -**Note:** This document is itself written using Markdown; you -can [see the source for it by adding '.text' to the URL][src]. - - [src]: /projects/markdown/syntax.text - -* * * - -

Overview

- -

Philosophy

- -Markdown is intended to be as easy-to-read and easy-to-write as is feasible. - -Readability, however, is emphasized above all else. A Markdown-formatted -document should be publishable as-is, as plain text, without looking -like it's been marked up with tags or formatting instructions. While -Markdown's syntax has been influenced by several existing text-to-HTML -filters -- including [Setext] [1], [atx] [2], [Textile] [3], [reStructuredText] [4], -[Grutatext] [5], and [EtText] [6] -- the single biggest source of -inspiration for Markdown's syntax is the format of plain text email. - - [1]: http://docutils.sourceforge.net/mirror/setext.html - [2]: http://www.aaronsw.com/2002/atx/ - [3]: http://textism.com/tools/textile/ - [4]: http://docutils.sourceforge.net/rst.html - [5]: http://www.triptico.com/software/grutatxt.html - [6]: http://ettext.taint.org/doc/ - -To this end, Markdown's syntax is comprised entirely of punctuation -characters, which punctuation characters have been carefully chosen so -as to look like what they mean. E.g., asterisks around a word actually -look like \*emphasis\*. Markdown lists look like, well, lists. Even -blockquotes look like quoted passages of text, assuming you've ever -used email. - - - -

Inline HTML

- -Markdown's syntax is intended for one purpose: to be used as a -format for *writing* for the web. - -Markdown is not a replacement for HTML, or even close to it. Its -syntax is very small, corresponding only to a very small subset of -HTML tags. The idea is *not* to create a syntax that makes it easier -to insert HTML tags. In my opinion, HTML tags are already easy to -insert. The idea for Markdown is to make it easy to read, write, and -edit prose. HTML is a *publishing* format; Markdown is a *writing* -format. Thus, Markdown's formatting syntax only addresses issues that -can be conveyed in plain text. - -For any markup that is not covered by Markdown's syntax, you simply -use HTML itself. There's no need to preface it or delimit it to -indicate that you're switching from Markdown to HTML; you just use -the tags. - -The only restrictions are that block-level HTML elements -- e.g. `
`, -``, `
`, `

`, etc. -- must be separated from surrounding -content by blank lines, and the start and end tags of the block should -not be indented with tabs or spaces. Markdown is smart enough not -to add extra (unwanted) `

` tags around HTML block-level tags. - -For example, to add an HTML table to a Markdown article: - - This is a regular paragraph. - -

- - - -
Foo
- - This is another regular paragraph. - -Note that Markdown formatting syntax is not processed within block-level -HTML tags. E.g., you can't use Markdown-style `*emphasis*` inside an -HTML block. - -Span-level HTML tags -- e.g. ``, ``, or `` -- can be -used anywhere in a Markdown paragraph, list item, or header. If you -want, you can even use HTML tags instead of Markdown formatting; e.g. if -you'd prefer to use HTML `` or `` tags instead of Markdown's -link or image syntax, go right ahead. - -Unlike block-level HTML tags, Markdown syntax *is* processed within -span-level tags. - - -

Automatic Escaping for Special Characters

- -In HTML, there are two characters that demand special treatment: `<` -and `&`. Left angle brackets are used to start tags; ampersands are -used to denote HTML entities. If you want to use them as literal -characters, you must escape them as entities, e.g. `<`, and -`&`. - -Ampersands in particular are bedeviling for web writers. If you want to -write about 'AT&T', you need to write '`AT&T`'. You even need to -escape ampersands within URLs. Thus, if you want to link to: - - http://images.google.com/images?num=30&q=larry+bird - -you need to encode the URL as: - - http://images.google.com/images?num=30&q=larry+bird - -in your anchor tag `href` attribute. Needless to say, this is easy to -forget, and is probably the single most common source of HTML validation -errors in otherwise well-marked-up web sites. - -Markdown allows you to use these characters naturally, taking care of -all the necessary escaping for you. If you use an ampersand as part of -an HTML entity, it remains unchanged; otherwise it will be translated -into `&`. - -So, if you want to include a copyright symbol in your article, you can write: - - © - -and Markdown will leave it alone. But if you write: - - AT&T - -Markdown will translate it to: - - AT&T - -Similarly, because Markdown supports [inline HTML](#html), if you use -angle brackets as delimiters for HTML tags, Markdown will treat them as -such. But if you write: - - 4 < 5 - -Markdown will translate it to: - - 4 < 5 - -However, inside Markdown code spans and blocks, angle brackets and -ampersands are *always* encoded automatically. This makes it easy to use -Markdown to write about HTML code. (As opposed to raw HTML, which is a -terrible format for writing about HTML syntax, because every single `<` -and `&` in your example code needs to be escaped.) - - -* * * - - -

Block Elements

- - -

Paragraphs and Line Breaks

- -A paragraph is simply one or more consecutive lines of text, separated -by one or more blank lines. (A blank line is any line that looks like a -blank line -- a line containing nothing but spaces or tabs is considered -blank.) Normal paragraphs should not be intended with spaces or tabs. - -The implication of the "one or more consecutive lines of text" rule is -that Markdown supports "hard-wrapped" text paragraphs. This differs -significantly from most other text-to-HTML formatters (including Movable -Type's "Convert Line Breaks" option) which translate every line break -character in a paragraph into a `
` tag. - -When you *do* want to insert a `
` break tag using Markdown, you -end a line with two or more spaces, then type return. - -Yes, this takes a tad more effort to create a `
`, but a simplistic -"every line break is a `
`" rule wouldn't work for Markdown. -Markdown's email-style [blockquoting][bq] and multi-paragraph [list items][l] -work best -- and look better -- when you format them with hard breaks. - - [bq]: #blockquote - [l]: #list - - - - - -Markdown supports two styles of headers, [Setext] [1] and [atx] [2]. - -Setext-style headers are "underlined" using equal signs (for first-level -headers) and dashes (for second-level headers). For example: - - This is an H1 - ============= - - This is an H2 - ------------- - -Any number of underlining `=`'s or `-`'s will work. - -Atx-style headers use 1-6 hash characters at the start of the line, -corresponding to header levels 1-6. For example: - - # This is an H1 - - ## This is an H2 - - ###### This is an H6 - -Optionally, you may "close" atx-style headers. This is purely -cosmetic -- you can use this if you think it looks better. The -closing hashes don't even need to match the number of hashes -used to open the header. (The number of opening hashes -determines the header level.) : - - # This is an H1 # - - ## This is an H2 ## - - ### This is an H3 ###### - - -

Blockquotes

- -Markdown uses email-style `>` characters for blockquoting. If you're -familiar with quoting passages of text in an email message, then you -know how to create a blockquote in Markdown. It looks best if you hard -wrap the text and put a `>` before every line: - - > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, - > consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. - > Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. - > - > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse - > id sem consectetuer libero luctus adipiscing. - -Markdown allows you to be lazy and only put the `>` before the first -line of a hard-wrapped paragraph: - - > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, - consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. - Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. - - > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse - id sem consectetuer libero luctus adipiscing. - -Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by -adding additional levels of `>`: - - > This is the first level of quoting. - > - > > This is nested blockquote. - > - > Back to the first level. - -Blockquotes can contain other Markdown elements, including headers, lists, -and code blocks: - - > ## This is a header. - > - > 1. This is the first list item. - > 2. This is the second list item. - > - > Here's some example code: - > - > return shell_exec("echo $input | $markdown_script"); - -Any decent text editor should make email-style quoting easy. For -example, with BBEdit, you can make a selection and choose Increase -Quote Level from the Text menu. - - -

Lists

- -Markdown supports ordered (numbered) and unordered (bulleted) lists. - -Unordered lists use asterisks, pluses, and hyphens -- interchangably --- as list markers: - - * Red - * Green - * Blue - -is equivalent to: - - + Red - + Green - + Blue - -and: - - - Red - - Green - - Blue - -Ordered lists use numbers followed by periods: - - 1. Bird - 2. McHale - 3. Parish - -It's important to note that the actual numbers you use to mark the -list have no effect on the HTML output Markdown produces. The HTML -Markdown produces from the above list is: - -
    -
  1. Bird
  2. -
  3. McHale
  4. -
  5. Parish
  6. -
- -If you instead wrote the list in Markdown like this: - - 1. Bird - 1. McHale - 1. Parish - -or even: - - 3. Bird - 1. McHale - 8. Parish - -you'd get the exact same HTML output. The point is, if you want to, -you can use ordinal numbers in your ordered Markdown lists, so that -the numbers in your source match the numbers in your published HTML. -But if you want to be lazy, you don't have to. - -If you do use lazy list numbering, however, you should still start the -list with the number 1. At some point in the future, Markdown may support -starting ordered lists at an arbitrary number. - -List markers typically start at the left margin, but may be indented by -up to three spaces. List markers must be followed by one or more spaces -or a tab. - -To make lists look nice, you can wrap items with hanging indents: - - * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. - Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, - viverra nec, fringilla in, laoreet vitae, risus. - * Donec sit amet nisl. Aliquam semper ipsum sit amet velit. - Suspendisse id sem consectetuer libero luctus adipiscing. - -But if you want to be lazy, you don't have to: - - * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. - Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, - viverra nec, fringilla in, laoreet vitae, risus. - * Donec sit amet nisl. Aliquam semper ipsum sit amet velit. - Suspendisse id sem consectetuer libero luctus adipiscing. - -If list items are separated by blank lines, Markdown will wrap the -items in `

` tags in the HTML output. For example, this input: - - * Bird - * Magic - -will turn into: - -

    -
  • Bird
  • -
  • Magic
  • -
- -But this: - - * Bird - - * Magic - -will turn into: - -
    -
  • Bird

  • -
  • Magic

  • -
- -List items may consist of multiple paragraphs. Each subsequent -paragraph in a list item must be indented by either 4 spaces -or one tab: - - 1. This is a list item with two paragraphs. Lorem ipsum dolor - sit amet, consectetuer adipiscing elit. Aliquam hendrerit - mi posuere lectus. - - Vestibulum enim wisi, viverra nec, fringilla in, laoreet - vitae, risus. Donec sit amet nisl. Aliquam semper ipsum - sit amet velit. - - 2. Suspendisse id sem consectetuer libero luctus adipiscing. - -It looks nice if you indent every line of the subsequent -paragraphs, but here again, Markdown will allow you to be -lazy: - - * This is a list item with two paragraphs. - - This is the second paragraph in the list item. You're - only required to indent the first line. Lorem ipsum dolor - sit amet, consectetuer adipiscing elit. - - * Another item in the same list. - -To put a blockquote within a list item, the blockquote's `>` -delimiters need to be indented: - - * A list item with a blockquote: - - > This is a blockquote - > inside a list item. - -To put a code block within a list item, the code block needs -to be indented *twice* -- 8 spaces or two tabs: - - * A list item with a code block: - - - - -It's worth noting that it's possible to trigger an ordered list by -accident, by writing something like this: - - 1986. What a great season. - -In other words, a *number-period-space* sequence at the beginning of a -line. To avoid this, you can backslash-escape the period: - - 1986\. What a great season. - - - -

Code Blocks

- -Pre-formatted code blocks are used for writing about programming or -markup source code. Rather than forming normal paragraphs, the lines -of a code block are interpreted literally. Markdown wraps a code block -in both `
` and `` tags.
-
-To produce a code block in Markdown, simply indent every line of the
-block by at least 4 spaces or 1 tab. For example, given this input:
-
-    This is a normal paragraph:
-
-        This is a code block.
-
-Markdown will generate:
-
-    

This is a normal paragraph:

- -
This is a code block.
-    
- -One level of indentation -- 4 spaces or 1 tab -- is removed from each -line of the code block. For example, this: - - Here is an example of AppleScript: - - tell application "Foo" - beep - end tell - -will turn into: - -

Here is an example of AppleScript:

- -
tell application "Foo"
-        beep
-    end tell
-    
- -A code block continues until it reaches a line that is not indented -(or the end of the article). - -Within a code block, ampersands (`&`) and angle brackets (`<` and `>`) -are automatically converted into HTML entities. This makes it very -easy to include example HTML source code using Markdown -- just paste -it and indent it, and Markdown will handle the hassle of encoding the -ampersands and angle brackets. For example, this: - - - -will turn into: - -
<div class="footer">
-        &copy; 2004 Foo Corporation
-    </div>
-    
- -Regular Markdown syntax is not processed within code blocks. E.g., -asterisks are just literal asterisks within a code block. This means -it's also easy to use Markdown to write about Markdown's own syntax. - - - -

Horizontal Rules

- -You can produce a horizontal rule tag (`
`) by placing three or -more hyphens, asterisks, or underscores on a line by themselves. If you -wish, you may use spaces between the hyphens or asterisks. Each of the -following lines will produce a horizontal rule: - - * * * - - *** - - ***** - - - - - - - --------------------------------------- - - _ _ _ - - -* * * - -

Span Elements

- - - -Markdown supports two style of links: *inline* and *reference*. - -In both styles, the link text is delimited by [square brackets]. - -To create an inline link, use a set of regular parentheses immediately -after the link text's closing square bracket. Inside the parentheses, -put the URL where you want the link to point, along with an *optional* -title for the link, surrounded in quotes. For example: - - This is [an example](http://example.com/ "Title") inline link. - - [This link](http://example.net/) has no title attribute. - -Will produce: - -

This is - an example inline link.

- -

This link has no - title attribute.

- -If you're referring to a local resource on the same server, you can -use relative paths: - - See my [About](/about/) page for details. - -Reference-style links use a second set of square brackets, inside -which you place a label of your choosing to identify the link: - - This is [an example][id] reference-style link. - -You can optionally use a space to separate the sets of brackets: - - This is [an example] [id] reference-style link. - -Then, anywhere in the document, you define your link label like this, -on a line by itself: - - [id]: http://example.com/ "Optional Title Here" - -That is: - -* Square brackets containing the link identifier (optionally - indented from the left margin using up to three spaces); -* followed by a colon; -* followed by one or more spaces (or tabs); -* followed by the URL for the link; -* optionally followed by a title attribute for the link, enclosed - in double or single quotes. - -The link URL may, optionally, be surrounded by angle brackets: - - [id]: "Optional Title Here" - -You can put the title attribute on the next line and use extra spaces -or tabs for padding, which tends to look better with longer URLs: - - [id]: http://example.com/longish/path/to/resource/here - "Optional Title Here" - -Link definitions are only used for creating links during Markdown -processing, and are stripped from your document in the HTML output. - -Link definition names may constist of letters, numbers, spaces, and punctuation -- but they are *not* case sensitive. E.g. these two links: - - [link text][a] - [link text][A] - -are equivalent. - -The *implicit link name* shortcut allows you to omit the name of the -link, in which case the link text itself is used as the name. 
-Just use an empty set of square brackets -- e.g., to link the word -"Google" to the google.com web site, you could simply write: - - [Google][] - -And then define the link: - - [Google]: http://google.com/ - -Because link names may contain spaces, this shortcut even works for -multiple words in the link text: - - Visit [Daring Fireball][] for more information. - -And then define the link: - - [Daring Fireball]: http://daringfireball.net/ - -Link definitions can be placed anywhere in your Markdown document. I -tend to put them immediately after each paragraph in which they're -used, but if you want, you can put them all at the end of your -document, sort of like footnotes. - -Here's an example of reference links in action: - - I get 10 times more traffic from [Google] [1] than from - [Yahoo] [2] or [MSN] [3]. - - [1]: http://google.com/ "Google" - [2]: http://search.yahoo.com/ "Yahoo Search" - [3]: http://search.msn.com/ "MSN Search" - -Using the implicit link name shortcut, you could instead write: - - I get 10 times more traffic from [Google][] than from - [Yahoo][] or [MSN][]. - - [google]: http://google.com/ "Google" - [yahoo]: http://search.yahoo.com/ "Yahoo Search" - [msn]: http://search.msn.com/ "MSN Search" - -Both of the above examples will produce the following HTML output: - -

I get 10 times more traffic from Google than from - Yahoo - or MSN.

- -For comparison, here is the same paragraph written using -Markdown's inline link style: - - I get 10 times more traffic from [Google](http://google.com/ "Google") - than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or - [MSN](http://search.msn.com/ "MSN Search"). - -The point of reference-style links is not that they're easier to -write. The point is that with reference-style links, your document -source is vastly more readable. Compare the above examples: using -reference-style links, the paragraph itself is only 81 characters -long; with inline-style links, it's 176 characters; and as raw HTML, -it's 234 characters. In the raw HTML, there's more markup than there -is text. - -With Markdown's reference-style links, a source document much more -closely resembles the final output, as rendered in a browser. By -allowing you to move the markup-related metadata out of the paragraph, -you can add links without interrupting the narrative flow of your -prose. - - -

Emphasis

- -Markdown treats asterisks (`*`) and underscores (`_`) as indicators of -emphasis. Text wrapped with one `*` or `_` will be wrapped with an -HTML `` tag; double `*`'s or `_`'s will be wrapped with an HTML -`` tag. E.g., this input: - - *single asterisks* - - _single underscores_ - - **double asterisks** - - __double underscores__ - -will produce: - - single asterisks - - single underscores - - double asterisks - - double underscores - -You can use whichever style you prefer; the lone restriction is that -the same character must be used to open and close an emphasis span. - -Emphasis can be used in the middle of a word: - - un*fucking*believable - -But if you surround an `*` or `_` with spaces, it'll be treated as a -literal asterisk or underscore. - -To produce a literal asterisk or underscore at a position where it -would otherwise be used as an emphasis delimiter, you can backslash -escape it: - - \*this text is surrounded by literal asterisks\* - - - -

Code

- -To indicate a span of code, wrap it with backtick quotes (`` ` ``). -Unlike a pre-formatted code block, a code span indicates code within a -normal paragraph. For example: - - Use the `printf()` function. - -will produce: - -

Use the printf() function.

- -To include a literal backtick character within a code span, you can use -multiple backticks as the opening and closing delimiters: - - ``There is a literal backtick (`) here.`` - -which will produce this: - -

There is a literal backtick (`) here.

- -The backtick delimiters surrounding a code span may include spaces -- -one after the opening, one before the closing. This allows you to place -literal backtick characters at the beginning or end of a code span: - - A single backtick in a code span: `` ` `` - - A backtick-delimited string in a code span: `` `foo` `` - -will produce: - -

A single backtick in a code span: `

- -

A backtick-delimited string in a code span: `foo`

- -With a code span, ampersands and angle brackets are encoded as HTML -entities automatically, which makes it easy to include example HTML -tags. Markdown will turn this: - - Please don't use any `` tags. - -into: - -

Please don't use any <blink> tags.

- -You can write this: - - `—` is the decimal-encoded equivalent of `—`. - -to produce: - -

&#8212; is the decimal-encoded - equivalent of &mdash;.

- - - -

Images

- -Admittedly, it's fairly difficult to devise a "natural" syntax for -placing images into a plain text document format. - -Markdown uses an image syntax that is intended to resemble the syntax -for links, allowing for two styles: *inline* and *reference*. - -Inline image syntax looks like this: - - ![Alt text](/path/to/img.jpg) - - ![Alt text](/path/to/img.jpg "Optional title") - -That is: - -* An exclamation mark: `!`; -* followed by a set of square brackets, containing the `alt` - attribute text for the image; -* followed by a set of parentheses, containing the URL or path to - the image, and an optional `title` attribute enclosed in double - or single quotes. - -Reference-style image syntax looks like this: - - ![Alt text][id] - -Where "id" is the name of a defined image reference. Image references -are defined using syntax identical to link references: - - [id]: url/to/image "Optional title attribute" - -As of this writing, Markdown has no syntax for specifying the -dimensions of an image; if this is important to you, you can simply -use regular HTML `` tags. - - -* * * - - -

Miscellaneous

- - - -Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this: - - - -Markdown will turn this into: - - http://example.com/ - -Automatic links for email addresses work similarly, except that -Markdown will also perform a bit of randomized decimal and hex -entity-encoding to help obscure your address from address-harvesting -spambots. For example, Markdown will turn this: - - - -into something like this: - - address@exa - mple.com - -which will render in a browser as a clickable link to "address@example.com". - -(This sort of entity-encoding trick will indeed fool many, if not -most, address-harvesting bots, but it definitely won't fool all of -them. It's better than nothing, but an address published in this way -will probably eventually start receiving spam.) - - - -

Backslash Escapes

- -Markdown allows you to use backslash escapes to generate literal -characters which would otherwise have special meaning in Markdown's -formatting syntax. For example, if you wanted to surround a word with -literal asterisks (instead of an HTML `` tag), you can backslashes -before the asterisks, like this: - - \*literal asterisks\* - -Markdown provides backslash escapes for the following characters: - - \ backslash - ` backtick - * asterisk - _ underscore - {} curly braces - [] square brackets - () parentheses - # hash mark - + plus sign - - minus sign (hyphen) - . dot - ! exclamation mark - diff --git a/oldtests/Original/Nested_blockquotes.html b/oldtests/Original/Nested_blockquotes.html deleted file mode 100644 index 02efc59..0000000 --- a/oldtests/Original/Nested_blockquotes.html +++ /dev/null @@ -1,7 +0,0 @@ -
-

foo

-
-

bar

-
-

foo

-
diff --git a/oldtests/Original/Nested_blockquotes.markdown b/oldtests/Original/Nested_blockquotes.markdown deleted file mode 100644 index ed3c624..0000000 --- a/oldtests/Original/Nested_blockquotes.markdown +++ /dev/null @@ -1,5 +0,0 @@ -> foo -> -> > bar -> -> foo diff --git a/oldtests/Original/Ordered_and_unordered_lists.html b/oldtests/Original/Ordered_and_unordered_lists.html deleted file mode 100644 index 78d752e..0000000 --- a/oldtests/Original/Ordered_and_unordered_lists.html +++ /dev/null @@ -1,112 +0,0 @@ -

Unordered

-

Asterisks tight:

-
    -
  • asterisk 1
  • -
  • asterisk 2
  • -
  • asterisk 3
  • -
-

Asterisks loose:

-
    -
  • asterisk 1

  • -
  • asterisk 2

  • -
  • asterisk 3

  • -
-
-

Pluses tight:

-
    -
  • Plus 1
  • -
  • Plus 2
  • -
  • Plus 3
  • -
-

Pluses loose:

-
    -
  • Plus 1

  • -
  • Plus 2

  • -
  • Plus 3

  • -
-
-

Minuses tight:

-
    -
  • Minus 1
  • -
  • Minus 2
  • -
  • Minus 3
  • -
-

Minuses loose:

-
    -
  • Minus 1

  • -
  • Minus 2

  • -
  • Minus 3

  • -
-

Ordered

-

Tight:

-
    -
  1. First
  2. -
  3. Second
  4. -
  5. Third
  6. -
-

and:

-
    -
  1. One
  2. -
  3. Two
  4. -
  5. Three
  6. -
-

Loose using tabs:

-
    -
  1. First

  2. -
  3. Second

  4. -
  5. Third

  6. -
-

and using spaces:

-
    -
  1. One

  2. -
  3. Two

  4. -
  5. Three

  6. -
-

Multiple paragraphs:

-
    -
  1. Item 1, graf one.

    -

    Item 2. graf two. The quick brown fox jumped over the lazy dog's -back.

  2. -
  3. Item 2.

  4. -
  5. Item 3.

  6. -
-

Nested

-
    -
  • Tab -
      -
    • Tab -
        -
      • Tab
      • -
    • -
  • -
-

Here's another:

-
    -
  1. First
  2. -
  3. Second: -
      -
    • Fee
    • -
    • Fie
    • -
    • Foe
    • -
  4. -
  5. Third
  6. -
-

Same thing but with paragraphs:

-
    -
  1. First

  2. -
  3. Second:

    -
      -
    • Fee
    • -
    • Fie
    • -
    • Foe
    • -
  4. -
  5. Third

  6. -
-

This was an error in Markdown 1.0.1:

-
    -
  • this

    -
      -
    • sub
    • -
    -

    that

  • -
diff --git a/oldtests/Original/Ordered_and_unordered_lists.markdown b/oldtests/Original/Ordered_and_unordered_lists.markdown deleted file mode 100644 index 7f3b497..0000000 --- a/oldtests/Original/Ordered_and_unordered_lists.markdown +++ /dev/null @@ -1,131 +0,0 @@ -## Unordered - -Asterisks tight: - -* asterisk 1 -* asterisk 2 -* asterisk 3 - - -Asterisks loose: - -* asterisk 1 - -* asterisk 2 - -* asterisk 3 - -* * * - -Pluses tight: - -+ Plus 1 -+ Plus 2 -+ Plus 3 - - -Pluses loose: - -+ Plus 1 - -+ Plus 2 - -+ Plus 3 - -* * * - - -Minuses tight: - -- Minus 1 -- Minus 2 -- Minus 3 - - -Minuses loose: - -- Minus 1 - -- Minus 2 - -- Minus 3 - - -## Ordered - -Tight: - -1. First -2. Second -3. Third - -and: - -1. One -2. Two -3. Three - - -Loose using tabs: - -1. First - -2. Second - -3. Third - -and using spaces: - -1. One - -2. Two - -3. Three - -Multiple paragraphs: - -1. Item 1, graf one. - - Item 2. graf two. The quick brown fox jumped over the lazy dog's - back. - -2. Item 2. - -3. Item 3. - - - -## Nested - -* Tab - * Tab - * Tab - -Here's another: - -1. First -2. Second: - * Fee - * Fie - * Foe -3. Third - -Same thing but with paragraphs: - -1. First - -2. Second: - * Fee - * Fie - * Foe - -3. Third - - -This was an error in Markdown 1.0.1: - -* this - - * sub - - that diff --git a/oldtests/Original/README b/oldtests/Original/README deleted file mode 100644 index 5143258..0000000 --- a/oldtests/Original/README +++ /dev/null @@ -1,15 +0,0 @@ -These are from John Gruber's original markdown test suite, via -Michel Fortin's mdtest. - -The html files have been modified slightly in ways that do not affect the -semantics. For example, entities are used for quotes in text, and -blank lines are omitted between block-level tags. - -Trailing blank spaces are removed from lines in raw HTML blocks. - -The one (insignificant) semantic change is switching the order -of emph and strong tags in the output for ***strong and emph***. 
- -We have removed Hard-wrapped_paragraphs_with_list-like_lines tests, -because the new implementation no longer requires a blank line -before a list. diff --git a/oldtests/Original/Strong_and_em_together.html b/oldtests/Original/Strong_and_em_together.html deleted file mode 100644 index 2629594..0000000 --- a/oldtests/Original/Strong_and_em_together.html +++ /dev/null @@ -1,4 +0,0 @@ -

This is strong and em.

-

So is this word.

-

This is strong and em.

-

So is this word.

diff --git a/oldtests/Original/Strong_and_em_together.markdown b/oldtests/Original/Strong_and_em_together.markdown deleted file mode 100644 index 95ee690..0000000 --- a/oldtests/Original/Strong_and_em_together.markdown +++ /dev/null @@ -1,7 +0,0 @@ -***This is strong and em.*** - -So is ***this*** word. - -___This is strong and em.___ - -So is ___this___ word. diff --git a/oldtests/Original/Tabs.html b/oldtests/Original/Tabs.html deleted file mode 100644 index 5389bdf..0000000 --- a/oldtests/Original/Tabs.html +++ /dev/null @@ -1,19 +0,0 @@ -
    -
  • this is a list item -indented with tabs

  • -
  • this is a list item -indented with spaces

  • -
-

Code:

-
this code block is indented by one tab
-
-

And:

-
    this code block is indented by two tabs
-
-

And:

-
+   this is an example list item
-    indented with tabs
-
-+   this is an example list item
-    indented with spaces
-
diff --git a/oldtests/Original/Tabs.markdown b/oldtests/Original/Tabs.markdown deleted file mode 100644 index 589d113..0000000 --- a/oldtests/Original/Tabs.markdown +++ /dev/null @@ -1,21 +0,0 @@ -+ this is a list item - indented with tabs - -+ this is a list item - indented with spaces - -Code: - - this code block is indented by one tab - -And: - - this code block is indented by two tabs - -And: - - + this is an example list item - indented with tabs - - + this is an example list item - indented with spaces diff --git a/oldtests/Original/Tidyness.html b/oldtests/Original/Tidyness.html deleted file mode 100644 index f2a8ce7..0000000 --- a/oldtests/Original/Tidyness.html +++ /dev/null @@ -1,8 +0,0 @@ -
-

A list within a blockquote:

-
    -
  • asterisk 1
  • -
  • asterisk 2
  • -
  • asterisk 3
  • -
-
diff --git a/oldtests/Original/Tidyness.markdown b/oldtests/Original/Tidyness.markdown deleted file mode 100644 index 5f18b8d..0000000 --- a/oldtests/Original/Tidyness.markdown +++ /dev/null @@ -1,5 +0,0 @@ -> A list within a blockquote: -> -> * asterisk 1 -> * asterisk 2 -> * asterisk 3 diff --git a/oldtests/Tabs/TabConversionUnicode.html b/oldtests/Tabs/TabConversionUnicode.html deleted file mode 100644 index f596f6a..0000000 --- a/oldtests/Tabs/TabConversionUnicode.html +++ /dev/null @@ -1 +0,0 @@ -

То лпой is a Russian word with a tab inside.

diff --git a/oldtests/Tabs/TabConversionUnicode.markdown b/oldtests/Tabs/TabConversionUnicode.markdown deleted file mode 100644 index 0bd7b52..0000000 --- a/oldtests/Tabs/TabConversionUnicode.markdown +++ /dev/null @@ -1 +0,0 @@ -`То лпой` is a Russian word with a tab inside. -- cgit v1.2.3 From f1cc071037261cd7ebde1563f2f5fb045c9b1dd4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 7 Oct 2014 22:35:44 -0700 Subject: Removed bstrlib license. We no longer use this library. --- license.bstrlib.txt | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 license.bstrlib.txt diff --git a/license.bstrlib.txt b/license.bstrlib.txt deleted file mode 100644 index cf78a98..0000000 --- a/license.bstrlib.txt +++ /dev/null @@ -1,29 +0,0 @@ -Copyright (c) 2002-2008 Paul Hsieh -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - Neither the name of bstrlib nor the names of its contributors may be used - to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - -- cgit v1.2.3 From dceca7a50f0f4a447dd5ad13eedb774debf7ac7a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 7 Oct 2014 22:36:20 -0700 Subject: Removed uthash license. We no longer use this library. --- license.uthash.txt | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 license.uthash.txt diff --git a/license.uthash.txt b/license.uthash.txt deleted file mode 100644 index ad8e16a..0000000 --- a/license.uthash.txt +++ /dev/null @@ -1,21 +0,0 @@ -Copyright (c) 2005-2013, Troy D. Hanson http://troydhanson.github.com/uthash/ -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER -OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -- cgit v1.2.3 From 8122177e49f9d28b6606ce8168788113508e3306 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 7 Oct 2014 22:45:19 -0700 Subject: Added test case from issue #147. --- spec.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/spec.txt b/spec.txt index 2a7e3de..fa2a877 100644 --- a/spec.txt +++ b/spec.txt @@ -4532,6 +4532,18 @@ __foo _bar_ baz__

foo bar baz

. +. +**foo, *bar*, baz** +. +

foo, bar, baz

+. + +. +__foo, _bar_, baz__ +. +

foo, bar, baz

+. + But note: . -- cgit v1.2.3 From 735f77b2a6a016abd56dfd1717de5a4b14528c36 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 7 Oct 2014 23:00:56 -0700 Subject: Added cases from #51 to spec. Closes #51. --- spec.txt | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/spec.txt b/spec.txt index fa2a877..7b447f1 100644 --- a/spec.txt +++ b/spec.txt @@ -4357,6 +4357,32 @@ __this is a double underscore (`__`)__

this is a double underscore (__)

. +Or use the other emphasis character: + +. +*_* +. +

_

+. + +. +_*_ +. +

*

+. + +. +*__* +. +

__

+. + +. +_**_ +. +

**

+. + `*` delimiters allow intra-word emphasis; `_` delimiters do not: . -- cgit v1.2.3 From c74f15daae5d979f3fa90e840aeaf75d9b52e33f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 7 Oct 2014 23:07:28 -0700 Subject: Added link to commonmark.org on web page. Closes #23. --- narrative.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/narrative.md b/narrative.md index 2448ad8..11b41b8 100644 --- a/narrative.md +++ b/narrative.md @@ -129,3 +129,12 @@ working out the spec, I benefited greatly from collaboration with David Greenspan, and from extensive discussions with a group of industrial users of Markdown, including Jeff Atwood, Vincent Marti, and Neil Williams. + +### Contributing + +There is a [forum for discussing +CommonMark](http://talk.commonmark.org); you should use it instead of +github issues for questions and possibly open-ended discussions. +Use the [github issue tracker](http://github.com/jgm/stmd/issues) +only for simple, clear, actionable issues. + -- cgit v1.2.3 From daeb55edc7636deebc2a79621ea06c3548d67827 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bengt=20L=C3=BCers?= Date: Mon, 8 Sep 2014 16:16:36 +0200 Subject: Correct capitalization of JavaScript --- README.md | 4 ++-- js/stmd.js | 2 +- narrative.md | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 78fc837..96661ab 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ CommonMark ========== CommonMark is a [specification of Markdown syntax][the spec], -together with BSD3-licensed implementations (`stmd`) in C and javascript. +together with BSD3-licensed implementations (`stmd`) in C and JavaScript. The implementations ------------------- @@ -20,7 +20,7 @@ tarball.) --ast Print AST instead of HTML --version Print version -The javascript implementation is a single javascript file, with +The JavaScript implementation is a single JavaScript file, with no dependencies, that can be linked to in an HTML page. 
A standalone version (using `node.js`) is also provided (`js/markdown`), and there is a "dingus" for playing with it interactively. (`make dingus` will start diff --git a/js/stmd.js b/js/stmd.js index dd7876a..ba5c2a3 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -1,4 +1,4 @@ -// stmd.js - CommomMark in javascript +// stmd.js - CommomMark in JavaScript // Copyright (C) 2014 John MacFarlane // License: BSD3. diff --git a/narrative.md b/narrative.md index 11b41b8..0a86a88 100644 --- a/narrative.md +++ b/narrative.md @@ -4,7 +4,7 @@ title: CommonMark CommonMark is a [specification of Markdown syntax](http://jgm.github.io/stmd/spec.html), together with -BSD3-licensed implementations (`stmd`) in C and javascript. The source +BSD3-licensed implementations (`stmd`) in C and JavaScript. The source for the spec and the two implementations can be found in [this repository](http://github.com/jgm/stmd). @@ -12,7 +12,7 @@ The C implementation provides both a library and a standalone program `stmd` that converts Markdown to HTML. It is written in standard C99 and has no library dependencies. -The javascript implementation is a single javascript file, with no +The JavaScript implementation is a single JavaScript file, with no dependencies. 
[Try it now!](http://jgm.github.io/stmd/js/) [The spec](http://jgm.github.io/stmd/spec.html) contains over 400 -- cgit v1.2.3 From 1806a06c34aeec717e521b86d9e70894ff632e41 Mon Sep 17 00:00:00 2001 From: Will Bond Date: Wed, 8 Oct 2014 11:29:47 -0400 Subject: Remove duplicate `footer` --- spec.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec.txt b/spec.txt index 7b447f1..0c09c43 100644 --- a/spec.txt +++ b/spec.txt @@ -1355,8 +1355,8 @@ name is one of the following (case-insensitive): `output`, `col`, `p`, `colgroup`, `pre`, `dd`, `progress`, `div`, `section`, `dl`, `table`, `td`, `dt`, `tbody`, `embed`, `textarea`, `fieldset`, `tfoot`, `figcaption`, `th`, `figure`, `thead`, `footer`, -`footer`, `tr`, `form`, `ul`, `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, -`video`, `script`, `style`. +`tr`, `form`, `ul`, `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `video`, +`script`, `style`. An [HTML block](#html-block) begins with an [HTML block tag](#html-block-tag), [HTML comment](#html-comment), -- cgit v1.2.3